diff options
Diffstat (limited to 'sys/fs/nfsclient')
-rw-r--r-- | sys/fs/nfsclient/nfs_clbio.c | 2 | ||||
-rw-r--r-- | sys/fs/nfsclient/nfs_clcomsubs.c | 88 | ||||
-rw-r--r-- | sys/fs/nfsclient/nfs_clkdtrace.c | 26 | ||||
-rw-r--r-- | sys/fs/nfsclient/nfs_clkrpc.c | 19 | ||||
-rw-r--r-- | sys/fs/nfsclient/nfs_clport.c | 9 | ||||
-rw-r--r-- | sys/fs/nfsclient/nfs_clrpcops.c | 1845 | ||||
-rw-r--r-- | sys/fs/nfsclient/nfs_clstate.c | 1168 | ||||
-rw-r--r-- | sys/fs/nfsclient/nfs_clvfsops.c | 100 | ||||
-rw-r--r-- | sys/fs/nfsclient/nfs_clvnops.c | 56 | ||||
-rw-r--r-- | sys/fs/nfsclient/nfsmount.h | 8 | ||||
-rw-r--r-- | sys/fs/nfsclient/nfsnode.h | 2 |
11 files changed, 3055 insertions, 268 deletions
diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c index 26286c5..dba3bc9 100644 --- a/sys/fs/nfsclient/nfs_clbio.c +++ b/sys/fs/nfsclient/nfs_clbio.c @@ -1371,6 +1371,8 @@ ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg) goto out; error = vinvalbuf(vp, flags, 0, slptimeo); } + if (NFSHASPNFS(nmp)) + nfscl_layoutcommit(vp, td); mtx_lock(&np->n_mtx); if (np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; diff --git a/sys/fs/nfsclient/nfs_clcomsubs.c b/sys/fs/nfsclient/nfs_clcomsubs.c index b68e5bd..073c6cc 100644 --- a/sys/fs/nfsclient/nfs_clcomsubs.c +++ b/sys/fs/nfsclient/nfs_clcomsubs.c @@ -43,10 +43,11 @@ __FBSDID("$FreeBSD$"); #include <fs/nfs/nfsport.h> extern struct nfsstats newnfsstats; -extern struct nfsv4_opflag nfsv4_opflag[NFSV4OP_NOPS]; +extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS]; extern int ncl_mbuf_mlen; extern enum vtype newnv2tov_type[8]; extern enum vtype nv34tov_type[8]; +extern int nfs_bigreply[NFSV41_NPROCS]; NFSCLSTATEMUTEX; #endif /* !APPLEKEXT */ @@ -56,7 +57,7 @@ static struct { int opcnt; const u_char *tag; int taglen; -} nfsv4_opmap[NFS_NPROCS] = { +} nfsv4_opmap[NFSV41_NPROCS] = { { 0, 1, "Null", 4 }, { NFSV4OP_GETATTR, 1, "Getattr", 7, }, { NFSV4OP_SETATTR, 2, "Setattr", 7, }, @@ -98,15 +99,28 @@ static struct { { NFSV4OP_DELEGRETURN, 9, "DelegRename2", 12, }, { NFSV4OP_GETATTR, 1, "Getacl", 6, }, { NFSV4OP_SETATTR, 1, "Setacl", 6, }, + { NFSV4OP_EXCHANGEID, 1, "ExchangeID", 10, }, + { NFSV4OP_CREATESESSION, 1, "CreateSession", 13, }, + { NFSV4OP_DESTROYSESSION, 1, "DestroySession", 14, }, + { NFSV4OP_DESTROYCLIENTID, 1, "DestroyClient", 13, }, + { NFSV4OP_FREESTATEID, 1, "FreeStateID", 11, }, + { NFSV4OP_LAYOUTGET, 1, "LayoutGet", 9, }, + { NFSV4OP_GETDEVINFO, 1, "GetDeviceInfo", 13, }, + { NFSV4OP_LAYOUTCOMMIT, 1, "LayoutCommit", 12, }, + { NFSV4OP_LAYOUTRETURN, 1, "LayoutReturn", 12, }, + { NFSV4OP_RECLAIMCOMPL, 1, "ReclaimComplete", 15, }, + { NFSV4OP_WRITE, 1, "WriteDS", 
7, }, + { NFSV4OP_READ, 1, "ReadDS", 6, }, + { NFSV4OP_COMMIT, 1, "CommitDS", 8, }, }; - /* * NFS RPCS that have large request message size. */ -static int nfs_bigrequest[NFS_NPROCS] = { +static int nfs_bigrequest[NFSV41_NPROCS] = { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0 }; /* @@ -115,7 +129,7 @@ static int nfs_bigrequest[NFS_NPROCS] = { */ APPLESTATIC void nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, - u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp) + u_int8_t *nfhp, int fhlen, u_int32_t **opcntpp, struct nfsclsession *sep) { struct mbuf *mb; u_int32_t *tl; @@ -125,9 +139,12 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, /* * First, fill in some of the fields of nd. */ - if (NFSHASNFSV4(nmp)) + nd->nd_slotseq = NULL; + if (NFSHASNFSV4(nmp)) { nd->nd_flag = ND_NFSV4 | ND_NFSCL; - else if (NFSHASNFSV3(nmp)) + if (NFSHASNFSV4N(nmp)) + nd->nd_flag |= ND_NFSV41; + } else if (NFSHASNFSV3(nmp)) nd->nd_flag = ND_NFSV3 | ND_NFSCL; else nd->nd_flag = ND_NFSV2 | ND_NFSCL; @@ -151,33 +168,71 @@ nfscl_reqstart(struct nfsrv_descript *nd, int procnum, struct nfsmount *nmp, if (nd->nd_flag & ND_NFSV4) { opcnt = nfsv4_opmap[procnum].opcnt + nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh; + if ((nd->nd_flag & ND_NFSV41) != 0) { + opcnt += nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq; + if (procnum == NFSPROC_RENEW) + /* + * For the special case of Renew, just do a + * Sequence Op. + */ + opcnt = 1; + else if (procnum == NFSPROC_WRITEDS || + procnum == NFSPROC_COMMITDS) + /* + * For the special case of a Write or Commit to + * a DS, the opcnt == 3, for Sequence, PutFH, + * Write/Commit. + */ + opcnt = 3; + } /* * What should the tag really be? 
*/ (void) nfsm_strtom(nd, nfsv4_opmap[procnum].tag, nfsv4_opmap[procnum].taglen); - NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED); - *tl++ = txdr_unsigned(NFSV4_MINORVERSION); + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + if ((nd->nd_flag & ND_NFSV41) != 0) + *tl++ = txdr_unsigned(NFSV41_MINORVERSION); + else + *tl++ = txdr_unsigned(NFSV4_MINORVERSION); if (opcntpp != NULL) *opcntpp = tl; - *tl++ = txdr_unsigned(opcnt); + *tl = txdr_unsigned(opcnt); + if ((nd->nd_flag & ND_NFSV41) != 0 && + nfsv4_opflag[nfsv4_opmap[procnum].op].needsseq > 0) { + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(NFSV4OP_SEQUENCE); + if (sep == NULL) + nfsv4_setsequence(nd, NFSMNT_MDSSESSION(nmp), + nfs_bigreply[procnum]); + else + nfsv4_setsequence(nd, sep, + nfs_bigreply[procnum]); + } if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh > 0) { + NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_PUTFH); (void) nfsm_fhtom(nd, nfhp, fhlen, 0); - if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh==2){ + if (nfsv4_opflag[nfsv4_opmap[procnum].op].needscfh + == 2 && procnum != NFSPROC_WRITEDS && + procnum != NFSPROC_COMMITDS) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETATTR); NFSWCCATTR_ATTRBIT(&attrbits); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_V4WCCATTR; } + } + if (procnum != NFSPROC_RENEW || + (nd->nd_flag & ND_NFSV41) == 0) { NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(nfsv4_opmap[procnum].op); } - *tl = txdr_unsigned(nfsv4_opmap[procnum].op); } else { (void) nfsm_fhtom(nd, nfhp, fhlen, 0); } - NFSINCRGLOBAL(newnfsstats.rpccnt[procnum]); + if (procnum < NFSV4_NPROCS) + NFSINCRGLOBAL(newnfsstats.rpccnt[procnum]); } #ifndef APPLE @@ -453,6 +508,11 @@ nfsm_stateidtom(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, int flag) st->other[0] = 0xffffffff; st->other[1] = 0xffffffff; st->other[2] = 0xffffffff; + } else if (flag == NFSSTATEID_PUTSEQIDZERO) { + st->seqid = 0; 
+ st->other[0] = stateidp->other[0]; + st->other[1] = stateidp->other[1]; + st->other[2] = stateidp->other[2]; } else { st->seqid = stateidp->seqid; st->other[0] = stateidp->other[0]; diff --git a/sys/fs/nfsclient/nfs_clkdtrace.c b/sys/fs/nfsclient/nfs_clkdtrace.c index c7db3a4..cf3d8b0 100644 --- a/sys/fs/nfsclient/nfs_clkdtrace.c +++ b/sys/fs/nfsclient/nfs_clkdtrace.c @@ -92,7 +92,7 @@ struct dtnfsclient_rpc { * This table is indexed by NFSv3 procedure number, but also used for NFSv2 * procedure names and NFSv4 operations. */ -static struct dtnfsclient_rpc dtnfsclient_rpcs[NFS_NPROCS + 1] = { +static struct dtnfsclient_rpc dtnfsclient_rpcs[NFSV41_NPROCS + 1] = { { "null", "null", "null" }, { "getattr", "getattr", "getattr" }, { "setattr", "setattr", "setattr" }, @@ -196,17 +196,17 @@ extern uint32_t nfscl_attrcache_load_done_id; * stored in one of these two NFS client-allocated arrays; 0 indicates that * the event is not being traced so probes should not be called. * - * For simplicity, we allocate both v2, v3 and v4 arrays as NFS_NPROCS + 1, and - * the v2, v3 arrays are simply sparse. + * For simplicity, we allocate both v2, v3 and v4 arrays as NFSV41_NPROCS + 1, + * and the v2, v3 arrays are simply sparse. 
*/ -extern uint32_t nfscl_nfs2_start_probes[NFS_NPROCS + 1]; -extern uint32_t nfscl_nfs2_done_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1]; +extern uint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1]; -extern uint32_t nfscl_nfs3_start_probes[NFS_NPROCS + 1]; -extern uint32_t nfscl_nfs3_done_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1]; +extern uint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1]; -extern uint32_t nfscl_nfs4_start_probes[NFS_NPROCS + 1]; -extern uint32_t nfscl_nfs4_done_probes[NFS_NPROCS + 1]; +extern uint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1]; +extern uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; /* * Look up a DTrace probe ID to see if it's associated with a "done" event -- @@ -217,7 +217,7 @@ dtnfs234_isdoneprobe(dtrace_id_t id) { int i; - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v4_id_done == id || dtnfsclient_rpcs[i].nr_v3_id_done == id || dtnfsclient_rpcs[i].nr_v2_id_done == id) @@ -401,7 +401,7 @@ dtnfsclient_provide(void *arg, dtrace_probedesc_t *desc) * Register NFSv2 RPC procedures; note sparseness check for each slot * in the NFSv3, NFSv4 procnum-indexed array. */ - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v2_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str, dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_start_str) == @@ -430,7 +430,7 @@ dtnfsclient_provide(void *arg, dtrace_probedesc_t *desc) * Register NFSv3 RPC procedures; note sparseness check for each slot * in the NFSv4 procnum-indexed array. 
*/ - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v3_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str, dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_start_str) == @@ -458,7 +458,7 @@ dtnfsclient_provide(void *arg, dtrace_probedesc_t *desc) /* * Register NFSv4 RPC procedures. */ - for (i = 0; i < NFS_NPROCS + 1; i++) { + for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs4_str, dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_start_str) == 0) { diff --git a/sys/fs/nfsclient/nfs_clkrpc.c b/sys/fs/nfsclient/nfs_clkrpc.c index fceb36d..71889fc 100644 --- a/sys/fs/nfsclient/nfs_clkrpc.c +++ b/sys/fs/nfsclient/nfs_clkrpc.c @@ -45,12 +45,13 @@ __FBSDID("$FreeBSD$"); NFSDLOCKMUTEX; -SVCPOOL *nfscbd_pool; +extern SVCPOOL *nfscbd_pool; static int nfs_cbproc(struct nfsrv_descript *, u_int32_t); extern u_long sb_max_adj; extern int nfs_numnfscbd; +extern int nfscl_debuglevel; /* * NFS client system calls for handling callbacks. @@ -90,6 +91,7 @@ nfscb_program(struct svc_req *rqst, SVCXPRT *xprt) nd.nd_mreq = NULL; nd.nd_cred = NULL; + NFSCL_DEBUG(1, "cbproc=%d\n",nd.nd_procnum); if (nd.nd_procnum != NFSPROC_NULL) { if (!svc_getcred(rqst, &nd.nd_cred, &credflavor)) { svcerr_weakauth(rqst); @@ -133,9 +135,10 @@ nfscb_program(struct svc_req *rqst, SVCXPRT *xprt) svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR); if (nd.nd_mreq != NULL) m_freem(nd.nd_mreq); - } else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) { + } else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) svcerr_systemerr(rqst); - } + else + NFSCL_DEBUG(1, "cbrep sent\n"); svc_freereq(rqst); } @@ -271,13 +274,15 @@ nfsrvd_cbinit(int terminating) NFSD_LOCK_ASSERT(); if (terminating) { + /* Wait for any xprt registrations to complete. 
*/ + while (nfs_numnfscbd > 0) + msleep(&nfs_numnfscbd, NFSDLOCKMUTEXPTR, PZERO, + "nfscbdt", 0); NFSD_UNLOCK(); svcpool_destroy(nfscbd_pool); nfscbd_pool = NULL; - NFSD_LOCK(); - } - - NFSD_UNLOCK(); + } else + NFSD_UNLOCK(); nfscbd_pool = svcpool_create("nfscbd", NULL); nfscbd_pool->sp_rcache = NULL; diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index fe28975..145ff63 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -853,7 +853,7 @@ nfscl_request(struct nfsrv_descript *nd, struct vnode *vp, NFSPROC_T *p, else vers = NFS_VER2; ret = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, - NFS_PROG, vers, NULL, 1, NULL); + NFS_PROG, vers, NULL, 1, NULL, NULL); return (ret); } @@ -1112,10 +1112,15 @@ nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid) "No name and/or group mapping for uid,gid:(%d,%d)\n", uid, gid); return (EPERM); + case NFSERR_BADNAME: + case NFSERR_BADCHAR: + printf("nfsv4 char/name not handled by server\n"); + return (ENOENT); case NFSERR_STALECLIENTID: case NFSERR_STALESTATEID: case NFSERR_EXPIRED: case NFSERR_BADSTATEID: + case NFSERR_BADSESSION: printf("nfsv4 recover err returned %d\n", error); return (EIO); case NFSERR_BADHANDLE: @@ -1131,8 +1136,6 @@ nfscl_maperr(struct thread *td, int error, uid_t uid, gid_t gid) case NFSERR_LEASEMOVED: case NFSERR_RECLAIMBAD: case NFSERR_BADXDR: - case NFSERR_BADCHAR: - case NFSERR_BADNAME: case NFSERR_OPILLEGAL: printf("nfsv4 client/server protocol prob err=%d\n", error); diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 7da93cf..be0476a 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -67,6 +67,19 @@ int nfstest_openallsetattr = 0; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) +/* + * nfscl_getsameserver() can return one of three values: + * NFSDSP_USETHISSESSION - Use this session for the DS. 
+ * NFSDSP_SEQTHISSESSION - Use the nfsclds_sequence field of this dsp for new + * session. + * NFSDSP_NOTFOUND - No matching server was found. + */ +enum nfsclds_state { + NFSDSP_USETHISSESSION = 0, + NFSDSP_SEQTHISSESSION = 1, + NFSDSP_NOTFOUND = 2, +}; + static int nfsrpc_setattrrpc(vnode_t , struct vattr *, nfsv4stateid_t *, struct ucred *, NFSPROC_T *, struct nfsvattr *, int *, void *); static int nfsrpc_readrpc(vnode_t , struct uio *, struct ucred *, @@ -86,6 +99,27 @@ static int nfsrpc_locku(struct nfsrv_descript *, struct nfsmount *, u_int32_t, struct ucred *, NFSPROC_T *, int); static int nfsrpc_setaclrpc(vnode_t, struct ucred *, NFSPROC_T *, struct acl *, nfsv4stateid_t *, void *); +static int nfsrpc_getlayout(struct nfsmount *, vnode_t, struct nfsfh *, int, + uint32_t *, nfsv4stateid_t *, uint64_t, struct nfscllayout **, + struct ucred *, NFSPROC_T *); +static int nfsrpc_fillsa(struct nfsmount *, struct sockaddr_storage *, + struct nfsclds **, NFSPROC_T *); +static void nfscl_initsessionslots(struct nfsclsession *); +static int nfscl_doflayoutio(vnode_t, struct uio *, int *, int *, int *, + nfsv4stateid_t *, int, struct nfscldevinfo *, struct nfscllayout *, + struct nfsclflayout *, uint64_t, uint64_t, struct ucred *, NFSPROC_T *); +static int nfsrpc_readds(vnode_t, struct uio *, nfsv4stateid_t *, int *, + struct nfsclds *, uint64_t, int, struct nfsfh *, struct ucred *, + NFSPROC_T *); +static int nfsrpc_writeds(vnode_t, struct uio *, int *, int *, + nfsv4stateid_t *, struct nfsclds *, uint64_t, int, + struct nfsfh *, int, struct ucred *, NFSPROC_T *); +static enum nfsclds_state nfscl_getsameserver(struct nfsmount *, + struct nfsclds *, struct nfsclds **); +#ifdef notyet +static int nfsrpc_commitds(vnode_t, uint64_t, int, struct nfsclds *, + struct nfsfh *, struct ucred *, NFSPROC_T *, void *); +#endif /* * nfs null call from vfs. 
@@ -308,7 +342,8 @@ else printf(" fhl=0\n"); op->nfso_opencnt++; nfscl_openrelease(op, error, newone); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || - error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY) { + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -317,6 +352,7 @@ else printf(" fhl=0\n"); } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) @@ -344,13 +380,13 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, dp = *dpp; *dpp = NULL; - nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL); + nfscl_reqstart(nd, NFSPROC_OPEN, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); *tl++ = txdr_unsigned(mode & NFSV4OPEN_ACCESSBOTH); *tl++ = txdr_unsigned((mode >> NFSLCK_SHIFT) & NFSV4OPEN_DENYBOTH); - *tl++ = op->nfso_own->nfsow_clp->nfsc_clientid.lval[0]; - *tl = op->nfso_own->nfsow_clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, op->nfso_own->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_NOCREATE); @@ -362,7 +398,10 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, if (dp != NULL) { *tl = txdr_unsigned(NFSV4OPEN_CLAIMDELEGATECUR); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = dp->nfsdl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = dp->nfsdl_stateid.seqid; *tl++ = 
dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; @@ -380,7 +419,7 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); @@ -501,14 +540,15 @@ nfsrpc_openrpc(struct nfsmount *nmp, vnode_t vp, u_int8_t *nfhp, int fhlen, if (ndp != NULL) FREE((caddr_t)ndp, M_NFSCLDELEG); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) error = ret; } } } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; - if (error == NFSERR_STALECLIENTID) + if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: if (!error) @@ -532,7 +572,10 @@ nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op, NFSCL_REQSTART(nd, NFSPROC_OPENDOWNGRADE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED); - *tl++ = op->nfso_stateid.seqid; + if (NFSHASNFSV4N(VFSTONFS(vnode_mount(vp)))) + *tl++ = 0; + else + *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl++ = op->nfso_stateid.other[2]; @@ -552,7 +595,7 @@ nfsrpc_opendowngrade(vnode_t vp, u_int32_t mode, struct nfsclopen *op, } if (nd->nd_repstat && error == 0) error = nd->nd_repstat; - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -690,24 +733,27 @@ nfsrpc_closerpc(struct nfsrv_descript *nd, struct nfsmount *nmp, int error; nfscl_reqstart(nd, NFSPROC_CLOSE, nmp, op->nfso_fh, - op->nfso_fhlen, NULL); + op->nfso_fhlen, NULL, NULL); 
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = txdr_unsigned(op->nfso_own->nfsow_seqid); - *tl++ = op->nfso_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; *tl++ = op->nfso_stateid.other[1]; *tl = op->nfso_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); NFSCL_INCRSEQID(op->nfso_own->nfsow_seqid, nd); if (nd->nd_repstat == 0) NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID); error = nd->nd_repstat; - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -723,10 +769,13 @@ nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsmount *nmp; int error; - nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, VFSTONFS(vnode_mount(vp)), - nfhp, fhlen, NULL); + nmp = VFSTONFS(vnode_mount(vp)); + if (NFSHASNFSV4N(nmp)) + return (0); /* No confirmation for NFSv4.1. */ + nfscl_reqstart(nd, NFSPROC_OPENCONFIRM, nmp, nfhp, fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID); *tl++ = op->nfso_stateid.seqid; *tl++ = op->nfso_stateid.other[0]; @@ -745,7 +794,7 @@ nfsrpc_openconfirm(vnode_t vp, u_int8_t *nfhp, int fhlen, op->nfso_stateid.other[2] = *tl; } error = nd->nd_repstat; - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(op->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -757,7 +806,7 @@ nfsmout: * when a mount has just occurred and when the server replies NFSERR_EXPIRED. 
*/ APPLESTATIC int -nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, +nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, int reclaim, struct ucred *cred, NFSPROC_T *p) { u_int32_t *tl; @@ -770,13 +819,58 @@ nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, nfsquad_t confirm; u_int32_t lease; static u_int32_t rev = 0; + struct nfsclds *dsp, *ndsp, *tdsp; if (nfsboottime.tv_sec == 0) NFSSETBOOTTIME(nfsboottime); - nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL); + clp->nfsc_rev = rev++; + if (NFSHASNFSV4N(nmp)) { + error = nfsrpc_exchangeid(nmp, clp, &nmp->nm_sockreq, + NFSV4EXCH_USEPNFSMDS | NFSV4EXCH_USENONPNFS, &dsp, cred, p); + NFSCL_DEBUG(1, "aft exch=%d\n", error); + if (error == 0) { + error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, + &nmp->nm_sockreq, + dsp->nfsclds_sess.nfsess_sequenceid, 1, cred, p); + if (error == 0) { + NFSLOCKMNT(nmp); + TAILQ_FOREACH_SAFE(tdsp, &nmp->nm_sess, + nfsclds_list, ndsp) + nfscl_freenfsclds(tdsp); + TAILQ_INIT(&nmp->nm_sess); + TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, + nfsclds_list); + NFSUNLOCKMNT(nmp); + } else + nfscl_freenfsclds(dsp); + NFSCL_DEBUG(1, "aft createsess=%d\n", error); + } + if (error == 0 && reclaim == 0) { + error = nfsrpc_reclaimcomplete(nmp, cred, p); + NFSCL_DEBUG(1, "aft reclaimcomp=%d\n", error); + if (error == NFSERR_COMPLETEALREADY || + error == NFSERR_NOTSUPP) + /* Ignore this error. */ + error = 0; + } + return (error); + } + + /* + * Allocate a single session structure for NFSv4.0, because some of + * the fields are used by NFSv4.0 although it doesn't do a session. 
+ */ + dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS, M_WAITOK | M_ZERO); + mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); + mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", NULL, MTX_DEF); + NFSLOCKMNT(nmp); + TAILQ_INSERT_HEAD(&nmp->nm_sess, dsp, nfsclds_list); + NFSUNLOCKMNT(nmp); + + nfscl_reqstart(nd, NFSPROC_SETCLIENTID, nmp, NULL, 0, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsboottime.tv_sec); - *tl = txdr_unsigned(rev++); + *tl = txdr_unsigned(clp->nfsc_rev); (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); /* @@ -827,13 +921,13 @@ nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, *tl = txdr_unsigned(clp->nfsc_cbident); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); - clp->nfsc_clientid.lval[0] = *tl++; - clp->nfsc_clientid.lval[1] = *tl++; + NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0] = *tl++; + NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1] = *tl++; confirm.lval[0] = *tl++; confirm.lval[1] = *tl; mbuf_freem(nd->nd_mrep); @@ -842,28 +936,29 @@ nfsrpc_setclient(struct nfsmount *nmp, struct nfsclclient *clp, /* * and confirm it. 
*/ - nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL); + nfscl_reqstart(nd, NFSPROC_SETCLIENTIDCFRM, nmp, NULL, 0, NULL, + NULL); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED); - *tl++ = clp->nfsc_clientid.lval[0]; - *tl++ = clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; *tl++ = confirm.lval[0]; *tl = confirm.lval[1]; nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, - cred, NFS_PROG, NFS_VER4, NULL, 1, NULL); + cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; if (nd->nd_repstat == 0) { nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, nmp->nm_fh, - nmp->nm_fhsize, NULL); + nmp->nm_fhsize, NULL, NULL); NFSZERO_ATTRBIT(&attrbits); NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, - cred, NFS_PROG, NFS_VER4, NULL, 1, NULL); + cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { @@ -917,16 +1012,18 @@ nfsrpc_getattr(vnode_t vp, struct ucred *cred, NFSPROC_T *p, */ APPLESTATIC int nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, - struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp) + struct ucred *cred, NFSPROC_T *p, struct nfsvattr *nap, u_int64_t *xidp, + uint32_t *leasep) { struct nfsrv_descript nfsd, *nd = &nfsd; int error, vers = NFS_VER2; nfsattrbit_t attrbits; - nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL); + nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, fhp, fhlen, NULL, NULL); if (nd->nd_flag & ND_NFSV4) { vers = NFS_VER4; NFSGETATTR_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_LEASETIME); (void) nfsrv_putattrbit(nd, &attrbits); } else if (nd->nd_flag & ND_NFSV3) { vers = NFS_VER3; 
@@ -934,12 +1031,17 @@ nfsrpc_getattrnovp(struct nfsmount *nmp, u_int8_t *fhp, int fhlen, int syscred, if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, vers, NULL, 1, xidp); + NFS_PROG, vers, NULL, 1, xidp, NULL); if (error) return (error); - if (!nd->nd_repstat) - error = nfsm_loadattr(nd, nap); - else + if (nd->nd_repstat == 0) { + if ((nd->nd_flag & ND_NFSV4) != 0) + error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, + NULL, NULL, NULL, NULL, NULL, 0, NULL, leasep, NULL, + NULL, NULL); + else + error = nfsm_loadattr(nd, nap); + } else error = nd->nd_repstat; mbuf_freem(nd->nd_mrep); return (error); @@ -973,7 +1075,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, if (NFSHASNFSV4(nmp)) { nfhp = VTONFS(vp)->n_fhp; error = nfscl_getstateid(vp, nfhp->nfh_fh, - nfhp->nfh_len, mode, cred, p, &stateid, &lckp); + nfhp->nfh_len, mode, 0, cred, p, &stateid, &lckp); if (error && vnode_vtype(vp) == VREG && (mode == NFSV4OPEN_ACCESSWRITE || nfstest_openallsetattr)) { @@ -990,7 +1092,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, if (!openerr) (void) nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, - mode, cred, p, &stateid, &lckp); + mode, 0, cred, p, &stateid, &lckp); } } if (vap != NULL) @@ -999,7 +1101,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, else error = nfsrpc_setaclrpc(vp, cred, p, aclp, &stateid, stuff); - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); @@ -1007,7 +1109,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, (void) nfsrpc_close(vp, 0, p); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || - error == NFSERR_OLDSTATEID) { + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) 
nfs_catnap(PZERO, error, "nfs_setattr"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -1016,6 +1118,7 @@ nfsrpc_setattr(vnode_t vp, struct vattr *vap, NFSACL_T *aclp, retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); @@ -1242,16 +1345,17 @@ nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred, lckp = NULL; if (NFSHASNFSV4(nmp)) (void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, - NFSV4OPEN_ACCESSREAD, newcred, p, &stateid, &lckp); + NFSV4OPEN_ACCESSREAD, 0, newcred, p, &stateid, + &lckp); error = nfsrpc_readrpc(vp, uiop, newcred, &stateid, p, nap, attrflagp, stuff); - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || - error == NFSERR_OLDSTATEID) { + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_read"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -1260,6 +1364,7 @@ nfsrpc_read(vnode_t vp, struct uio *uiop, struct ucred *cred, retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); @@ -1395,7 +1500,8 @@ nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, nostateid = 0; if (NFSHASNFSV4(nmp)) { 
(void)nfscl_getstateid(vp, nfhp->nfh_fh, nfhp->nfh_len, - NFSV4OPEN_ACCESSWRITE, newcred, p, &stateid, &lckp); + NFSV4OPEN_ACCESSWRITE, 0, newcred, p, &stateid, + &lckp); if (stateid.other[0] == 0 && stateid.other[1] == 0 && stateid.other[2] == 0) { nostateid = 1; @@ -1413,13 +1519,13 @@ nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, else error = nfsrpc_writerpc(vp, uiop, iomode, must_commit, newcred, &stateid, p, nap, attrflagp, stuff); - if (error == NFSERR_STALESTATEID) + if (error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(nmp->nm_clp); if (lckp != NULL) nfscl_lockderef(lckp); if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || - error == NFSERR_OLDSTATEID) { + error == NFSERR_OLDSTATEID || error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_write"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -1427,13 +1533,13 @@ nfsrpc_write(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, } retrycnt++; } while (error == NFSERR_GRACE || error == NFSERR_DELAY || - ((error == NFSERR_STALESTATEID || + ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy == 0) || (error == NFSERR_OLDSTATEID && retrycnt < 20) || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error != 0 && (retrycnt >= 4 || - ((error == NFSERR_STALESTATEID || + ((error == NFSERR_STALESTATEID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && called_from_strategy != 0))) error = EIO; if (NFSHASNFSV4(nmp)) @@ -1747,7 +1853,8 @@ nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap, (*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp); nfscl_ownerrelease(owp, error, newone, unlocked); if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || - 
error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY) { + error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_open"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -1756,6 +1863,7 @@ nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap, } } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY || + error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) @@ -1836,7 +1944,9 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, nfsattrbit_t attrbits; nfsv4stateid_t stateid; u_int32_t rflags; + struct nfsmount *nmp; + nmp = VFSTONFS(dvp->v_mount); *unlockedp = 0; *nfhpp = NULL; *dpp = NULL; @@ -1853,16 +1963,32 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, *tl++ = txdr_unsigned(NFSV4OPEN_ACCESSWRITE | NFSV4OPEN_ACCESSREAD); *tl++ = txdr_unsigned(NFSV4OPEN_DENYNONE); - *tl++ = owp->nfsow_clp->nfsc_clientid.lval[0]; - *tl = owp->nfsow_clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; (void) nfsm_strtom(nd, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN); NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV4OPEN_CREATE); if (fmode & O_EXCL) { - *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); - NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); - *tl++ = cverf.lval[0]; - *tl = cverf.lval[1]; + if (NFSHASNFSV4N(nmp)) { + if (NFSHASSESSPERSIST(nmp)) { + /* Use GUARDED for persistent sessions. */ + *tl = txdr_unsigned(NFSCREATE_GUARDED); + nfscl_fillsattr(nd, vap, dvp, 0, 0); + } else { + /* Otherwise, use EXCLUSIVE4_1. 
*/ + *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE41); + NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); + *tl++ = cverf.lval[0]; + *tl = cverf.lval[1]; + nfscl_fillsattr(nd, vap, dvp, 0, 0); + } + } else { + /* NFSv4.0 */ + *tl = txdr_unsigned(NFSCREATE_EXCLUSIVE); + NFSM_BUILD(tl, u_int32_t *, NFSX_VERF); + *tl++ = cverf.lval[0]; + *tl = cverf.lval[1]; + } } else { *tl = txdr_unsigned(NFSCREATE_UNCHECKED); nfscl_fillsattr(nd, vap, dvp, 0, 0); @@ -2009,7 +2135,8 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, if (dp != NULL) FREE((caddr_t)dp, M_NFSCLDELEG); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) error = ret; } } @@ -2018,7 +2145,7 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap, } if (nd->nd_repstat != 0 && error == 0) error = nd->nd_repstat; - if (error == NFSERR_STALECLIENTID) + if (error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION) nfscl_initiate_recovery(owp->nfsow_clp); nfsmout: if (!error) @@ -2055,7 +2182,10 @@ tryagain: NFSCL_REQSTART(nd, NFSPROC_RETDELEGREMOVE, vp); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); - *tl++ = dstateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = dstateid.seqid; *tl++ = dstateid.other[0]; *tl++ = dstateid.other[1]; *tl++ = dstateid.other[2]; @@ -2138,7 +2268,10 @@ tryagain: } if (gotfd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = fdstateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = fdstateid.seqid; *tl++ = fdstateid.other[0]; *tl++ = fdstateid.other[1]; *tl = fdstateid.other[2]; @@ -2154,7 +2287,10 @@ tryagain: } if (gottd) { NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = tdstateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = tdstateid.seqid; *tl++ = tdstateid.other[0]; *tl++ = tdstateid.other[1]; *tl = tdstateid.other[2]; @@ -3421,13 +3557,13 @@ nfsmout: */ APPLESTATIC int nfsrpc_commit(vnode_t vp, 
u_quad_t offset, int cnt, struct ucred *cred, - NFSPROC_T *p, u_char *verfp, struct nfsvattr *nap, int *attrflagp, - void *stuff) + NFSPROC_T *p, struct nfsvattr *nap, int *attrflagp, void *stuff) { u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; nfsattrbit_t attrbits; int error; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); *attrflagp = 0; NFSCL_REQSTART(nd, NFSPROC_COMMIT, vp); @@ -3450,7 +3586,12 @@ nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred, error = nfscl_wcc_data(nd, vp, nap, attrflagp, NULL, stuff); if (!error && !nd->nd_repstat) { NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); - NFSBCOPY((caddr_t)tl, verfp, NFSX_VERF); + NFSLOCKMNT(nmp); + if (NFSBCMP(nmp->nm_verf, tl, NFSX_VERF)) { + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + nd->nd_repstat = NFSERR_STALEWRITEVERF; + } + NFSUNLOCKMNT(nmp); if (nd->nd_flag & ND_NFSV4) error = nfscl_postop_attr(nd, nap, attrflagp, stuff); } @@ -3516,7 +3657,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, do { nd->nd_repstat = 0; if (op == F_GETLK) { - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags); @@ -3533,7 +3674,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, * We must loop around for all lockowner cases. 
*/ callcnt = 0; - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (error); do { @@ -3610,7 +3751,8 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, error = nd->nd_repstat; if (error == NFSERR_GRACE || error == NFSERR_STALESTATEID || error == NFSERR_STALEDONTRECOVER || - error == NFSERR_STALECLIENTID || error == NFSERR_DELAY) { + error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || + error == NFSERR_BADSESSION) { (void) nfs_catnap(PZERO, error, "nfs_advlock"); } else if ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && clidrev != 0) { @@ -3620,6 +3762,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, } while (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID || error == NFSERR_DELAY || error == NFSERR_STALEDONTRECOVER || error == NFSERR_STALESTATEID || + error == NFSERR_BADSESSION || ((error == NFSERR_EXPIRED || error == NFSERR_BADSTATEID) && expireret == 0 && clidrev != 0 && retrycnt < 4)); if (error && retrycnt >= 4) @@ -3639,7 +3782,9 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, int error, type, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; struct nfsnode *np; + struct nfsmount *nmp; + nmp = VFSTONFS(vp->v_mount); NFSCL_REQSTART(nd, NFSPROC_LOCKT, vp); NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (fl->l_type == F_RDLCK) @@ -3650,8 +3795,8 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, tl += 2; txdr_hyper(len, tl); tl += 2; - *tl++ = clp->nfsc_clientid.lval[0]; - *tl = clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; nfscl_filllockowner(id, own, flags); np = VTONFS(vp); NFSBCOPY(np->n_fhp->nfh_fh, &own[NFSV4CL_LOCKNAMELEN], @@ -3691,7 +3836,8 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); - } else if (nd->nd_repstat == 
NFSERR_STALECLIENTID) + } else if (nd->nd_repstat == NFSERR_STALECLIENTID || + nd->nd_repstat == NFSERR_BADSESSION) nfscl_initiate_recovery(clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -3710,7 +3856,7 @@ nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, int error; nfscl_reqstart(nd, NFSPROC_LOCKU, nmp, lp->nfsl_open->nfso_fh, - lp->nfsl_open->nfso_fhlen, NULL); + lp->nfsl_open->nfso_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(type); *tl = txdr_unsigned(lp->nfsl_seqid); @@ -3718,7 +3864,10 @@ nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, (arc4random() % nfstest_outofseq) == 0) *tl = txdr_unsigned(lp->nfsl_seqid + 1); tl++; - *tl++ = lp->nfsl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; @@ -3728,7 +3877,7 @@ nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); NFSCL_INCRSEQID(lp->nfsl_seqid, nd); if (error) return (error); @@ -3738,7 +3887,8 @@ nfsrpc_locku(struct nfsrv_descript *nd, struct nfsmount *nmp, lp->nfsl_stateid.other[0] = *tl++; lp->nfsl_stateid.other[1] = *tl++; lp->nfsl_stateid.other[2] = *tl; - } else if (nd->nd_repstat == NFSERR_STALESTATEID) + } else if (nd->nd_repstat == NFSERR_STALESTATEID || + nd->nd_repstat == NFSERR_BADSESSION) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -3758,7 +3908,7 @@ nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, int error, size; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; - nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL); + nfscl_reqstart(nd, NFSPROC_LOCK, nmp, nfhp, fhlen, NULL, NULL); 
NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED); if (type == F_RDLCK) *tl++ = txdr_unsigned(NFSV4LOCKT_READ); @@ -3774,20 +3924,26 @@ nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 2 * NFSX_UNSIGNED + NFSX_HYPER); *tl++ = txdr_unsigned(lp->nfsl_open->nfso_own->nfsow_seqid); - *tl++ = lp->nfsl_open->nfso_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = lp->nfsl_open->nfso_stateid.seqid; *tl++ = lp->nfsl_open->nfso_stateid.other[0]; *tl++ = lp->nfsl_open->nfso_stateid.other[1]; *tl++ = lp->nfsl_open->nfso_stateid.other[2]; *tl++ = txdr_unsigned(lp->nfsl_seqid); - *tl++ = lp->nfsl_open->nfso_own->nfsow_clp->nfsc_clientid.lval[0]; - *tl = lp->nfsl_open->nfso_own->nfsow_clp->nfsc_clientid.lval[1]; + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); NFSBCOPY(nfhp, &own[NFSV4CL_LOCKNAMELEN], fhlen); (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); } else { *tl = newnfs_false; NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); - *tl++ = lp->nfsl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = lp->nfsl_stateid.seqid; *tl++ = lp->nfsl_stateid.other[0]; *tl++ = lp->nfsl_stateid.other[1]; *tl++ = lp->nfsl_stateid.other[2]; @@ -3799,7 +3955,7 @@ nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, vp, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (newone) @@ -3818,7 +3974,8 @@ nfsrpc_lock(struct nfsrv_descript *nd, struct nfsmount *nmp, vnode_t vp, error = EBADRPC; if (!error) error = nfsm_advance(nd, NFSM_RNDUP(size), -1); - } else if (nd->nd_repstat == NFSERR_STALESTATEID) + } else if (nd->nd_repstat == NFSERR_STALESTATEID || + nd->nd_repstat == 
NFSERR_BADSESSION) nfscl_initiate_recovery(lp->nfsl_open->nfso_own->nfsow_clp); nfsmout: mbuf_freem(nd->nd_mrep); @@ -4009,24 +4166,34 @@ nfsmout: * This function performs the Renew RPC. */ APPLESTATIC int -nfsrpc_renew(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) +nfsrpc_renew(struct nfsclclient *clp, struct nfsclds *dsp, struct ucred *cred, + NFSPROC_T *p) { u_int32_t *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; struct nfsmount *nmp; int error; + struct nfssockreq *nrp; nmp = clp->nfsc_nmp; if (nmp == NULL) return (0); - nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL); - NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); - *tl++ = clp->nfsc_clientid.lval[0]; - *tl = clp->nfsc_clientid.lval[1]; + nfscl_reqstart(nd, NFSPROC_RENEW, nmp, NULL, 0, NULL, + &dsp->nfsclds_sess); + if (!NFSHASNFSV4N(nmp)) { + /* NFSv4.1 just uses a Sequence Op and not a Renew. */ + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; + } + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. 
*/ + nrp = &nmp->nm_sockreq; nd->nd_flag |= ND_USEGSSNAME; - error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); if (error) return (error); error = nd->nd_repstat; @@ -4046,16 +4213,24 @@ nfsrpc_rellockown(struct nfsmount *nmp, struct nfscllockowner *lp, int error; uint8_t own[NFSV4CL_LOCKNAMELEN + NFSX_V4FHMAX]; - nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL); - NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); - *tl++ = nmp->nm_clp->nfsc_clientid.lval[0]; - *tl = nmp->nm_clp->nfsc_clientid.lval[1]; - NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); - NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen); - (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); + if (NFSHASNFSV4N(nmp)) { + /* For NFSv4.1, do a FreeStateID. */ + nfscl_reqstart(nd, NFSPROC_FREESTATEID, nmp, NULL, 0, NULL, + NULL); + nfsm_stateidtom(nd, &lp->nfsl_stateid, NFSSTATEID_PUTSTATEID); + } else { + nfscl_reqstart(nd, NFSPROC_RELEASELCKOWN, nmp, NULL, 0, NULL, + NULL); + NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED); + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; + NFSBCOPY(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN); + NFSBCOPY(fh, &own[NFSV4CL_LOCKNAMELEN], fhlen); + (void)nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN + fhlen); + } nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; @@ -4077,7 +4252,7 @@ nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, int error, cnt, len, setnil; u_int32_t *opcntp; - nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp); + nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL); cp = 
dirpath; cnt = 0; do { @@ -4101,12 +4276,16 @@ nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, *cp2++ = '/'; cp = cp2; } while (*cp != '\0'); - *opcntp = txdr_unsigned(2 + cnt); + if (NFSHASNFSV4N(nmp)) + /* Has a Sequence Op done by nfscl_reqstart(). */ + *opcntp = txdr_unsigned(3 + cnt); + else + *opcntp = txdr_unsigned(2 + cnt); NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV4OP_GETFH); nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); if (nd->nd_repstat == 0) { @@ -4140,16 +4319,19 @@ nfsrpc_delegreturn(struct nfscldeleg *dp, struct ucred *cred, int error; nfscl_reqstart(nd, NFSPROC_DELEGRETURN, nmp, dp->nfsdl_fh, - dp->nfsdl_fhlen, NULL); + dp->nfsdl_fhlen, NULL, NULL); NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID); - *tl++ = dp->nfsdl_stateid.seqid; + if (NFSHASNFSV4N(nmp)) + *tl++ = 0; + else + *tl++ = dp->nfsdl_stateid.seqid; *tl++ = dp->nfsdl_stateid.other[0]; *tl++ = dp->nfsdl_stateid.other[1]; *tl = dp->nfsdl_stateid.other[2]; if (syscred) nd->nd_flag |= ND_USEGSSNAME; error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, - NFS_PROG, NFS_VER4, NULL, 1, NULL); + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); if (error) return (error); error = nd->nd_repstat; @@ -4230,3 +4412,1466 @@ nfsrpc_setaclrpc(vnode_t vp, struct ucred *cred, NFSPROC_T *p, mbuf_freem(nd->nd_mrep); return (nd->nd_repstat); } + +/* + * Do the NFSv4.1 Exchange ID. 
+ */ +int +nfsrpc_exchangeid(struct nfsmount *nmp, struct nfsclclient *clp, + struct nfssockreq *nrp, uint32_t exchflags, struct nfsclds **dspp, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl, v41flags; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + struct nfsclds *dsp; + struct timespec verstime; + int error, len; + + *dspp = NULL; + nfscl_reqstart(nd, NFSPROC_EXCHANGEID, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* Client owner */ + *tl = txdr_unsigned(clp->nfsc_rev); + (void) nfsm_strtom(nd, clp->nfsc_id, clp->nfsc_idlen); + + NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(exchflags); + *tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE); + + /* Set the implementation id4 */ + *tl = txdr_unsigned(1); + (void) nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org")); + (void) nfsm_strtom(nd, version, strlen(version)); + NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME); + verstime.tv_sec = 1293840000; /* Jan 1, 2011 */ + verstime.tv_nsec = 0; + txdr_nfsv4time(&verstime, tl); + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + NFSCL_DEBUG(1, "exchangeid err=%d reps=%d\n", error, + (int)nd->nd_repstat); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_UNSIGNED + NFSX_HYPER); + len = fxdr_unsigned(int, *(tl + 7)); + if (len < 0 || len > NFSV4_OPAQUELIMIT) { + error = NFSERR_BADXDR; + goto nfsmout; + } + dsp = malloc(sizeof(struct nfsclds) + len, M_NFSCLDS, + M_WAITOK | M_ZERO); + dsp->nfsclds_expire = NFSD_MONOSEC + clp->nfsc_renew; + dsp->nfsclds_servownlen = len; + dsp->nfsclds_sess.nfsess_clientid.lval[0] = *tl++; + dsp->nfsclds_sess.nfsess_clientid.lval[1] = *tl++; + dsp->nfsclds_sess.nfsess_sequenceid = + fxdr_unsigned(uint32_t, *tl++); + v41flags = fxdr_unsigned(uint32_t, *tl); + if ((v41flags & 
NFSV4EXCH_USEPNFSMDS) != 0 && + NFSHASPNFSOPT(nmp)) { + NFSCL_DEBUG(1, "set PNFS\n"); + NFSLOCKMNT(nmp); + nmp->nm_state |= NFSSTA_PNFS; + NFSUNLOCKMNT(nmp); + dsp->nfsclds_flags |= NFSCLDS_MDS; + } + if ((v41flags & NFSV4EXCH_USEPNFSDS) != 0) + dsp->nfsclds_flags |= NFSCLDS_DS; + if (len > 0) + nd->nd_repstat = nfsrv_mtostr(nd, + dsp->nfsclds_serverown, len); + if (nd->nd_repstat == 0) { + mtx_init(&dsp->nfsclds_mtx, "nfsds", NULL, MTX_DEF); + mtx_init(&dsp->nfsclds_sess.nfsess_mtx, "nfssession", + NULL, MTX_DEF); + nfscl_initsessionslots(&dsp->nfsclds_sess); + *dspp = dsp; + } else + free(dsp, M_NFSCLDS); + } + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 Create Session. + */ +int +nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, + struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, + NFSPROC_T *p) +{ + uint32_t crflags, *tl; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + int error, irdcnt; + + nfscl_reqstart(nd, NFSPROC_CREATESESSION, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED); + *tl++ = sep->nfsess_clientid.lval[0]; + *tl++ = sep->nfsess_clientid.lval[1]; + *tl++ = txdr_unsigned(sequenceid); + crflags = (NFSMNT_RDONLY(nmp->nm_mountp) ? 0 : NFSV4CRSESS_PERSIST); + if (nfscl_enablecallb != 0 && nfs_numnfscbd > 0) + crflags |= NFSV4CRSESS_CONNBACKCHAN; + *tl = txdr_unsigned(crflags); + + /* Fill in fore channel attributes. */ + NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); + *tl++ = 0; /* Header pad size */ + *tl++ = txdr_unsigned(100000); /* Max request size */ + *tl++ = txdr_unsigned(100000); /* Max response size */ + *tl++ = txdr_unsigned(4096); /* Max response size cached */ + *tl++ = txdr_unsigned(20); /* Max operations */ + *tl++ = txdr_unsigned(64); /* Max slots */ + *tl = 0; /* No rdma ird */ + + /* Fill in back channel attributes. 
*/ + NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); + *tl++ = 0; /* Header pad size */ + *tl++ = txdr_unsigned(10000); /* Max request size */ + *tl++ = txdr_unsigned(10000); /* Max response size */ + *tl++ = txdr_unsigned(4096); /* Max response size cached */ + *tl++ = txdr_unsigned(4); /* Max operations */ + *tl++ = txdr_unsigned(NFSV4_CBSLOTS); /* Max slots */ + *tl = 0; /* No rdma ird */ + + NFSM_BUILD(tl, uint32_t *, 8 * NFSX_UNSIGNED); + *tl++ = txdr_unsigned(NFS_CALLBCKPROG); /* Call back prog # */ + + /* Allow AUTH_SYS callbacks as uid, gid == 0. */ + *tl++ = txdr_unsigned(1); /* Auth_sys only */ + *tl++ = txdr_unsigned(AUTH_SYS); /* AUTH_SYS type */ + *tl++ = txdr_unsigned(nfsboottime.tv_sec); /* time stamp */ + *tl++ = 0; /* Null machine name */ + *tl++ = 0; /* Uid == 0 */ + *tl++ = 0; /* Gid == 0 */ + *tl = 0; /* No additional gids */ + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, nrp, NULL, p, cred, NFS_PROG, + NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + + 2 * NFSX_UNSIGNED); + bcopy(tl, sep->nfsess_sessionid, NFSX_V4SESSIONID); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + sep->nfsess_sequenceid = fxdr_unsigned(uint32_t, *tl++); + crflags = fxdr_unsigned(uint32_t, *tl); + if ((crflags & NFSV4CRSESS_PERSIST) != 0 && mds != 0) { + NFSLOCKMNT(nmp); + nmp->nm_state |= NFSSTA_SESSPERSIST; + NFSUNLOCKMNT(nmp); + } + + /* Get the fore channel slot count. */ + NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); + tl += 3; /* Skip the other counts. */ + sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); + tl++; + sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); + NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots); + irdcnt = fxdr_unsigned(int, *tl); + if (irdcnt > 0) + NFSM_DISSECT(tl, uint32_t *, irdcnt * NFSX_UNSIGNED); + + /* and the back channel slot count. 
*/ + NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); + tl += 5; + sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl); + NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots); + } + error = nd->nd_repstat; +nfsmout: + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 Destroy Session. + */ +int +nfsrpc_destroysession(struct nfsmount *nmp, struct nfsclclient *clp, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + int error; + + nfscl_reqstart(nd, NFSPROC_DESTROYSESSION, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID); + bcopy(NFSMNT_MDSSESSION(nmp)->nfsess_sessionid, tl, NFSX_V4SESSIONID); + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 Destroy Client. + */ +int +nfsrpc_destroyclient(struct nfsmount *nmp, struct nfsclclient *clp, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + int error; + + nfscl_reqstart(nd, NFSPROC_DESTROYCLIENT, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED); + *tl++ = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[0]; + *tl = NFSMNT_MDSSESSION(nmp)->nfsess_clientid.lval[1]; + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 LayoutGet. 
+ */ +int +nfsrpc_layoutget(struct nfsmount *nmp, uint8_t *fhp, int fhlen, int iomode, + uint64_t offset, uint64_t len, uint64_t minlen, int layoutlen, + nfsv4stateid_t *stateidp, int *retonclosep, struct nfsclflayouthead *flhp, + struct ucred *cred, NFSPROC_T *p, void *stuff) +{ + uint32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsfh *nfhp; + struct nfsclflayout *flp, *prevflp, *tflp; + int cnt, error, gotiomode, fhcnt, nfhlen, i, j; + uint8_t *cp; + uint64_t retlen; + + flp = NULL; + gotiomode = -1; + nfscl_reqstart(nd, NFSPROC_LAYOUTGET, nmp, fhp, fhlen, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER + + NFSX_STATEID); + *tl++ = newnfs_false; /* Don't signal availability. */ + *tl++ = txdr_unsigned(NFSLAYOUT_NFSV4_1_FILES); + *tl++ = txdr_unsigned(iomode); + txdr_hyper(offset, tl); + tl += 2; + txdr_hyper(len, tl); + tl += 2; + txdr_hyper(minlen, tl); + tl += 2; + *tl++ = txdr_unsigned(stateidp->seqid); + NFSCL_DEBUG(4, "layget seq=%d\n", (int)stateidp->seqid); + *tl++ = stateidp->other[0]; + *tl++ = stateidp->other[1]; + *tl++ = stateidp->other[2]; + *tl = txdr_unsigned(layoutlen); + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_STATEID); + if (*tl++ != 0) + *retonclosep = 1; + else + *retonclosep = 0; + stateidp->seqid = fxdr_unsigned(uint32_t, *tl++); + NFSCL_DEBUG(4, "retoncls=%d stseq=%d\n", *retonclosep, + (int)stateidp->seqid); + stateidp->other[0] = *tl++; + stateidp->other[1] = *tl++; + stateidp->other[2] = *tl++; + cnt = fxdr_unsigned(int, *tl); + NFSCL_DEBUG(4, "layg cnt=%d\n", cnt); + if (cnt <= 0 || cnt > 10000) { + /* Don't accept more than 10000 layouts in reply. 
*/ + error = NFSERR_BADXDR; + goto nfsmout; + } + for (i = 0; i < cnt; i++) { + /* Dissect all the way to the file handle cnt. */ + NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_HYPER + + 6 * NFSX_UNSIGNED + NFSX_V4DEVICEID); + fhcnt = fxdr_unsigned(int, *(tl + 11 + + NFSX_V4DEVICEID / NFSX_UNSIGNED)); + NFSCL_DEBUG(4, "fhcnt=%d\n", fhcnt); + if (fhcnt < 0 || fhcnt > 100) { + /* Don't accept more than 100 file handles. */ + error = NFSERR_BADXDR; + goto nfsmout; + } + if (fhcnt > 1) + flp = malloc(sizeof(*flp) + (fhcnt - 1) * + sizeof(struct nfsfh *), + M_NFSFLAYOUT, M_WAITOK); + else + flp = malloc(sizeof(*flp), + M_NFSFLAYOUT, M_WAITOK); + flp->nfsfl_flags = 0; + flp->nfsfl_fhcnt = 0; + flp->nfsfl_devp = NULL; + flp->nfsfl_off = fxdr_hyper(tl); tl += 2; + retlen = fxdr_hyper(tl); tl += 2; + if (flp->nfsfl_off + retlen < flp->nfsfl_off) + flp->nfsfl_end = UINT64_MAX - flp->nfsfl_off; + else + flp->nfsfl_end = flp->nfsfl_off + retlen; + flp->nfsfl_iomode = fxdr_unsigned(int, *tl++); + if (gotiomode == -1) + gotiomode = flp->nfsfl_iomode; + NFSCL_DEBUG(4, "layg reqiom=%d retiom=%d\n", iomode, + (int)flp->nfsfl_iomode); + if (fxdr_unsigned(int, *tl++) != + NFSLAYOUT_NFSV4_1_FILES) { + printf("NFSv4.1: got non-files layout\n"); + error = NFSERR_BADXDR; + goto nfsmout; + } + NFSBCOPY(++tl, flp->nfsfl_dev, NFSX_V4DEVICEID); + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + flp->nfsfl_util = fxdr_unsigned(uint32_t, *tl++); + NFSCL_DEBUG(4, "flutil=0x%x\n", flp->nfsfl_util); + flp->nfsfl_stripe1 = fxdr_unsigned(uint32_t, *tl++); + flp->nfsfl_patoff = fxdr_hyper(tl); tl += 2; + if (fxdr_unsigned(int, *tl) != fhcnt) { + printf("EEK! 
bad fhcnt\n"); + error = NFSERR_BADXDR; + goto nfsmout; + } + for (j = 0; j < fhcnt; j++) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + nfhlen = fxdr_unsigned(int, *tl); + if (nfhlen <= 0 || nfhlen > NFSX_V4FHMAX) { + error = NFSERR_BADXDR; + goto nfsmout; + } + nfhp = malloc(sizeof(*nfhp) + nfhlen - 1, + M_NFSFH, M_WAITOK); + flp->nfsfl_fh[j] = nfhp; + flp->nfsfl_fhcnt++; + nfhp->nfh_len = nfhlen; + NFSM_DISSECT(cp, uint8_t *, NFSM_RNDUP(nfhlen)); + NFSBCOPY(cp, nfhp->nfh_fh, nfhlen); + } + if (flp->nfsfl_iomode == gotiomode) { + /* Keep the list in increasing offset order. */ + tflp = LIST_FIRST(flhp); + prevflp = NULL; + while (tflp != NULL && + tflp->nfsfl_off < flp->nfsfl_off) { + prevflp = tflp; + tflp = LIST_NEXT(tflp, nfsfl_list); + } + if (prevflp == NULL) + LIST_INSERT_HEAD(flhp, flp, nfsfl_list); + else + LIST_INSERT_AFTER(prevflp, flp, + nfsfl_list); + } else { + printf("nfscl_layoutget(): got wrong iomode\n"); + nfscl_freeflayout(flp); + } + flp = NULL; + } + } + if (nd->nd_repstat != 0 && error == 0) + error = nd->nd_repstat; +nfsmout: + if (error != 0 && flp != NULL) + nfscl_freeflayout(flp); + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 Get Device Info. 
+ */ +int +nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, + uint32_t *notifybitsp, struct nfscldevinfo **ndip, struct ucred *cred, + NFSPROC_T *p) +{ + uint32_t cnt, *tl; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + struct sockaddr_storage ss; + struct nfsclds *dsp = NULL, **dspp; + struct nfscldevinfo *ndi; + int addrcnt, bitcnt, error, i, isudp, j, pos, safilled, stripecnt; + uint8_t stripeindex; + + *ndip = NULL; + ndi = NULL; + nfscl_reqstart(nd, NFSPROC_GETDEVICEINFO, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, NFSX_V4DEVICEID + 3 * NFSX_UNSIGNED); + NFSBCOPY(deviceid, tl, NFSX_V4DEVICEID); + tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED); + *tl++ = txdr_unsigned(layouttype); + *tl++ = txdr_unsigned(100000); + if (notifybitsp != NULL && *notifybitsp != 0) { + *tl = txdr_unsigned(1); /* One word of bits. */ + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = txdr_unsigned(*notifybitsp); + } else + *tl = txdr_unsigned(0); + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED); + if (layouttype != fxdr_unsigned(int, *tl++)) + printf("EEK! 
devinfo layout type not same!\n"); + stripecnt = fxdr_unsigned(int, *++tl); + NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt); + if (stripecnt < 1 || stripecnt > 4096) { + printf("NFS devinfo stripecnt %d: out of range\n", + stripecnt); + error = NFSERR_BADXDR; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, (stripecnt + 1) * NFSX_UNSIGNED); + addrcnt = fxdr_unsigned(int, *(tl + stripecnt)); + NFSCL_DEBUG(4, "addrcnt=%d\n", addrcnt); + if (addrcnt < 1 || addrcnt > 128) { + printf("NFS devinfo addrcnt %d: out of range\n", + addrcnt); + error = NFSERR_BADXDR; + goto nfsmout; + } + + /* + * Now we know how many stripe indices and addresses, so + * we can allocate the structure the correct size. + */ + i = (stripecnt * sizeof(uint8_t)) / sizeof(struct nfsclds *) + + 1; + NFSCL_DEBUG(4, "stripeindices=%d\n", i); + ndi = malloc(sizeof(*ndi) + (addrcnt + i) * + sizeof(struct nfsclds *), M_NFSDEVINFO, M_WAITOK | M_ZERO); + NFSBCOPY(deviceid, ndi->nfsdi_deviceid, NFSX_V4DEVICEID); + ndi->nfsdi_refcnt = 0; + ndi->nfsdi_stripecnt = stripecnt; + ndi->nfsdi_addrcnt = addrcnt; + /* Fill in the stripe indices. */ + for (i = 0; i < stripecnt; i++) { + stripeindex = fxdr_unsigned(uint8_t, *tl++); + NFSCL_DEBUG(4, "stripeind=%d\n", stripeindex); + if (stripeindex >= addrcnt) { + printf("NFS devinfo stripeindex %d: too big\n", + (int)stripeindex); + error = NFSERR_BADXDR; + goto nfsmout; + } + nfsfldi_setstripeindex(ndi, i, stripeindex); + } + + /* Now, dissect the server address(es). */ + safilled = 0; + for (i = 0; i < addrcnt; i++) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + cnt = fxdr_unsigned(uint32_t, *tl); + if (cnt == 0) { + printf("NFS devinfo 0 len addrlist\n"); + error = NFSERR_BADXDR; + goto nfsmout; + } + dspp = nfsfldi_addr(ndi, i); + pos = arc4random() % cnt; /* Choose one. 
*/ + safilled = 0; + for (j = 0; j < cnt; j++) { + error = nfsv4_getipaddr(nd, &ss, &isudp); + if (error != 0 && error != EPERM) { + error = NFSERR_BADXDR; + goto nfsmout; + } + if (error == 0 && isudp == 0) { + /* + * The algorithm is: + * - use "pos" entry if it is of the + * same af_family or none of them + * is of the same af_family + * else + * - use the first one of the same + * af_family. + */ + if ((safilled == 0 && ss.ss_family == + nmp->nm_nam->sa_family) || + (j == pos && + (safilled == 0 || ss.ss_family == + nmp->nm_nam->sa_family)) || + (safilled == 1 && ss.ss_family == + nmp->nm_nam->sa_family)) { + error = nfsrpc_fillsa(nmp, &ss, + &dsp, p); + if (error == 0) { + *dspp = dsp; + if (ss.ss_family == + nmp->nm_nam->sa_family) + safilled = 2; + else + safilled = 1; + } + } + } + } + if (safilled == 0) + break; + } + + /* And the notify bits. */ + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (safilled != 0) { + bitcnt = fxdr_unsigned(int, *tl); + if (bitcnt > 0) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (notifybitsp != NULL) + *notifybitsp = + fxdr_unsigned(uint32_t, *tl); + } + *ndip = ndi; + } else + error = EPERM; + } + if (nd->nd_repstat != 0) + error = nd->nd_repstat; +nfsmout: + if (error != 0 && ndi != NULL) + nfscl_freedevinfo(ndi); + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Do the NFSv4.1 LayoutCommit. 
+ * lastbyte is clamped to the range [off, off + len - 1] before it is sent
+ * as the new last-written offset.  layp/layoutupdatecnt describe an opaque
+ * layout update body, which must be empty for the Files layout type.
+ * Returns the RPC error or, failing that, the status in the reply.
+ */
+int
+nfsrpc_layoutcommit(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
+    uint64_t off, uint64_t len, uint64_t lastbyte, nfsv4stateid_t *stateidp,
+    int layouttype, int layoutupdatecnt, uint8_t *layp, struct ucred *cred,
+    NFSPROC_T *p, void *stuff)
+{
+	uint32_t *tl;
+	struct nfsrv_descript nfsd, *nd = &nfsd;
+	int error, outcnt, i;
+	uint8_t *cp;
+	uint64_t end;
+
+	nfscl_reqstart(nd, NFSPROC_LAYOUTCOMMIT, nmp, fh, fhlen, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
+	    NFSX_STATEID);
+	txdr_hyper(off, tl);
+	tl += 2;
+	txdr_hyper(len, tl);
+	tl += 2;
+	if (reclaim != 0)
+		*tl++ = newnfs_true;
+	else
+		*tl++ = newnfs_false;
+	*tl++ = txdr_unsigned(stateidp->seqid);
+	*tl++ = stateidp->other[0];
+	*tl++ = stateidp->other[1];
+	*tl++ = stateidp->other[2];
+	*tl++ = newnfs_true;		/* A new last byte offset follows. */
+	/*
+	 * off + len wraps for a range that runs to the end of the offset
+	 * space; treat that as ending at UINT64_MAX so that the clamp
+	 * below cannot yield a lastbyte that is less than off.
+	 */
+	end = off + len;
+	if (end < off)
+		end = UINT64_MAX;
+	if (lastbyte < off)
+		lastbyte = off;
+	else if (lastbyte >= end)
+		lastbyte = end - 1;
+	txdr_hyper(lastbyte, tl);
+	tl += 2;
+	*tl++ = newnfs_false;		/* No time value is sent. */
+	*tl++ = txdr_unsigned(layouttype);
+	*tl = txdr_unsigned(layoutupdatecnt);
+	if (layoutupdatecnt > 0) {
+		KASSERT(layouttype != NFSLAYOUT_NFSV4_1_FILES,
+		    ("Must be nil for Files Layout"));
+		/* Copy the opaque body and zero pad it to an XDR boundary. */
+		outcnt = NFSM_RNDUP(layoutupdatecnt);
+		NFSM_BUILD(cp, uint8_t *, outcnt);
+		NFSBCOPY(layp, cp, layoutupdatecnt);
+		cp += layoutupdatecnt;
+		for (i = 0; i < (outcnt - layoutupdatecnt); i++)
+			*cp++ = 0x0;
+	}
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+	error = nd->nd_repstat;
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Do the NFSv4.1 LayoutReturn.
+ * The byte range, stateid and opaque body are only put in the request
+ * when layoutreturn is NFSLAYOUTRETURN_FILE.  When the reply says that a
+ * stateid is present, it is copied back into *stateidp.
+ * Returns the RPC error or, failing that, the status in the reply.
+ */
+int
+nfsrpc_layoutreturn(struct nfsmount *nmp, uint8_t *fh, int fhlen, int reclaim,
+    int layouttype, uint32_t iomode, int layoutreturn, uint64_t offset,
+    uint64_t len, nfsv4stateid_t *stateidp, int layoutcnt, uint32_t *layp,
+    struct ucred *cred, NFSPROC_T *p, void *stuff)
+{
+	struct nfsrv_descript nfsd, *nd = &nfsd;
+	uint32_t *tl;
+	uint8_t *cp;
+	int error, i, outcnt;
+
+	/* The fixed part of the arguments. */
+	nfscl_reqstart(nd, NFSPROC_LAYOUTRETURN, nmp, fh, fhlen, NULL, NULL);
+	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
+	*tl++ = (reclaim != 0) ? newnfs_true : newnfs_false;
+	*tl++ = txdr_unsigned(layouttype);
+	*tl++ = txdr_unsigned(iomode);
+	*tl = txdr_unsigned(layoutreturn);
+
+	/* The per-file part. */
+	if (layoutreturn == NFSLAYOUTRETURN_FILE) {
+		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
+		    NFSX_UNSIGNED);
+		txdr_hyper(offset, tl);
+		tl += 2;
+		txdr_hyper(len, tl);
+		tl += 2;
+		NFSCL_DEBUG(4, "layoutret stseq=%d\n", (int)stateidp->seqid);
+		*tl++ = txdr_unsigned(stateidp->seqid);
+		*tl++ = stateidp->other[0];
+		*tl++ = stateidp->other[1];
+		*tl++ = stateidp->other[2];
+		*tl = txdr_unsigned(layoutcnt);
+		if (layoutcnt > 0) {
+			/* Opaque body, zero padded to an XDR boundary. */
+			outcnt = NFSM_RNDUP(layoutcnt);
+			NFSM_BUILD(cp, uint8_t *, outcnt);
+			NFSBCOPY(layp, cp, layoutcnt);
+			cp += layoutcnt;
+			i = outcnt - layoutcnt;
+			while (i-- > 0)
+				*cp++ = 0x0;
+		}
+	}
+
+	nd->nd_flag |= ND_USEGSSNAME;
+	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
+	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
+	if (error != 0)
+		return (error);
+
+	if (nd->nd_repstat != 0) {
+		error = nd->nd_repstat;
+		goto nfsmout;
+	}
+	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
+	if (*tl == 0)
+		goto nfsmout;		/* No stateid in the reply. */
+	NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
+	stateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
+	stateidp->other[0] = *tl++;
+	stateidp->other[1] = *tl++;
+	stateidp->other[2] = *tl;
+nfsmout:
+	mbuf_freem(nd->nd_mrep);
+	return (error);
+}
+
+/*
+ * Acquire a layout and devinfo, if possible.
The caller must have acquired + * a reference count on the nfsclclient structure before calling this. + * Return the layout in lypp with a reference count on it, if successful. + */ +static int +nfsrpc_getlayout(struct nfsmount *nmp, vnode_t vp, struct nfsfh *nfhp, + int iomode, uint32_t *notifybitsp, nfsv4stateid_t *stateidp, uint64_t off, + struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p) +{ + struct nfscllayout *lyp; + struct nfsclflayout *flp, *tflp; + struct nfscldevinfo *dip; + struct nfsclflayouthead flh; + int error = 0, islocked, layoutlen, recalled, retonclose; + nfsv4stateid_t stateid; + + *lypp = NULL; + /* + * If lyp is returned non-NULL, there will be a refcnt (shared lock) + * on it, iff flp != NULL or a lock (exclusive lock) on it iff + * flp == NULL. + */ + lyp = nfscl_getlayout(nmp->nm_clp, nfhp->nfh_fh, nfhp->nfh_len, + off, &flp, &recalled); + islocked = 0; + if (lyp == NULL || flp == NULL) { + if (recalled != 0) + return (EIO); + LIST_INIT(&flh); + layoutlen = NFSMNT_MDSSESSION(nmp)->nfsess_maxcache - + (NFSX_STATEID + 3 * NFSX_UNSIGNED); + if (lyp == NULL) { + stateid.seqid = 0; + stateid.other[0] = stateidp->other[0]; + stateid.other[1] = stateidp->other[1]; + stateid.other[2] = stateidp->other[2]; + error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, + nfhp->nfh_len, iomode, (uint64_t)0, INT64_MAX, + (uint64_t)0, layoutlen, &stateid, &retonclose, + &flh, cred, p, NULL); + } else { + islocked = 1; + stateid.seqid = lyp->nfsly_stateid.seqid; + stateid.other[0] = lyp->nfsly_stateid.other[0]; + stateid.other[1] = lyp->nfsly_stateid.other[1]; + stateid.other[2] = lyp->nfsly_stateid.other[2]; + error = nfsrpc_layoutget(nmp, nfhp->nfh_fh, + nfhp->nfh_len, iomode, off, INT64_MAX, + (uint64_t)0, layoutlen, &stateid, &retonclose, + &flh, cred, p, NULL); + } + if (error == 0) + LIST_FOREACH(tflp, &flh, nfsfl_list) { + error = nfscl_adddevinfo(nmp, NULL, tflp); + if (error != 0) { + error = nfsrpc_getdeviceinfo(nmp, + tflp->nfsfl_dev, + 
NFSLAYOUT_NFSV4_1_FILES, + notifybitsp, &dip, cred, p); + if (error != 0) + break; + error = nfscl_adddevinfo(nmp, dip, + tflp); + if (error != 0) + printf( + "getlayout: cannot add\n"); + } + } + if (error == 0) { + /* + * nfscl_layout() always returns with the nfsly_lock + * set to a refcnt (shared lock). + */ + error = nfscl_layout(nmp, vp, nfhp->nfh_fh, + nfhp->nfh_len, &stateid, retonclose, &flh, &lyp, + cred, p); + if (error == 0) + *lypp = lyp; + } else if (islocked != 0) + nfsv4_unlock(&lyp->nfsly_lock, 0); + } else + *lypp = lyp; + return (error); +} + +/* + * Do a TCP connection plus exchange id and create session. + * If successful, a "struct nfsclds" is linked into the list for the + * mount point and a pointer to it is returned. + */ +static int +nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_storage *ssp, + struct nfsclds **dspp, NFSPROC_T *p) +{ + struct sockaddr_in *msad, *sad, *ssd; + struct sockaddr_in6 *msad6, *sad6, *ssd6; + struct nfsclclient *clp; + struct nfssockreq *nrp; + struct nfsclds *dsp, *tdsp; + int error; + enum nfsclds_state retv; + uint32_t sequenceid; + + KASSERT(nmp->nm_sockreq.nr_cred != NULL, + ("nfsrpc_fillsa: NULL nr_cred")); + NFSLOCKCLSTATE(); + clp = nmp->nm_clp; + NFSUNLOCKCLSTATE(); + if (clp == NULL) + return (EPERM); + if (ssp->ss_family == AF_INET) { + ssd = (struct sockaddr_in *)ssp; + NFSLOCKMNT(nmp); + + /* + * Check to see if we already have a session for this + * address that is usable for a DS. + * Note that the MDS's address is in a different place + * than the sessions already acquired for DS's. 
+ */ + msad = (struct sockaddr_in *)nmp->nm_sockreq.nr_nam; + tdsp = TAILQ_FIRST(&nmp->nm_sess); + while (tdsp != NULL) { + if (msad != NULL && msad->sin_family == AF_INET && + ssd->sin_addr.s_addr == msad->sin_addr.s_addr && + ssd->sin_port == msad->sin_port && + (tdsp->nfsclds_flags & NFSCLDS_DS) != 0) { + *dspp = tdsp; + NFSUNLOCKMNT(nmp); + NFSCL_DEBUG(4, "fnd same addr\n"); + return (0); + } + tdsp = TAILQ_NEXT(tdsp, nfsclds_list); + if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) + msad = (struct sockaddr_in *) + tdsp->nfsclds_sockp->nr_nam; + else + msad = NULL; + } + NFSUNLOCKMNT(nmp); + + /* No IP address match, so look for new/trunked one. */ + sad = malloc(sizeof(*sad), M_SONAME, M_WAITOK | M_ZERO); + sad->sin_len = sizeof(*sad); + sad->sin_family = AF_INET; + sad->sin_port = ssd->sin_port; + sad->sin_addr.s_addr = ssd->sin_addr.s_addr; + nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); + nrp->nr_nam = (struct sockaddr *)sad; + } else if (ssp->ss_family == AF_INET6) { + ssd6 = (struct sockaddr_in6 *)ssp; + NFSLOCKMNT(nmp); + + /* + * Check to see if we already have a session for this + * address that is usable for a DS. + * Note that the MDS's address is in a different place + * than the sessions already acquired for DS's. + */ + msad6 = (struct sockaddr_in6 *)nmp->nm_sockreq.nr_nam; + tdsp = TAILQ_FIRST(&nmp->nm_sess); + while (tdsp != NULL) { + if (msad6 != NULL && msad6->sin6_family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&ssd6->sin6_addr, + &msad6->sin6_addr) && + ssd6->sin6_port == msad6->sin6_port && + (tdsp->nfsclds_flags & NFSCLDS_DS) != 0) { + *dspp = tdsp; + NFSUNLOCKMNT(nmp); + return (0); + } + tdsp = TAILQ_NEXT(tdsp, nfsclds_list); + if (tdsp != NULL && tdsp->nfsclds_sockp != NULL) + msad6 = (struct sockaddr_in6 *) + tdsp->nfsclds_sockp->nr_nam; + else + msad6 = NULL; + } + NFSUNLOCKMNT(nmp); + + /* No IP address match, so look for new/trunked one. 
*/ + sad6 = malloc(sizeof(*sad6), M_SONAME, M_WAITOK | M_ZERO); + sad6->sin6_len = sizeof(*sad6); + sad6->sin6_family = AF_INET6; + sad6->sin6_port = ssd6->sin6_port; + NFSBCOPY(&ssd6->sin6_addr, &sad6->sin6_addr, + sizeof(struct in6_addr)); + nrp = malloc(sizeof(*nrp), M_NFSSOCKREQ, M_WAITOK | M_ZERO); + nrp->nr_nam = (struct sockaddr *)sad6; + } else + return (EPERM); + + nrp->nr_sotype = SOCK_STREAM; + mtx_init(&nrp->nr_mtx, "nfssock", NULL, MTX_DEF); + nrp->nr_prog = NFS_PROG; + nrp->nr_vers = NFS_VER4; + + /* + * Use the credentials that were used for the mount, which are + * in nmp->nm_sockreq.nr_cred for newnfs_connect() etc. + * Ref. counting the credentials with crhold() is probably not + * necessary, since nm_sockreq.nr_cred won't be crfree()'d until + * unmount, but I did it anyhow. + */ + nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred); + error = newnfs_connect(nmp, nrp, NULL, p, 0); + NFSCL_DEBUG(3, "DS connect=%d\n", error); + + /* Now, do the exchangeid and create session. */ + if (error == 0) + error = nfsrpc_exchangeid(nmp, clp, nrp, NFSV4EXCH_USEPNFSDS, + &dsp, nrp->nr_cred, p); + NFSCL_DEBUG(3, "DS exchangeid=%d\n", error); + if (error == 0) { + dsp->nfsclds_sockp = nrp; + NFSLOCKMNT(nmp); + retv = nfscl_getsameserver(nmp, dsp, &tdsp); + NFSCL_DEBUG(3, "getsame ret=%d\n", retv); + if (retv == NFSDSP_USETHISSESSION) { + NFSUNLOCKMNT(nmp); + /* + * If there is already a session for this server, + * use it. 
+ */ + (void)newnfs_disconnect(nrp); + nfscl_freenfsclds(dsp); + *dspp = tdsp; + return (0); + } + if (retv == NFSDSP_SEQTHISSESSION) + sequenceid = tdsp->nfsclds_sess.nfsess_sequenceid; + else + sequenceid = dsp->nfsclds_sess.nfsess_sequenceid; + NFSUNLOCKMNT(nmp); + error = nfsrpc_createsession(nmp, &dsp->nfsclds_sess, + nrp, sequenceid, 0, nrp->nr_cred, p); + NFSCL_DEBUG(3, "DS createsess=%d\n", error); + } else { + NFSFREECRED(nrp->nr_cred); + NFSFREEMUTEX(&nrp->nr_mtx); + free(nrp->nr_nam, M_SONAME); + free(nrp, M_NFSSOCKREQ); + } + if (error == 0) { + NFSCL_DEBUG(3, "add DS session\n"); + /* + * Put it at the end of the list. That way the list + * is ordered by when the entry was added. This matters + * since the one done first is the one that should be + * used for sequencid'ing any subsequent create sessions. + */ + NFSLOCKMNT(nmp); + TAILQ_INSERT_TAIL(&nmp->nm_sess, dsp, nfsclds_list); + NFSUNLOCKMNT(nmp); + *dspp = dsp; + } else if (dsp != NULL) + nfscl_freenfsclds(dsp); + return (error); +} + +/* + * Do the NFSv4.1 Reclaim Complete. + */ +int +nfsrpc_reclaimcomplete(struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + int error; + + nfscl_reqstart(nd, NFSPROC_RECLAIMCOMPL, nmp, NULL, 0, NULL, NULL); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + *tl = newnfs_false; + nd->nd_flag |= ND_USEGSSNAME; + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) + return (error); + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * Initialize the slot tables for a session. 
+ */ +static void +nfscl_initsessionslots(struct nfsclsession *sep) +{ + int i; + + for (i = 0; i < NFSV4_CBSLOTS; i++) { + if (sep->nfsess_cbslots[i].nfssl_reply != NULL) + m_freem(sep->nfsess_cbslots[i].nfssl_reply); + NFSBZERO(&sep->nfsess_cbslots[i], sizeof(struct nfsslot)); + } + for (i = 0; i < 64; i++) + sep->nfsess_slotseq[i] = 0; + sep->nfsess_slots = 0; +} + +/* + * Called to try and do an I/O operation via an NFSv4.1 Data Server (DS). + */ +int +nfscl_doiods(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, + uint32_t rwaccess, struct ucred *cred, NFSPROC_T *p) +{ + struct nfsnode *np = VTONFS(vp); + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfscllayout *layp; + struct nfscldevinfo *dip; + struct nfsclflayout *rflp; + nfsv4stateid_t stateid; + struct ucred *newcred; + uint64_t lastbyte, len, off, oresid, xfer; + int eof, error, iolaymode, recalled; + void *lckp; + + if (!NFSHASPNFS(nmp) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || + (np->n_flag & NNOLAYOUT) != 0) + return (EIO); + /* Now, get a reference cnt on the clientid for this mount. */ + if (nfscl_getref(nmp) == 0) + return (EIO); + + /* Find an appropriate stateid. */ + newcred = NFSNEWCRED(cred); + error = nfscl_getstateid(vp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, + rwaccess, 1, newcred, p, &stateid, &lckp); + if (error != 0) { + NFSFREECRED(newcred); + nfscl_relref(nmp); + return (error); + } + /* Search for a layout for this file. */ + off = uiop->uio_offset; + layp = nfscl_getlayout(nmp->nm_clp, np->n_fhp->nfh_fh, + np->n_fhp->nfh_len, off, &rflp, &recalled); + if (layp == NULL || rflp == NULL) { + if (recalled != 0) { + NFSFREECRED(newcred); + nfscl_relref(nmp); + return (EIO); + } + if (layp != NULL) { + nfscl_rellayout(layp, (rflp == NULL) ? 1 : 0); + layp = NULL; + } + /* Try and get a Layout, if it is supported. 
*/ + if (rwaccess == NFSV4OPEN_ACCESSWRITE || + (np->n_flag & NWRITEOPENED) != 0) + iolaymode = NFSLAYOUTIOMODE_RW; + else + iolaymode = NFSLAYOUTIOMODE_READ; + error = nfsrpc_getlayout(nmp, vp, np->n_fhp, iolaymode, + NULL, &stateid, off, &layp, newcred, p); + if (error != 0) { + NFSLOCKNODE(np); + np->n_flag |= NNOLAYOUT; + NFSUNLOCKNODE(np); + if (lckp != NULL) + nfscl_lockderef(lckp); + NFSFREECRED(newcred); + if (layp != NULL) + nfscl_rellayout(layp, 0); + nfscl_relref(nmp); + return (error); + } + } + + /* + * Loop around finding a layout that works for the first part of + * this I/O operation, and then call the function that actually + * does the RPC. + */ + eof = 0; + len = (uint64_t)uiop->uio_resid; + while (len > 0 && error == 0 && eof == 0) { + off = uiop->uio_offset; + error = nfscl_findlayoutforio(layp, off, rwaccess, &rflp); + if (error == 0) { + oresid = xfer = (uint64_t)uiop->uio_resid; + if (xfer > (rflp->nfsfl_end - rflp->nfsfl_off)) + xfer = rflp->nfsfl_end - rflp->nfsfl_off; + dip = nfscl_getdevinfo(nmp->nm_clp, rflp->nfsfl_dev, + rflp->nfsfl_devp); + if (dip != NULL) { + error = nfscl_doflayoutio(vp, uiop, iomode, + must_commit, &eof, &stateid, rwaccess, dip, + layp, rflp, off, xfer, newcred, p); + nfscl_reldevinfo(dip); + lastbyte = off + xfer - 1; + if (error == 0) { + NFSLOCKCLSTATE(); + if (lastbyte > layp->nfsly_lastbyte) + layp->nfsly_lastbyte = lastbyte; + NFSUNLOCKCLSTATE(); + } + } else + error = EIO; + if (error == 0) + len -= (oresid - (uint64_t)uiop->uio_resid); + } + } + if (lckp != NULL) + nfscl_lockderef(lckp); + NFSFREECRED(newcred); + nfscl_rellayout(layp, 0); + nfscl_relref(nmp); + return (error); +} + +/* + * Find a file layout that will handle the first bytes of the requested + * range and return the information from it needed to to the I/O operation. 
+ */ +int +nfscl_findlayoutforio(struct nfscllayout *lyp, uint64_t off, uint32_t rwaccess, + struct nfsclflayout **retflpp) +{ + struct nfsclflayout *flp, *nflp, *rflp; + uint32_t rw; + + rflp = NULL; + rw = rwaccess; + /* For reading, do the Read list first and then the Write list. */ + do { + if (rw == NFSV4OPEN_ACCESSREAD) + flp = LIST_FIRST(&lyp->nfsly_flayread); + else + flp = LIST_FIRST(&lyp->nfsly_flayrw); + while (flp != NULL) { + nflp = LIST_NEXT(flp, nfsfl_list); + if (flp->nfsfl_off > off) + break; + if (flp->nfsfl_end > off && + (rflp == NULL || rflp->nfsfl_end < flp->nfsfl_end)) + rflp = flp; + flp = nflp; + } + if (rw == NFSV4OPEN_ACCESSREAD) + rw = NFSV4OPEN_ACCESSWRITE; + else + rw = 0; + } while (rw != 0); + if (rflp != NULL) { + /* This one covers the most bytes starting at off. */ + *retflpp = rflp; + return (0); + } + return (EIO); +} + +/* + * Do I/O using an NFSv4.1 file layout. + */ +static int +nfscl_doflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, + int *eofp, nfsv4stateid_t *stateidp, int rwflag, struct nfscldevinfo *dp, + struct nfscllayout *lyp, struct nfsclflayout *flp, uint64_t off, + uint64_t len, struct ucred *cred, NFSPROC_T *p) +{ + uint64_t io_off, rel_off, stripe_unit_size, transfer, xfer; + int commit_thru_mds, error = 0, stripe_index, stripe_pos; + struct nfsnode *np; + struct nfsfh *fhp; + struct nfsclds **dspp; + + np = VTONFS(vp); + rel_off = off - flp->nfsfl_patoff; + stripe_unit_size = (flp->nfsfl_util >> 6) & 0x3ffffff; + stripe_pos = (rel_off / stripe_unit_size + flp->nfsfl_stripe1) % + dp->nfsdi_stripecnt; + transfer = stripe_unit_size - (rel_off % stripe_unit_size); + + /* Loop around, doing I/O for each stripe unit. */ + while (len > 0 && error == 0) { + stripe_index = nfsfldi_stripeindex(dp, stripe_pos); + dspp = nfsfldi_addr(dp, stripe_index); + if (len > transfer) + xfer = transfer; + else + xfer = len; + if ((flp->nfsfl_util & NFSFLAYUTIL_DENSE) != 0) { + /* Dense layout. 
*/ + if (stripe_pos >= flp->nfsfl_fhcnt) + return (EIO); + fhp = flp->nfsfl_fh[stripe_pos]; + io_off = (rel_off / (stripe_unit_size * + dp->nfsdi_stripecnt)) * stripe_unit_size + + rel_off % stripe_unit_size; + } else { + /* Sparse layout. */ + if (flp->nfsfl_fhcnt > 1) { + if (stripe_index >= flp->nfsfl_fhcnt) + return (EIO); + fhp = flp->nfsfl_fh[stripe_index]; + } else if (flp->nfsfl_fhcnt == 1) + fhp = flp->nfsfl_fh[0]; + else + fhp = np->n_fhp; + io_off = off; + } + if ((flp->nfsfl_util & NFSFLAYUTIL_COMMIT_THRU_MDS) != 0) + commit_thru_mds = 1; + else + commit_thru_mds = 0; + if (rwflag == FREAD) + error = nfsrpc_readds(vp, uiop, stateidp, eofp, *dspp, + io_off, xfer, fhp, cred, p); + else { + error = nfsrpc_writeds(vp, uiop, iomode, must_commit, + stateidp, *dspp, io_off, xfer, fhp, commit_thru_mds, + cred, p); + if (error == 0) { + NFSLOCKCLSTATE(); + lyp->nfsly_flags |= NFSLY_WRITTEN; + NFSUNLOCKCLSTATE(); + } + } + if (error == 0) { + transfer = stripe_unit_size; + stripe_pos = (stripe_pos + 1) % dp->nfsdi_stripecnt; + len -= xfer; + off += xfer; + } + } + return (error); +} + +/* + * The actual read RPC done to a DS. + */ +static int +nfsrpc_readds(vnode_t vp, struct uio *uiop, nfsv4stateid_t *stateidp, int *eofp, + struct nfsclds *dsp, uint64_t io_off, int len, struct nfsfh *fhp, + struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + int error, retlen; + struct nfsrv_descript nfsd; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfsrv_descript *nd = &nfsd; + struct nfssockreq *nrp; + + nd->nd_mrep = NULL; + nfscl_reqstart(nd, NFSPROC_READDS, nmp, fhp->nfh_fh, fhp->nfh_len, + NULL, &dsp->nfsclds_sess); + nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3); + txdr_hyper(io_off, tl); + *(tl + 2) = txdr_unsigned(len); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. 
*/ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); + if (error != 0) + return (error); + if (nd->nd_repstat != 0) { + error = nd->nd_repstat; + goto nfsmout; + } + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + *eofp = fxdr_unsigned(int, *tl); + NFSM_STRSIZ(retlen, len); + error = nfsm_mbufuio(nd, uiop, retlen); +nfsmout: + if (nd->nd_mrep != NULL) + mbuf_freem(nd->nd_mrep); + return (error); +} + +/* + * The actual write RPC done to a DS. + */ +static int +nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, + nfsv4stateid_t *stateidp, struct nfsclds *dsp, uint64_t io_off, int len, + struct nfsfh *fhp, int commit_thru_mds, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + int error, rlen, commit, committed = NFSWRITE_FILESYNC; + int32_t backup; + struct nfsrv_descript nfsd; + struct nfsrv_descript *nd = &nfsd; + struct nfssockreq *nrp; + + KASSERT(uiop->uio_iovcnt == 1, ("nfs: writerpc iovcnt > 1")); + nd->nd_mrep = NULL; + nfscl_reqstart(nd, NFSPROC_WRITEDS, nmp, fhp->nfh_fh, fhp->nfh_len, + NULL, &dsp->nfsclds_sess); + nfsm_stateidtom(nd, stateidp, NFSSTATEID_PUTSEQIDZERO); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED); + txdr_hyper(io_off, tl); + tl += 2; + *tl++ = txdr_unsigned(*iomode); + *tl = txdr_unsigned(len); + nfsm_uiombuf(nd, uiop, len); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. */ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); + if (error != 0) + return (error); + if (nd->nd_repstat != 0) { + /* + * In case the rpc gets retried, roll + * the uio fileds changed by nfsm_uiombuf() + * back. 
+ */ + uiop->uio_offset -= len; + uio_uio_resid_add(uiop, len); + uio_iov_base_add(uiop, -len); + uio_iov_len_add(uiop, len); + error = nd->nd_repstat; + } else { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); + rlen = fxdr_unsigned(int, *tl++); + if (rlen == 0) { + error = NFSERR_IO; + goto nfsmout; + } else if (rlen < len) { + backup = len - rlen; + uio_iov_base_add(uiop, -(backup)); + uio_iov_len_add(uiop, backup); + uiop->uio_offset -= backup; + uio_uio_resid_add(uiop, backup); + len = rlen; + } + commit = fxdr_unsigned(int, *tl++); + + /* + * Return the lowest committment level + * obtained by any of the RPCs. + */ + if (committed == NFSWRITE_FILESYNC) + committed = commit; + else if (committed == NFSWRITE_DATASYNC && + commit == NFSWRITE_UNSTABLE) + committed = commit; + if (commit_thru_mds != 0) { + NFSLOCKMNT(nmp); + if (!NFSHASWRITEVERF(nmp)) { + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + NFSSETWRITEVERF(nmp); + } else if (NFSBCMP(tl, nmp->nm_verf, NFSX_VERF)) { + *must_commit = 1; + NFSBCOPY(tl, nmp->nm_verf, NFSX_VERF); + } + NFSUNLOCKMNT(nmp); + } else { + NFSLOCKDS(dsp); + if ((dsp->nfsclds_flags & NFSCLDS_HASWRITEVERF) == 0) { + NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); + dsp->nfsclds_flags |= NFSCLDS_HASWRITEVERF; + } else if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { + *must_commit = 1; + NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); + } + NFSUNLOCKDS(dsp); + } + } +nfsmout: + if (nd->nd_mrep != NULL) + mbuf_freem(nd->nd_mrep); + *iomode = committed; + if (nd->nd_repstat != 0 && error == 0) + error = nd->nd_repstat; + return (error); +} + +/* + * Free up the nfsclds structure. 
+ */ +void +nfscl_freenfsclds(struct nfsclds *dsp) +{ + int i; + + if (dsp == NULL) + return; + if (dsp->nfsclds_sockp != NULL) { + NFSFREECRED(dsp->nfsclds_sockp->nr_cred); + NFSFREEMUTEX(&dsp->nfsclds_sockp->nr_mtx); + free(dsp->nfsclds_sockp->nr_nam, M_SONAME); + free(dsp->nfsclds_sockp, M_NFSSOCKREQ); + } + NFSFREEMUTEX(&dsp->nfsclds_mtx); + NFSFREEMUTEX(&dsp->nfsclds_sess.nfsess_mtx); + for (i = 0; i < NFSV4_CBSLOTS; i++) { + if (dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply != NULL) + m_freem( + dsp->nfsclds_sess.nfsess_cbslots[i].nfssl_reply); + } + free(dsp, M_NFSCLDS); +} + +static enum nfsclds_state +nfscl_getsameserver(struct nfsmount *nmp, struct nfsclds *newdsp, + struct nfsclds **retdspp) +{ + struct nfsclds *dsp, *cur_dsp; + + /* + * Search the list of nfsclds structures for one with the same + * server. + */ + cur_dsp = NULL; + TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) { + if (dsp->nfsclds_servownlen == newdsp->nfsclds_servownlen && + dsp->nfsclds_servownlen != 0 && + !NFSBCMP(dsp->nfsclds_serverown, newdsp->nfsclds_serverown, + dsp->nfsclds_servownlen)) { + NFSCL_DEBUG(4, "fnd same fdsp=%p dsp=%p flg=0x%x\n", + TAILQ_FIRST(&nmp->nm_sess), dsp, + dsp->nfsclds_flags); + /* Server major id matches. */ + if ((dsp->nfsclds_flags & NFSCLDS_DS) != 0) { + *retdspp = dsp; + return (NFSDSP_USETHISSESSION); + } + + /* + * Note the first match, so it can be used for + * sequence'ing new sessions. + */ + if (cur_dsp == NULL) + cur_dsp = dsp; + } + } + if (cur_dsp != NULL) { + *retdspp = cur_dsp; + return (NFSDSP_SEQTHISSESSION); + } + return (NFSDSP_NOTFOUND); +} + +#ifdef notyet +/* + * NFS commit rpc to a DS. 
+ */ +static int +nfsrpc_commitds(vnode_t vp, uint64_t offset, int cnt, struct nfsclds *dsp, + struct nfsfh *fhp, struct ucred *cred, NFSPROC_T *p, void *stuff) +{ + uint32_t *tl; + struct nfsrv_descript nfsd, *nd = &nfsd; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + struct nfssockreq *nrp; + int error; + + nfscl_reqstart(nd, NFSPROC_COMMITDS, nmp, fhp->nfh_fh, fhp->nfh_len, + NULL, &dsp->nfsclds_sess); + NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED); + txdr_hyper(offset, tl); + tl += 2; + *tl = txdr_unsigned(cnt); + nrp = dsp->nfsclds_sockp; + if (nrp == NULL) + /* If NULL, use the MDS socket. */ + nrp = &nmp->nm_sockreq; + error = newnfs_request(nd, nmp, NULL, nrp, vp, p, cred, + NFS_PROG, NFS_VER4, NULL, 1, NULL, &dsp->nfsclds_sess); + if (error) + return (error); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF); + NFSLOCKDS(dsp); + if (NFSBCMP(tl, dsp->nfsclds_verf, NFSX_VERF)) { + NFSBCOPY(tl, dsp->nfsclds_verf, NFSX_VERF); + error = NFSERR_STALEWRITEVERF; + } + NFSUNLOCKDS(dsp); + } +nfsmout: + if (error == 0 && nd->nd_repstat != 0) + error = nd->nd_repstat; + mbuf_freem(nd->nd_mrep); + return (error); +} +#endif + diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index b54805d..8b5acb9 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -86,14 +86,18 @@ __FBSDID("$FreeBSD$"); */ extern struct nfsstats newnfsstats; extern struct nfsreqhead nfsd_reqq; +extern u_int32_t newnfs_false, newnfs_true; +extern int nfscl_debuglevel; NFSREQSPINLOCK; NFSCLSTATEMUTEX; int nfscl_inited = 0; struct nfsclhead nfsclhead; /* Head of clientid list */ int nfscl_deleghighwater = NFSCLDELEGHIGHWATER; +int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER; #endif /* !APPLEKEXT */ static int nfscl_delegcnt = 0; +static int nfscl_layoutcnt = 0; static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen 
**); static void nfscl_clrelease(struct nfsclclient *); @@ -109,9 +113,16 @@ static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **, struct nfscllock **, int); static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *); static u_int32_t nfscl_nextcbident(void); -static mount_t nfscl_getmnt(u_int32_t); +static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **); +static struct nfsclclient *nfscl_getclnt(u_int32_t); +static struct nfsclclient *nfscl_getclntsess(uint8_t *); static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *, int); +static void nfscl_retoncloselayout(struct nfsclclient *, uint8_t *, int); +static void nfscl_reldevinfo_locked(struct nfscldevinfo *); +static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, u_int8_t *, + int); +static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *); static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *, u_int8_t *, struct nfscllock **); static void nfscl_freealllocks(struct nfscllockownerhead *, int); @@ -145,6 +156,15 @@ static int nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *, struct nfsmount *, NFSPROC_T *); static void nfscl_emptylockowner(struct nfscllockowner *, struct nfscllockownerfhhead *); +static void nfscl_mergeflayouts(struct nfsclflayouthead *, + struct nfsclflayouthead *); +static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t, + uint64_t, uint32_t, struct nfsclrecalllayout *); +static int nfscl_seq(uint32_t, uint32_t); +static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *, + struct ucred *, NFSPROC_T *); +static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *, + struct ucred *, NFSPROC_T *); static short nfscberr_null[] = { 0, @@ -214,7 +234,7 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg, if (nfhp != NULL) MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + fhlen - 
1, M_NFSCLOPEN, M_WAITOK); - ret = nfscl_getcl(vp, cred, p, &clp); + ret = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (ret != 0) { FREE((caddr_t)nowp, M_NFSCLOWNER); if (nop != NULL) @@ -451,7 +471,7 @@ nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen) */ APPLESTATIC int nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, - struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp, + int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp, void **lckpp) { struct nfsclclient *clp; @@ -466,11 +486,14 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, *lckpp = NULL; /* * Initially, just set the special stateid of all zeros. + * (Don't do this for a DS, since the special stateid can't be used.) */ - stateidp->seqid = 0; - stateidp->other[0] = 0; - stateidp->other[1] = 0; - stateidp->other[2] = 0; + if (fords == 0) { + stateidp->seqid = 0; + stateidp->other[0] = 0; + stateidp->other[1] = 0; + stateidp->other[2] = 0; + } if (vnode_vtype(vp) != VREG) return (EISDIR); np = VTONFS(vp); @@ -526,7 +549,8 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, lp = NULL; error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own, mode, &lp, &op); - if (error == 0 && lp != NULL) { + if (error == 0 && lp != NULL && fords == 0) { + /* Don't return a lock stateid for a DS. */ stateidp->seqid = lp->nfsl_stateid.seqid; stateidp->other[0] = @@ -697,21 +721,21 @@ nfscl_openrelease(struct nfsclopen *op, int error, int candelete) * If the "cred" argument is NULL, a new clientid should not be created. * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot * be done. + * The start_renewthread argument tells nfscl_getcl() to start a renew + * thread if this creates a new clp. * It always clpp with a reference count on it, unless returning an error. 
*/ APPLESTATIC int -nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, - struct nfsclclient **clpp) +nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p, + int start_renewthread, struct nfsclclient **clpp) { struct nfsclclient *clp; struct nfsclclient *newclp = NULL; - struct mount *mp; struct nfsmount *nmp; char uuid[HOSTUUIDLEN]; int igotlock = 0, error, trystalecnt, clidinusedelay, i; u_int16_t idlen = 0; - mp = vnode_mount(vp); nmp = VFSTONFS(mp); if (cred != NULL) { getcredhostuuid(cred, uuid, sizeof uuid); @@ -722,7 +746,7 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */ MALLOC(newclp, struct nfsclclient *, sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT, - M_WAITOK); + M_WAITOK | M_ZERO); } NFSLOCKCLSTATE(); /* @@ -743,12 +767,15 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, return (EACCES); } clp = newclp; - NFSBZERO((caddr_t)clp, sizeof(struct nfsclclient) + idlen - 1); clp->nfsc_idlen = idlen; LIST_INIT(&clp->nfsc_owner); TAILQ_INIT(&clp->nfsc_deleg); + TAILQ_INIT(&clp->nfsc_layout); + LIST_INIT(&clp->nfsc_devinfo); for (i = 0; i < NFSCLDELEGHASHSIZE; i++) LIST_INIT(&clp->nfsc_deleghash[i]); + for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) + LIST_INIT(&clp->nfsc_layouthash[i]); clp->nfsc_flags = NFSCLFLAGS_INITED; clp->nfsc_clientidrev = 1; clp->nfsc_cbident = nfscl_nextcbident(); @@ -758,11 +785,12 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, nmp->nm_clp = clp; clp->nfsc_nmp = nmp; NFSUNLOCKCLSTATE(); - nfscl_start_renewthread(clp); + if (start_renewthread != 0) + nfscl_start_renewthread(clp); } else { NFSUNLOCKCLSTATE(); if (newclp != NULL) - FREE((caddr_t)newclp, M_NFSCLCLIENT); + free(newclp, M_NFSCLCLIENT); } NFSLOCKCLSTATE(); while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock && @@ -818,14 +846,15 @@ nfscl_getcl(vnode_t vp, struct ucred *cred, NFSPROC_T *p, clidinusedelay = 120; trystalecnt = 3; do { - error = 
nfsrpc_setclient(VFSTONFS(vnode_mount(vp)), - clp, cred, p); + error = nfsrpc_setclient(nmp, clp, 0, cred, p); if (error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || + error == NFSERR_BADSESSION || error == NFSERR_CLIDINUSE) { (void) nfs_catnap(PZERO, error, "nfs_setcl"); } } while (((error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) || (error == NFSERR_CLIDINUSE && --clidinusedelay > 0)); if (error) { @@ -942,7 +971,7 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, if (recovery) clp = rclp; else - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); } if (error) { FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); @@ -1277,7 +1306,7 @@ nfscl_checkwritelocked(vnode_t vp, struct flock *fl, end = NFS64BITSSET; } - error = nfscl_getcl(vp, cred, p, &clp); + error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (error) return (1); nfscl_filllockowner(id, own, flags); @@ -1825,19 +1854,24 @@ nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p) LIST_REMOVE(clp, nfsc_list); nfscl_delegreturnall(clp, p); cred = newnfs_getcred(); - (void) nfsrpc_setclient(nmp, clp, cred, p); + if (NFSHASNFSV4N(nmp)) { + (void)nfsrpc_destroysession(nmp, clp, cred, p); + (void)nfsrpc_destroyclient(nmp, clp, cred, p); + } else + (void)nfsrpc_setclient(nmp, clp, 0, cred, p); nfscl_cleanclient(clp); nmp->nm_clp = NULL; NFSFREECRED(cred); - FREE((caddr_t)clp, M_NFSCLCLIENT); + free(clp, M_NFSCLCLIENT); } else NFSUNLOCKCLSTATE(); } /* * This function is called when a server replies with NFSERR_STALECLIENTID - * or NFSERR_STALESTATEID. It traverses the clientid lists, doing Opens - * and Locks with reclaim. If these fail, it deletes the corresponding state. + * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists, + * doing Opens and Locks with reclaim. If these fail, it deletes the + * corresponding state. 
*/ static void nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) @@ -1854,7 +1888,8 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) struct nfsreq *rep; u_int64_t len; u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode; - int igotlock = 0, error, trycnt, firstlock, s; + int i, igotlock = 0, error, trycnt, firstlock, s; + struct nfscllayout *lyp, *nlyp; /* * First, lock the client structure, so everyone else will @@ -1871,10 +1906,22 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) nmp = clp->nfsc_nmp; if (nmp == NULL) panic("nfscl recover"); + + /* + * For now, just get rid of all layouts. There may be a need + * to do LayoutCommit Ops with reclaim == true later. + */ + TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) + nfscl_freelayout(lyp); + TAILQ_INIT(&clp->nfsc_layout); + for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) + LIST_INIT(&clp->nfsc_layouthash[i]); + trycnt = 5; do { - error = nfsrpc_setclient(nmp, clp, cred, p); + error = nfsrpc_setclient(nmp, clp, 1, cred, p); } while ((error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { nfscl_cleanclient(clp); @@ -1893,9 +1940,10 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) * Mark requests already queued on the server, so that they don't * initiate another recovery cycle. Any requests already in the * queue that handle state information will have the old stale - * clientid/stateid and will get a NFSERR_STALESTATEID or - * NFSERR_STALECLIENTID reply from the server. This will be - * translated to NFSERR_STALEDONTRECOVER when R_DONTRECOVER is set. + * clientid/stateid and will get a NFSERR_STALESTATEID, + * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server. + * This will be translated to NFSERR_STALEDONTRECOVER when + * R_DONTRECOVER is set. 
*/ s = splsoftclock(); NFSLOCKREQ(); @@ -2136,6 +2184,10 @@ nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) FREE((caddr_t)dp, M_NFSCLDELEG); } + /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */ + if (NFSHASNFSV4N(nmp)) + (void)nfsrpc_reclaimcomplete(nmp, cred, p); + NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG; wakeup(&clp->nfsc_flags); @@ -2190,8 +2242,9 @@ nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p) cred = newnfs_getcred(); trycnt = 5; do { - error = nfsrpc_setclient(nmp, clp, cred, p); + error = nfsrpc_setclient(nmp, clp, 0, cred, p); } while ((error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { /* @@ -2398,6 +2451,11 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) static time_t prevsec = 0; struct nfscllockownerfh *lfhp, *nlfhp; struct nfscllockownerfhhead lfh; + struct nfscllayout *lyp, *nlyp; + struct nfscldevinfo *dip, *ndip; + struct nfscllayouthead rlh; + struct nfsclrecalllayout *recallp; + struct nfsclds *dsp; cred = newnfs_getcred(); NFSLOCKCLSTATE(); @@ -2425,10 +2483,12 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) { clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; clidrev = clp->nfsc_clientidrev; - error = nfsrpc_renew(clp, cred, p); + error = nfsrpc_renew(clp, + TAILQ_FIRST(&clp->nfsc_nmp->nm_sess), cred, p); if (error == NFSERR_CBPATHDOWN) cbpathdown = 1; - else if (error == NFSERR_STALECLIENTID) { + else if (error == NFSERR_STALECLIENTID || + error == NFSERR_BADSESSION) { NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECOVER; NFSUNLOCKCLSTATE(); @@ -2436,6 +2496,25 @@ nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p) (void) nfscl_hasexpired(clp, clidrev, p); } + /* Do renews for any DS sessions. */ +checkdsrenew: + NFSLOCKMNT(clp->nfsc_nmp); + /* Skip first entry, since the MDS is handled above. 
*/ + dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess); + if (dsp != NULL) + dsp = TAILQ_NEXT(dsp, nfsclds_list); + while (dsp != NULL) { + if (dsp->nfsclds_expire <= NFSD_MONOSEC) { + dsp->nfsclds_expire = NFSD_MONOSEC + + clp->nfsc_renew; + NFSUNLOCKMNT(clp->nfsc_nmp); + (void)nfsrpc_renew(clp, dsp, cred, p); + goto checkdsrenew; + } + dsp = TAILQ_NEXT(dsp, nfsclds_list); + } + NFSUNLOCKMNT(clp->nfsc_nmp); + TAILQ_INIT(&dh); NFSLOCKCLSTATE(); if (cbpathdown) @@ -2542,8 +2621,90 @@ tryagain: } if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); + + /* + * Do the recall on any layouts. To avoid trouble, always + * come back up here after having slept. + */ + TAILQ_INIT(&rlh); +tryagain2: + TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) { + if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) { + /* + * Wait for outstanding I/O ops to be done. + */ + if (lyp->nfsly_lock.nfslock_usecnt > 0 || + (lyp->nfsly_lock.nfslock_lock & + NFSV4LOCK_LOCK) != 0) { + lyp->nfsly_lock.nfslock_lock |= + NFSV4LOCK_WANTED; + (void)nfsmsleep(&lyp->nfsly_lock, + NFSCLSTATEMUTEXPTR, PZERO, "nfslyp", + NULL); + goto tryagain2; + } + /* Move the layout to the recall list. */ + TAILQ_REMOVE(&clp->nfsc_layout, lyp, + nfsly_list); + LIST_REMOVE(lyp, nfsly_hash); + TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list); + + /* Handle any layout commits. */ + if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) && + (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) { + lyp->nfsly_flags &= ~NFSLY_WRITTEN; + NFSUNLOCKCLSTATE(); + NFSCL_DEBUG(3, "do layoutcommit\n"); + nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, + cred, p); + NFSLOCKCLSTATE(); + goto tryagain2; + } + } + } + + /* Now, look for stale layouts. 
*/ + lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead); + while (lyp != NULL) { + nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list); + if (lyp->nfsly_timestamp < NFSD_MONOSEC && + (lyp->nfsly_flags & NFSLY_RECALL) == 0 && + lyp->nfsly_lock.nfslock_usecnt == 0 && + lyp->nfsly_lock.nfslock_lock == 0) { + NFSCL_DEBUG(4, "ret stale lay=%d\n", + nfscl_layoutcnt); + recallp = malloc(sizeof(*recallp), + M_NFSLAYRECALL, M_NOWAIT); + if (recallp == NULL) + break; + (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, + lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX, + lyp->nfsly_stateid.seqid, recallp); + } + lyp = nlyp; + } + + /* + * Free up any unreferenced device info structures. + */ + LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) { + if (dip->nfsdi_layoutrefs == 0 && + dip->nfsdi_refcnt == 0) { + NFSCL_DEBUG(4, "freeing devinfo\n"); + LIST_REMOVE(dip, nfsdi_list); + nfscl_freedevinfo(dip); + } + } NFSUNLOCKCLSTATE(); + /* Do layout return(s), as required. */ + TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) { + TAILQ_REMOVE(&rlh, lyp, nfsly_list); + NFSCL_DEBUG(4, "ret layout\n"); + nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p); + nfscl_freelayout(lyp); + } + /* * Delegreturn any delegations cleaned out or recalled. */ @@ -2599,8 +2760,8 @@ tryagain: } /* - * Initiate state recovery. Called when NFSERR_STALECLIENTID or - * NFSERR_STALESTATEID is received. + * Initiate state recovery. Called when NFSERR_STALECLIENTID, + * NFSERR_STALESTATEID or NFSERR_BADSESSION is received. 
*/ APPLESTATIC void nfscl_initiate_recovery(struct nfsclclient *clp) @@ -2832,7 +2993,7 @@ nfscl_getclose(vnode_t vp, struct nfsclclient **clpp) struct nfsfh *nfhp; int error, notdecr; - error = nfscl_getcl(vp, NULL, NULL, &clp); + error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp); if (error) return (error); *clpp = clp; @@ -2906,7 +3067,7 @@ nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p) struct nfsfh *nfhp; int error; - error = nfscl_getcl(vp, NULL, NULL, &clp); + error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp); if (error) return (error); *clpp = clp; @@ -2930,6 +3091,9 @@ nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p) } } + /* Return any layouts marked return on close. */ + nfscl_retoncloselayout(clp, nfhp->nfh_fh, nfhp->nfh_len); + /* Now process the opens against the server. */ lookformore: LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { @@ -2979,11 +3143,11 @@ nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p) APPLESTATIC void nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) { - int i, op; + int clist, gotseq_ok, i, j, k, op, rcalls; u_int32_t *tl; struct nfsclclient *clp; struct nfscldeleg *dp = NULL; - int numops, taglen = -1, error = 0, trunc, ret = 0; + int numops, taglen = -1, error = 0, trunc; u_int32_t minorvers, retops = 0, *retopsp = NULL, *repp, cbident; u_char tag[NFSV4_SMALLSTR + 1], *tagstr; vnode_t vp = NULL; @@ -2993,7 +3157,16 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) mount_t mp; nfsattrbit_t attrbits, rattrbits; nfsv4stateid_t stateid; - + uint32_t seqid, slotid = 0, highslot, cachethis; + uint8_t sessionid[NFSX_V4SESSIONID]; + struct mbuf *rep; + struct nfscllayout *lyp; + uint64_t filesid[2], len, off; + int changed, gotone, laytype, recalltype; + uint32_t iomode; + struct nfsclrecalllayout *recallp = NULL; + + gotseq_ok = 0; nfsrvd_rephead(nd); NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); taglen = fxdr_unsigned(int, *tl); @@ -3019,7 +3192,7 @@ 
nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED); NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED); minorvers = fxdr_unsigned(u_int32_t, *tl++); - if (minorvers != NFSV4_MINORVERSION) + if (minorvers != NFSV4_MINORVERSION && minorvers != NFSV41_MINORVERSION) nd->nd_repstat = NFSERR_MINORVERMISMATCH; cbident = fxdr_unsigned(u_int32_t, *tl++); if (nd->nd_repstat) @@ -3034,73 +3207,85 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED); *repp++ = *tl; op = fxdr_unsigned(int, *tl); - if (op < NFSV4OP_CBGETATTR || op > NFSV4OP_CBRECALL) { + if (op < NFSV4OP_CBGETATTR || + (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) || + (op > NFSV4OP_CBNOTIFYDEVID && + minorvers == NFSV41_MINORVERSION)) { nd->nd_repstat = NFSERR_OPILLEGAL; *repp = nfscl_errmap(nd); retops++; break; } nd->nd_procnum = op; - newnfsstats.cbrpccnt[nd->nd_procnum]++; + if (op < NFSV4OP_CBNOPS) + newnfsstats.cbrpccnt[nd->nd_procnum]++; switch (op) { case NFSV4OP_CBGETATTR: - clp = NULL; + NFSCL_DEBUG(4, "cbgetattr\n"); + mp = NULL; + vp = NULL; error = nfsm_getfh(nd, &nfhp); if (!error) error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL); + if (error == 0 && i == 0 && + minorvers != NFSV4_MINORVERSION) + error = NFSERR_OPNOTINSESS; if (!error) { - mp = nfscl_getmnt(cbident); + mp = nfscl_getmnt(minorvers, sessionid, cbident, + &clp); if (mp == NULL) error = NFSERR_SERVERFAULT; } if (!error) { - dp = NULL; - NFSLOCKCLSTATE(); - clp = nfscl_findcl(VFSTONFS(mp)); - if (clp != NULL) - dp = nfscl_finddeleg(clp, nfhp->nfh_fh, - nfhp->nfh_len); - NFSUNLOCKCLSTATE(); - if (dp == NULL) - error = NFSERR_SERVERFAULT; - } - if (!error) { - ret = nfscl_ngetreopen(mp, nfhp->nfh_fh, + error = nfscl_ngetreopen(mp, nfhp->nfh_fh, nfhp->nfh_len, p, &np); - if (!ret) + if (!error) vp = NFSTOV(np); } - if (nfhp != NULL) - FREE((caddr_t)nfhp, M_NFSFH); if (!error) { NFSZERO_ATTRBIT(&rattrbits); - if 
(NFSISSET_ATTRBIT(&attrbits, - NFSATTRBIT_SIZE)) { - if (!ret) - va.va_size = np->n_size; - else - va.va_size = dp->nfsdl_size; - NFSSETBIT_ATTRBIT(&rattrbits, - NFSATTRBIT_SIZE); - } - if (NFSISSET_ATTRBIT(&attrbits, - NFSATTRBIT_CHANGE)) { - va.va_filerev = dp->nfsdl_change; - if (ret || (np->n_flag & NDELEGMOD)) - va.va_filerev++; - NFSSETBIT_ATTRBIT(&rattrbits, - NFSATTRBIT_CHANGE); - } + NFSLOCKCLSTATE(); + dp = nfscl_finddeleg(clp, nfhp->nfh_fh, + nfhp->nfh_len); + if (dp != NULL) { + if (NFSISSET_ATTRBIT(&attrbits, + NFSATTRBIT_SIZE)) { + if (vp != NULL) + va.va_size = np->n_size; + else + va.va_size = + dp->nfsdl_size; + NFSSETBIT_ATTRBIT(&rattrbits, + NFSATTRBIT_SIZE); + } + if (NFSISSET_ATTRBIT(&attrbits, + NFSATTRBIT_CHANGE)) { + va.va_filerev = + dp->nfsdl_change; + if (vp == NULL || + (np->n_flag & NDELEGMOD)) + va.va_filerev++; + NFSSETBIT_ATTRBIT(&rattrbits, + NFSATTRBIT_CHANGE); + } + } else + error = NFSERR_SERVERFAULT; + NFSUNLOCKCLSTATE(); + } + if (vp != NULL) + vrele(vp); + if (mp != NULL) + vfs_unbusy(mp); + if (nfhp != NULL) + FREE((caddr_t)nfhp, M_NFSFH); + if (!error) (void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va, NULL, 0, &rattrbits, NULL, NULL, 0, 0, 0, 0, (uint64_t)0); - if (!ret) - vrele(vp); - } break; case NFSV4OP_CBRECALL: - clp = NULL; + NFSCL_DEBUG(4, "cbrecall\n"); NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED); stateid.seqid = *tl++; @@ -3109,14 +3294,15 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED); trunc = fxdr_unsigned(int, *tl); error = nfsm_getfh(nd, &nfhp); - if (!error) { - mp = nfscl_getmnt(cbident); - if (mp == NULL) - error = NFSERR_SERVERFAULT; - } + if (error == 0 && i == 0 && + minorvers != NFSV4_MINORVERSION) + error = NFSERR_OPNOTINSESS; if (!error) { NFSLOCKCLSTATE(); - clp = nfscl_findcl(VFSTONFS(mp)); + if (minorvers == NFSV4_MINORVERSION) + clp = nfscl_getclnt(cbident); + else + clp = nfscl_getclntsess(sessionid); if (clp != NULL) { dp = 
nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len); @@ -3134,6 +3320,195 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) if (nfhp != NULL) FREE((caddr_t)nfhp, M_NFSFH); break; + case NFSV4OP_CBLAYOUTRECALL: + NFSCL_DEBUG(4, "cblayrec\n"); + nfhp = NULL; + NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED); + laytype = fxdr_unsigned(int, *tl++); + iomode = fxdr_unsigned(uint32_t, *tl++); + if (newnfs_true == *tl++) + changed = 1; + else + changed = 0; + recalltype = fxdr_unsigned(int, *tl); + recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, + M_WAITOK); + if (laytype != NFSLAYOUT_NFSV4_1_FILES) + error = NFSERR_NOMATCHLAYOUT; + else if (recalltype == NFSLAYOUTRETURN_FILE) { + error = nfsm_getfh(nd, &nfhp); + NFSCL_DEBUG(4, "retfile getfh=%d\n", error); + if (error != 0) + goto nfsmout; + NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER + + NFSX_STATEID); + off = fxdr_hyper(tl); tl += 2; + len = fxdr_hyper(tl); tl += 2; + stateid.seqid = fxdr_unsigned(uint32_t, *tl++); + NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER); + if (minorvers == NFSV4_MINORVERSION) + error = NFSERR_NOTSUPP; + else if (i == 0) + error = NFSERR_OPNOTINSESS; + if (error == 0) { + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + NFSCL_DEBUG(4, "cbly clp=%p\n", clp); + if (clp != NULL) { + lyp = nfscl_findlayout(clp, + nfhp->nfh_fh, + nfhp->nfh_len); + NFSCL_DEBUG(4, "cblyp=%p\n", + lyp); + if (lyp != NULL && + (lyp->nfsly_flags & + NFSLY_FILES) != 0 && + !NFSBCMP(stateid.other, + lyp->nfsly_stateid.other, + NFSX_STATEIDOTHER)) { + error = + nfscl_layoutrecall( + recalltype, + lyp, iomode, off, + len, stateid.seqid, + recallp); + recallp = NULL; + wakeup(clp); + NFSCL_DEBUG(4, + "aft layrcal=%d\n", + error); + } else + error = + NFSERR_NOMATCHLAYOUT; + } else + error = NFSERR_NOMATCHLAYOUT; + NFSUNLOCKCLSTATE(); + } + free(nfhp, M_NFSFH); + } else if (recalltype == NFSLAYOUTRETURN_FSID) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER); + filesid[0] = fxdr_hyper(tl); tl += 2; + filesid[1] 
= fxdr_hyper(tl); tl += 2; + gotone = 0; + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + if (clp != NULL) { + TAILQ_FOREACH(lyp, &clp->nfsc_layout, + nfsly_list) { + if (lyp->nfsly_filesid[0] == + filesid[0] && + lyp->nfsly_filesid[1] == + filesid[1]) { + error = + nfscl_layoutrecall( + recalltype, + lyp, iomode, 0, + UINT64_MAX, + lyp->nfsly_stateid.seqid, + recallp); + recallp = NULL; + gotone = 1; + } + } + if (gotone != 0) + wakeup(clp); + else + error = NFSERR_NOMATCHLAYOUT; + } else + error = NFSERR_NOMATCHLAYOUT; + NFSUNLOCKCLSTATE(); + } else if (recalltype == NFSLAYOUTRETURN_ALL) { + gotone = 0; + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + if (clp != NULL) { + TAILQ_FOREACH(lyp, &clp->nfsc_layout, + nfsly_list) { + error = nfscl_layoutrecall( + recalltype, lyp, iomode, 0, + UINT64_MAX, + lyp->nfsly_stateid.seqid, + recallp); + recallp = NULL; + gotone = 1; + } + if (gotone != 0) + wakeup(clp); + else + error = NFSERR_NOMATCHLAYOUT; + } else + error = NFSERR_NOMATCHLAYOUT; + NFSUNLOCKCLSTATE(); + } else + error = NFSERR_NOMATCHLAYOUT; + if (recallp != NULL) { + free(recallp, M_NFSLAYRECALL); + recallp = NULL; + } + break; + case NFSV4OP_CBSEQUENCE: + NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + + 5 * NFSX_UNSIGNED); + bcopy(tl, sessionid, NFSX_V4SESSIONID); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + seqid = fxdr_unsigned(uint32_t, *tl++); + slotid = fxdr_unsigned(uint32_t, *tl++); + highslot = fxdr_unsigned(uint32_t, *tl++); + cachethis = *tl++; + /* Throw away the referring call stuff. 
*/ + clist = fxdr_unsigned(int, *tl); + for (j = 0; j < clist; j++) { + NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + + NFSX_UNSIGNED); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + rcalls = fxdr_unsigned(int, *tl); + for (k = 0; k < rcalls; k++) { + NFSM_DISSECT(tl, uint32_t *, + 2 * NFSX_UNSIGNED); + } + } + NFSLOCKCLSTATE(); + if (i == 0) { + clp = nfscl_getclntsess(sessionid); + if (clp == NULL) + error = NFSERR_SERVERFAULT; + } else + error = NFSERR_SEQUENCEPOS; + if (error == 0) + error = nfsv4_seqsession(seqid, slotid, + highslot, + NFSMNT_MDSSESSION(clp->nfsc_nmp)-> + nfsess_cbslots, &rep, + NFSMNT_MDSSESSION(clp->nfsc_nmp)-> + nfsess_backslots); + NFSUNLOCKCLSTATE(); + if (error == 0) { + gotseq_ok = 1; + if (rep != NULL) { + NFSCL_DEBUG(4, "Got cbretry\n"); + m_freem(nd->nd_mreq); + nd->nd_mreq = rep; + rep = NULL; + goto out; + } + NFSM_BUILD(tl, uint32_t *, + NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED); + bcopy(sessionid, tl, NFSX_V4SESSIONID); + tl += NFSX_V4SESSIONID / NFSX_UNSIGNED; + *tl++ = txdr_unsigned(seqid); + *tl++ = txdr_unsigned(slotid); + *tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1); + *tl = txdr_unsigned(NFSV4_CBSLOTS - 1); + } + break; + default: + if (i == 0 && minorvers == NFSV41_MINORVERSION) + error = NFSERR_OPNOTINSESS; + else { + NFSCL_DEBUG(1, "unsupp callback %d\n", op); + error = NFSERR_NOTSUPP; + } + break; }; if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) { @@ -3151,6 +3526,8 @@ nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p) *repp = 0; /* NFS4_OK */ } nfsmout: + if (recallp != NULL) + free(recallp, M_NFSLAYRECALL); if (error) { if (error == EBADRPC || error == NFSERR_BADXDR) nd->nd_repstat = NFSERR_BADXDR; @@ -3165,6 +3542,21 @@ nfsmout: *retopsp = txdr_unsigned(retops); } *nd->nd_errp = nfscl_errmap(nd); +out: + if (gotseq_ok != 0) { + rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); + NFSLOCKCLSTATE(); + clp = nfscl_getclntsess(sessionid); + if (clp != NULL) { + nfsv4_seqsess_cacherep(slotid, + 
NFSMNT_MDSSESSION(clp->nfsc_nmp)->nfsess_cbslots, + rep); + NFSUNLOCKCLSTATE(); + } else { + NFSUNLOCKCLSTATE(); + m_freem(rep); + } + } } /* @@ -3204,26 +3596,68 @@ nfscl_nextcbident(void) } /* - * Get the mount point related to a given cbident. + * Get the mount point related to a given cbident or session and busy it. */ static mount_t -nfscl_getmnt(u_int32_t cbident) +nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident, + struct nfsclclient **clpp) { struct nfsclclient *clp; - struct nfsmount *nmp; + mount_t mp; + int error; + *clpp = NULL; NFSLOCKCLSTATE(); LIST_FOREACH(clp, &nfsclhead, nfsc_list) { - if (clp->nfsc_cbident == cbident) + if (minorvers == NFSV4_MINORVERSION) { + if (clp->nfsc_cbident == cbident) + break; + } else if (!NFSBCMP(NFSMNT_MDSSESSION(clp->nfsc_nmp)-> + nfsess_sessionid, sessionid, NFSX_V4SESSIONID)) break; } if (clp == NULL) { NFSUNLOCKCLSTATE(); return (NULL); } - nmp = clp->nfsc_nmp; + mp = clp->nfsc_nmp->nm_mountp; + vfs_ref(mp); NFSUNLOCKCLSTATE(); - return (nmp->nm_mountp); + error = vfs_busy(mp, 0); + vfs_rel(mp); + if (error != 0) + return (NULL); + *clpp = clp; + return (mp); +} + +/* + * Get the clientid pointer related to a given cbident. + */ +static struct nfsclclient * +nfscl_getclnt(u_int32_t cbident) +{ + struct nfsclclient *clp; + + LIST_FOREACH(clp, &nfsclhead, nfsc_list) + if (clp->nfsc_cbident == cbident) + break; + return (clp); +} + +/* + * Get the clientid pointer related to a given sessionid. 
+ */ +static struct nfsclclient * +nfscl_getclntsess(uint8_t *sessionid) +{ + struct nfsclclient *clp; + + LIST_FOREACH(clp, &nfsclhead, nfsc_list) + if (!NFSBCMP(NFSMNT_MDSSESSION(clp->nfsc_nmp)->nfsess_sessionid, + sessionid, NFSX_V4SESSIONID)) + break; + return (clp); } /* @@ -3420,7 +3854,8 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, ret = nfscl_moveopen(vp, clp, nmp, lop, owp, dp, cred, p); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) { + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); @@ -3451,7 +3886,8 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, if (ret) { nfscl_freeopenowner(owp, 0); if (ret == NFSERR_STALECLIENTID || - ret == NFSERR_STALEDONTRECOVER) { + ret == NFSERR_STALEDONTRECOVER || + ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); @@ -3475,7 +3911,8 @@ nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p); if (ret == NFSERR_STALESTATEID || ret == NFSERR_STALEDONTRECOVER || - ret == NFSERR_STALECLIENTID) { + ret == NFSERR_STALECLIENTID || + ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); @@ -4223,10 +4660,549 @@ nfscl_errmap(struct nfsrv_descript *nd) if (nd->nd_repstat == NFSERR_MINORVERMISMATCH || nd->nd_repstat == NFSERR_OPILLEGAL) return (txdr_unsigned(nd->nd_repstat)); - errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum]; + if (nd->nd_procnum < NFSV4OP_CBNOPS) + errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum]; + else + return (txdr_unsigned(nd->nd_repstat)); while (*++errp) if (*errp == (short)nd->nd_repstat) return (txdr_unsigned(nd->nd_repstat)); return (txdr_unsigned(*defaulterrp)); } +/* + * Called to find/add a layout to a client. + * This function returns the layout with a refcnt (shared lock) upon + * success (returns 0) or with no lock/refcnt on the layout when an + * error is returned. 
+ * If a layout is passed in via lypp, it is locked (exclusively locked). + */ +APPLESTATIC int +nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, + nfsv4stateid_t *stateidp, int retonclose, + struct nfsclflayouthead *fhlp, struct nfscllayout **lypp, + struct ucred *cred, NFSPROC_T *p) +{ + struct nfsclclient *clp; + struct nfscllayout *lyp, *tlyp; + struct nfsclflayout *flp; + struct nfsnode *np = VTONFS(vp); + mount_t mp; + int layout_passed_in; + + mp = nmp->nm_mountp; + layout_passed_in = 1; + tlyp = NULL; + lyp = *lypp; + if (lyp == NULL) { + layout_passed_in = 0; + tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT, + M_WAITOK | M_ZERO); + } + + NFSLOCKCLSTATE(); + clp = nmp->nm_clp; + if (clp == NULL) { + if (layout_passed_in != 0) + nfsv4_unlock(&lyp->nfsly_lock, 0); + NFSUNLOCKCLSTATE(); + if (tlyp != NULL) + free(tlyp, M_NFSLAYOUT); + return (EPERM); + } + if (lyp == NULL) { + /* + * Although no lyp was passed in, another thread might have + * allocated one. If one is found, just increment it's ref + * count and return it. + */ + lyp = nfscl_findlayout(clp, fhp, fhlen); + if (lyp == NULL) { + lyp = tlyp; + tlyp = NULL; + lyp->nfsly_stateid.seqid = stateidp->seqid; + lyp->nfsly_stateid.other[0] = stateidp->other[0]; + lyp->nfsly_stateid.other[1] = stateidp->other[1]; + lyp->nfsly_stateid.other[2] = stateidp->other[2]; + lyp->nfsly_lastbyte = 0; + LIST_INIT(&lyp->nfsly_flayread); + LIST_INIT(&lyp->nfsly_flayrw); + LIST_INIT(&lyp->nfsly_recall); + lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0]; + lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1]; + lyp->nfsly_clp = clp; + lyp->nfsly_flags = (retonclose != 0) ? 
+ (NFSLY_FILES | NFSLY_RETONCLOSE) : NFSLY_FILES; + lyp->nfsly_fhlen = fhlen; + NFSBCOPY(fhp, lyp->nfsly_fh, fhlen); + TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); + LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp, + nfsly_hash); + lyp->nfsly_timestamp = NFSD_MONOSEC + 120; + nfscl_layoutcnt++; + } else { + if (retonclose != 0) + lyp->nfsly_flags |= NFSLY_RETONCLOSE; + TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); + TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); + lyp->nfsly_timestamp = NFSD_MONOSEC + 120; + } + nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); + if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { + NFSUNLOCKCLSTATE(); + if (tlyp != NULL) + free(tlyp, M_NFSLAYOUT); + return (EPERM); + } + *lypp = lyp; + } else + lyp->nfsly_stateid.seqid = stateidp->seqid; + + /* Merge the new list of File Layouts into the list. */ + flp = LIST_FIRST(fhlp); + if (flp != NULL) { + if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ) + nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp); + else + nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp); + } + if (layout_passed_in != 0) + nfsv4_unlock(&lyp->nfsly_lock, 1); + NFSUNLOCKCLSTATE(); + if (tlyp != NULL) + free(tlyp, M_NFSLAYOUT); + return (0); +} + +/* + * Search for a layout by MDS file handle. + * If one is found, it is returned with a refcnt (shared lock) iff + * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is + * returned NULL. 
+ */
+struct nfscllayout *
+nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen,
+    uint64_t off, struct nfsclflayout **retflpp, int *recalledp)
+{
+	struct nfscllayout *lyp;
+	mount_t mp;
+	int error, igotlock;
+
+	mp = clp->nfsc_nmp->nm_mountp;
+	*recalledp = 0;
+	*retflpp = NULL;
+	NFSLOCKCLSTATE();
+	lyp = nfscl_findlayout(clp, fhp, fhlen);
+	if (lyp != NULL) {
+		if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) {
+			/* Move it to the list head and refresh its timestamp. */
+			TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
+			TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list);
+			lyp->nfsly_timestamp = NFSD_MONOSEC + 120;
+			error = nfscl_findlayoutforio(lyp, off,
+			    NFSV4OPEN_ACCESSREAD, retflpp);
+			if (error == 0)
+				/* Lookup succeeded: a reference is enough. */
+				nfsv4_getref(&lyp->nfsly_lock, NULL,
+				    NFSCLSTATEMUTEXPTR, mp);
+			else {
+				/*
+				 * Lookup failed: take the exclusive lock,
+				 * retrying until it is acquired or a forced
+				 * unmount starts, and report no file layout.
+				 */
+				do {
+					igotlock = nfsv4_lock(&lyp->nfsly_lock,
+					    1, NULL, NFSCLSTATEMUTEXPTR, mp);
+				} while (igotlock == 0 &&
+				    (mp->mnt_kern_flag & MNTK_UNMOUNTF) == 0);
+				*retflpp = NULL;
+			}
+			/* A forced unmount is reported like a recall. */
+			if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
+				lyp = NULL;
+				*recalledp = 1;
+			}
+		} else {
+			/* The layout is being recalled, so do not use it. */
+			lyp = NULL;
+			*recalledp = 1;
+		}
+	}
+	NFSUNLOCKCLSTATE();
+	return (lyp);
+}
+
+/*
+ * Search for a layout by MDS file handle. If one is found that is marked
+ * "return on close", delete it, since it should now be forgotten.
+ * The layout is only deleted once it has no references and no lock
+ * holder; until then this sleeps on the layout's lock and redoes the
+ * lookup from scratch after each wakeup.
+ * NOTE(review): the mtx_sleep() below passes NFSCLSTATEMUTEXPTR, so the
+ * caller presumably holds the NFSCLSTATE mutex -- confirm against callers.
+ */
+static void
+nfscl_retoncloselayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen)
+{
+	struct nfscllayout *lyp;
+
+tryagain:
+	lyp = nfscl_findlayout(clp, fhp, fhlen);
+	if (lyp != NULL && (lyp->nfsly_flags & NFSLY_RETONCLOSE) != 0) {
+		/*
+		 * Wait for outstanding I/O ops to be done.
+		 */
+		if (lyp->nfsly_lock.nfslock_usecnt != 0 ||
+		    lyp->nfsly_lock.nfslock_lock != 0) {
+			lyp->nfsly_lock.nfslock_lock |= NFSV4LOCK_WANTED;
+			(void)mtx_sleep(&lyp->nfsly_lock,
+			    NFSCLSTATEMUTEXPTR, PZERO, "nfslyc", 0);
+			goto tryagain;
+		}
+		/*
+		 * Unlink the layout from the client's list and from the
+		 * file handle hash chain before freeing it.
+		 * nfscl_freelayout() only frees the structure and its
+		 * sub-lists, so without this a later nfscl_findlayout()
+		 * would walk freed memory.
+		 */
+		TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list);
+		LIST_REMOVE(lyp, nfsly_hash);
+		nfscl_freelayout(lyp);
+	}
+}
+
+/*
+ * Dereference a layout.
+ */
+void
+nfscl_rellayout(struct nfscllayout *lyp, int exclocked)
+{
+
+	NFSLOCKCLSTATE();
+	if (exclocked != 0)
+		nfsv4_unlock(&lyp->nfsly_lock, 0);
+	else
+		nfsv4_relref(&lyp->nfsly_lock);
+	NFSUNLOCKCLSTATE();
+}
+
+/*
+ * Search for a devinfo by deviceid. If one is found, return it after
+ * acquiring a reference count on it.
+ * If dip is passed in non-NULL, the search is skipped and the reference
+ * is taken on dip itself.  Returns the devinfo that was referenced, or
+ * NULL when dip was NULL and no entry matched deviceid.  The NFSCLSTATE
+ * lock is taken and released here.
+ */
+struct nfscldevinfo *
+nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid,
+    struct nfscldevinfo *dip)
+{
+
+	NFSLOCKCLSTATE();
+	if (dip == NULL)
+		dip = nfscl_finddevinfo(clp, deviceid);
+	if (dip != NULL)
+		dip->nfsdi_refcnt++;
+	NFSUNLOCKCLSTATE();
+	return (dip);
+}
+
+/*
+ * Dereference a devinfo structure.
+ * Drops one nfsdi_refcnt and, when the count reaches zero, wakes up any
+ * thread sleeping on &dip->nfsdi_refcnt.  No locking is done here;
+ * nfscl_reldevinfo() calls this with the NFSCLSTATE lock held.
+ */
+static void
+nfscl_reldevinfo_locked(struct nfscldevinfo *dip)
+{
+
+	dip->nfsdi_refcnt--;
+	if (dip->nfsdi_refcnt == 0)
+		wakeup(&dip->nfsdi_refcnt);
+}
+
+/*
+ * Dereference a devinfo structure.
+ * Wrapper around nfscl_reldevinfo_locked() that takes and releases the
+ * NFSCLSTATE lock around the call.
+ */
+void
+nfscl_reldevinfo(struct nfscldevinfo *dip)
+{
+
+	NFSLOCKCLSTATE();
+	nfscl_reldevinfo_locked(dip);
+	NFSUNLOCKCLSTATE();
+}
+
+/*
+ * Find a layout for this file handle. Return NULL upon failure.
+ * Only the hash chain selected by NFSCLLAYOUTHASH() is searched; an
+ * entry matches when its file handle has the same length and bytes.
+ * No lock is taken and no reference is acquired on the result here.
+ */
+static struct nfscllayout *
+nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
+{
+	struct nfscllayout *lyp;
+
+	LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
+		if (lyp->nfsly_fhlen == fhlen &&
+		    !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
+			break;
+	return (lyp);
+}
+
+/*
+ * Find a devinfo for this deviceid. Return NULL upon failure.
+ * The client's nfsc_devinfo list is searched linearly, comparing
+ * NFSX_V4DEVICEID bytes of each entry's deviceid.  No lock is taken and
+ * no reference is acquired on the result here.
+ */
+static struct nfscldevinfo *
+nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
+{
+	struct nfscldevinfo *dip;
+
+	LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
+		if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
+		    == 0)
+			break;
+	return (dip);
+}
+
+/*
+ * Merge the new file layout list into the main one, maintaining it in
+ * increasing offset order.
+ */
+static void
+nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
+    struct nfsclflayouthead *newfhlp)
+{
+	struct nfsclflayout *flp, *nflp, *prevflp, *tflp;
+
+	flp = LIST_FIRST(fhlp);
+	prevflp = NULL;
+	LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
+		while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
+			prevflp = flp;
+			flp = LIST_NEXT(flp, nfsfl_list);
+		}
+		if (prevflp == NULL)
+			LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
+		else
+			LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
+		prevflp = nflp;
+	}
+}
+
+/*
+ * Add this nfscldevinfo to the client, if it doesn't already exist.
+ * This function consumes the structure pointed at by dip, if not NULL.
+ * The deviceid looked up is flp->nfsfl_dev.  If the client already has
+ * a devinfo for it, that entry's nfsdi_layoutrefs is incremented,
+ * flp->nfsfl_devp is pointed at it, one nfsdi_refcnt is dropped on it
+ * and dip is freed.  Otherwise dip, if not NULL, is put on the client's
+ * nfsc_devinfo list with nfsdi_layoutrefs set to 1 and flp->nfsfl_devp
+ * is pointed at it.
+ * NOTE(review): no reference is taken in this function before the
+ * nfscl_reldevinfo_locked() call, so it presumably releases one the
+ * caller acquired earlier -- confirm against the callers.
+ * Returns 0 on success and ENODEV if the mount has no client structure
+ * or if no existing entry was found and dip is NULL.
+ */
+APPLESTATIC int
+nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip,
+    struct nfsclflayout *flp)
+{
+	struct nfsclclient *clp;
+	struct nfscldevinfo *tdip;
+
+	NFSLOCKCLSTATE();
+	clp = nmp->nm_clp;
+	if (clp == NULL) {
+		NFSUNLOCKCLSTATE();
+		if (dip != NULL)
+			free(dip, M_NFSDEVINFO);
+		return (ENODEV);
+	}
+	tdip = nfscl_finddevinfo(clp, flp->nfsfl_dev);
+	if (tdip != NULL) {
+		tdip->nfsdi_layoutrefs++;
+		flp->nfsfl_devp = tdip;
+		nfscl_reldevinfo_locked(tdip);
+		NFSUNLOCKCLSTATE();
+		if (dip != NULL)
+			free(dip, M_NFSDEVINFO);
+		return (0);
+	}
+	if (dip != NULL) {
+		LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list);
+		dip->nfsdi_layoutrefs = 1;
+		flp->nfsfl_devp = dip;
+	}
+	NFSUNLOCKCLSTATE();
+	if (dip == NULL)
+		return (ENODEV);
+	return (0);
+}
+
+/*
+ * Free up a layout structure and associated file layout structure(s).
+ */
+APPLESTATIC void
+nfscl_freelayout(struct nfscllayout *layp)
+{
+	struct nfsclflayout *flp, *nflp;
+	struct nfsclrecalllayout *rp, *nrp;
+
+	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) {
+		LIST_REMOVE(flp, nfsfl_list);
+		nfscl_freeflayout(flp);
+	}
+	LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) {
+		LIST_REMOVE(flp, nfsfl_list);
+		nfscl_freeflayout(flp);
+	}
+	LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) {
+		LIST_REMOVE(rp, nfsrecly_list);
+		free(rp, M_NFSLAYRECALL);
+	}
+	nfscl_layoutcnt--;
+	free(layp, M_NFSLAYOUT);
+}
+
+/*
+ * Free up a file layout structure.
+ * Frees each of the nfsfl_fhcnt file handles hung off nfsfl_fh[], drops
+ * this file layout's count in nfsdi_layoutrefs of the devinfo it points
+ * at (if any) and then frees the structure itself.  The caller must
+ * already have unlinked flp from whatever list it was on.
+ */
+APPLESTATIC void
+nfscl_freeflayout(struct nfsclflayout *flp)
+{
+	int i;
+
+	for (i = 0; i < flp->nfsfl_fhcnt; i++)
+		free(flp->nfsfl_fh[i], M_NFSFH);
+	if (flp->nfsfl_devp != NULL)
+		flp->nfsfl_devp->nfsdi_layoutrefs--;
+	free(flp, M_NFSFLAYOUT);
+}
+
+/*
+ * Free up a file layout devinfo structure.
+ * Only the memory is released; no list unlinking or reference count
+ * checking is done here.
+ */
+APPLESTATIC void
+nfscl_freedevinfo(struct nfscldevinfo *dip)
+{
+
+	free(dip, M_NFSDEVINFO);
+}
+
+/*
+ * Mark any layouts that match as recalled.
+ * The caller-supplied recallp is filled in from the arguments and
+ * linked into lyp's nfsly_recall list at the position described below,
+ * then NFSLY_RECALL is set in the layout's flags.  Once linked, the
+ * entry is released with M_NFSLAYRECALL by nfscl_freelayout().
+ * Always returns 0.
+ */
+static int
+nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
+    uint64_t off, uint64_t len, uint32_t stateseqid,
+    struct nfsclrecalllayout *recallp)
+{
+	struct nfsclrecalllayout *rp, *orp;
+
+	recallp->nfsrecly_recalltype = recalltype;
+	recallp->nfsrecly_iomode = iomode;
+	recallp->nfsrecly_stateseqid = stateseqid;
+	recallp->nfsrecly_off = off;
+	recallp->nfsrecly_len = len;
+	/*
+	 * Order the list as file returns first, followed by fsid and any
+	 * returns, both in increasing stateseqid order.
+	 * Note that the seqids wrap around, so 1 is after 0xffffffff.
+	 * (I'm not sure this is correct because I find RFC5661 confusing
+	 * on this, but hopefully it will work ok.)
+	 * The new entry goes in front of the first existing entry it
+	 * sorts before; orp tracks the last entry visited so that the new
+	 * one can be appended at the tail when no such entry exists.
+	 */
+	orp = NULL;
+	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
+		orp = rp;
+		if ((recalltype == NFSLAYOUTRETURN_FILE &&
+		     (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
+		      nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
+		    (recalltype != NFSLAYOUTRETURN_FILE &&
+		     rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
+		     nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
+			LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
+			break;
+		}
+	}
+	if (rp == NULL) {
+		if (orp == NULL)
+			LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
+			    nfsrecly_list);
+		else
+			LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
+	}
+	lyp->nfsly_flags |= NFSLY_RECALL;
+	return (0);
+}
+
+/*
+ * Compare the two seqids for ordering. The trick is that the seqids can
+ * wrap around from 0xffffffff->0, so check for the cases where one
+ * has wrapped around.
+ * Two seqids that are 0x7fffffff or more apart are treated as being on
+ * opposite sides of a wrap, in which case the numerically smaller one
+ * is the later one.
+ * Return 1 if seqid1 comes before seqid2, 0 otherwise.
+ * Equal seqids also return 1.
+ */
+static int
+nfscl_seq(uint32_t seqid1, uint32_t seqid2)
+{
+
+	if (seqid2 > seqid1 && (seqid2 - seqid1) >= 0x7fffffff)
+		/* seqid1 is the one that wrapped, so it is after seqid2. */
+		return (0);
+	if (seqid1 > seqid2 && (seqid1 - seqid2) >= 0x7fffffff)
+		/* seqid2 is the one that wrapped, so seqid1 is before it. */
+		return (1);
+	if (seqid1 <= seqid2)
+		return (1);
+	return (0);
+}
+
+/*
+ * Do a layout return for each of the recalls.
+ * One nfsrpc_layoutreturn() is issued per entry on lyp's nfsly_recall
+ * list, using the iomode, recall type, offset and length recorded in
+ * the entry.  The stateid sent combines the layout's "other" bytes with
+ * the seqid recorded in the entry.  The RPC results are ignored and the
+ * list itself is not modified here.
+ */
+static void
+nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	struct nfsclrecalllayout *rp;
+	nfsv4stateid_t stateid;
+
+	NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
+	LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
+		stateid.seqid = rp->nfsrecly_stateseqid;
+		(void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
+		    lyp->nfsly_fhlen, 0, NFSLAYOUT_NFSV4_1_FILES,
+		    rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
+		    rp->nfsrecly_off, rp->nfsrecly_len,
+		    &stateid, 0, NULL, cred, p, NULL);
+	}
+}
+
+/*
+ * Do the layout commit for a file layout.
+ */
+static void
+nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
+    struct ucred *cred, NFSPROC_T *p)
+{
+	int error;
+
+	error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh, lyp->nfsly_fhlen,
+	    0, 0, 0, lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
+	    NFSLAYOUT_NFSV4_1_FILES, 0, NULL, cred, p, NULL);
+	if (error == NFSERR_NOTSUPP) {
+		/*
+		 * If the server doesn't want it, don't bother doing it.
+		 * NFSSTA_NOLAYOUTCOMMIT is recorded in the mount's state
+		 * under the mount lock; any other error is ignored.
+		 */
+		NFSLOCKMNT(nmp);
+		nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
+		NFSUNLOCKMNT(nmp);
+	}
+}
+
+/*
+ * Commit all layouts for a file (vnode).
+ * Returns 0 without doing anything when NFSHASNOLAYOUTCOMMIT() is true
+ * for the mount (presumably a test of the NFSSTA_NOLAYOUTCOMMIT state
+ * flag -- confirm against the macro).  Returns EPERM when the mount has
+ * no client structure, when no layout exists for the vnode's file
+ * handle or when a forced unmount is in progress.
+ * A reference is held on the layout while committing.  NFSLY_WRITTEN is
+ * cleared before the NFSCLSTATE lock is dropped for the RPC and is
+ * rechecked once the lock has been reacquired, so the commit is
+ * repeated for as long as the flag keeps being set again.
+ */
+int
+nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
+{
+	struct nfsclclient *clp;
+	struct nfscllayout *lyp;
+	struct nfsnode *np = VTONFS(vp);
+	mount_t mp;
+	struct nfsmount *nmp;
+
+	mp = vnode_mount(vp);
+	nmp = VFSTONFS(mp);
+	if (NFSHASNOLAYOUTCOMMIT(nmp))
+		return (0);
+	NFSLOCKCLSTATE();
+	clp = nmp->nm_clp;
+	if (clp == NULL) {
+		NFSUNLOCKCLSTATE();
+		return (EPERM);
+	}
+	lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
+	if (lyp == NULL) {
+		NFSUNLOCKCLSTATE();
+		return (EPERM);
+	}
+	nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
+	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
+		NFSUNLOCKCLSTATE();
+		return (EPERM);
+	}
+tryagain:
+	if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
+		lyp->nfsly_flags &= ~NFSLY_WRITTEN;
+		NFSUNLOCKCLSTATE();
+		NFSCL_DEBUG(4, "do layoutcommit2\n");
+		nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
+		NFSLOCKCLSTATE();
+		goto tryagain;
+	}
+	nfsv4_relref(&lyp->nfsly_lock);
+	NFSUNLOCKCLSTATE();
+	return (0);
+}
+
diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c
index 41a6b78..00dbf90 100644
--- a/sys/fs/nfsclient/nfs_clvfsops.c
+++ b/sys/fs/nfsclient/nfs_clvfsops.c
@@ -80,6 +80,8 @@ extern int nfscl_ticks;
 extern struct timeval nfsboottime;
 extern struct nfsstats newnfsstats;
 extern int nfsrv_useacl;
+extern int nfscl_debuglevel;
+NFSCLSTATEMUTEX;
 
 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "New NFS 
request header"); MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "New NFS mount struct"); @@ -104,7 +106,7 @@ static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, int, u_char *, int, u_char *, int, struct vnode **, struct ucred *, - struct thread *, int, int); + struct thread *, int, int, int); static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *, struct sockaddr_storage *, int *, off_t *, struct timeval *); @@ -296,9 +298,11 @@ nfs_statfs(struct mount *mp, struct statfs *sbp) if (!error) error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva, &attrflag, NULL); + if (error != 0) + NFSCL_DEBUG(2, "statfs=%d\n", error); if (attrflag == 0) { ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, - td->td_ucred, td, &nfsva, NULL); + td->td_ucred, td, &nfsva, NULL, NULL); if (ret) { /* * Just set default values to get things going. @@ -521,7 +525,7 @@ nfs_mountdiskless(char *path, nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen, NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO, - NFS_DEFAULT_NEGNAMETIMEO)) != 0) { + NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } @@ -715,8 +719,8 @@ static const char *nfs_opts[] = { "from", "nfs_args", "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport", "readahead", "hostname", "timeout", "addr", "fh", "nfsv3", "sec", - "principal", "nfsv4", "gssname", "allgssname", "dirpath", - "nametimeo", "negnametimeo", "nocto", "wcommitsize", + "principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion", + "nametimeo", "negnametimeo", "nocto", "pnfs", "wcommitsize", NULL }; /* @@ -763,6 +767,7 @@ nfs_mount(struct mount *mp) char *opt, *name, *secname; int nametimeo = 
NFS_DEFAULT_NAMETIMEO; int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; + int minvers = 0; int dirlen, has_nfs_args_opt, krbnamelen, srvkrbnamelen; size_t hstlen; @@ -836,6 +841,8 @@ nfs_mount(struct mount *mp) args.flags |= NFSMNT_ALLGSSNAME; if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0) args.flags |= NFSMNT_NOCTO; + if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0) + args.flags |= NFSMNT_PNFS; if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) { if (opt == NULL) { vfs_mount_error(mp, "illegal readdirsize"); @@ -988,6 +995,16 @@ nfs_mount(struct mount *mp) goto out; } } + if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) == + 0) { + ret = sscanf(opt, "%d", &minvers); + if (ret != 1 || minvers < 0 || minvers > 1 || + (args.flags & NFSMNT_NFSV4) == 0) { + vfs_mount_error(mp, "illegal minorversion: %s", opt); + error = EINVAL; + goto out; + } + } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); @@ -1132,7 +1149,7 @@ nfs_mount(struct mount *mp) args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath, dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td, - nametimeo, negnametimeo); + nametimeo, negnametimeo, minvers); out: if (!error) { MNT_ILOCK(mp); @@ -1176,14 +1193,20 @@ static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen, u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp, - struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo) + struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo, + int minvers) { struct nfsmount *nmp; struct nfsnode *np; int error, trycnt, ret; struct nfsvattr nfsva; + struct nfsclclient *clp; + struct nfsclds *dsp, *tdsp; + uint32_t lease; static u_int64_t clval = 0; + NFSCL_DEBUG(3, "in mnt\n"); + clp = NULL; if (mp->mnt_flag & MNT_UPDATE) { nmp = 
VFSTONFS(mp); printf("%s: MNT_UPDATE is no longer handled here\n", __func__); @@ -1259,6 +1282,10 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000); else nmp->nm_wcommitsize = hibufspace / 10; + if ((argp->flags & NFSMNT_NFSV4) != 0) + nmp->nm_minorvers = minvers; + else + nmp->nm_minorvers = 0; nfs_decode_args(mp, nmp, argp, hst, cred, td); @@ -1306,17 +1333,18 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0))) goto bad; + /* For NFSv4.1, get the clientid now. */ + if (nmp->nm_minorvers > 0) { + NFSCL_DEBUG(3, "at getcl\n"); + error = nfscl_getcl(mp, cred, td, 0, &clp); + NFSCL_DEBUG(3, "aft getcl=%d\n", error); + if (error != 0) + goto bad; + } - /* - * A reference count is needed on the nfsnode representing the - * remote root. If this object is not persistent, then backward - * traversals of the mount point (i.e. "..") will not work if - * the nfsnode gets flushed out of the cache. Ufs does not have - * this problem, because one can identify root inodes by their - * number == ROOTINO (2). - */ if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) && nmp->nm_dirpathlen > 0) { + NFSCL_DEBUG(3, "in dirp\n"); /* * If the fhsize on the mount point == 0 for V4, the mount * path needs to be looked up. @@ -1325,6 +1353,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, do { error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp), cred, td); + NFSCL_DEBUG(3, "aft dirp=%d\n", error); if (error) (void) nfs_catnap(PZERO, error, "nfsgetdirp"); } while (error && --trycnt > 0); @@ -1333,6 +1362,15 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, goto bad; } } + + /* + * A reference count is needed on the nfsnode representing the + * remote root. If this object is not persistent, then backward + * traversals of the mount point (i.e. 
"..") will not work if + * the nfsnode gets flushed out of the cache. Ufs does not have + * this problem, because one can identify root inodes by their + * number == ROOTINO (2). + */ if (nmp->nm_fhsize > 0) { /* * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set @@ -1352,7 +1390,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, * (*vpp)->v_type with the correct value. */ ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1, - cred, td, &nfsva, NULL); + cred, td, &nfsva, NULL, &lease); if (ret) { /* * Just set default values to get things going. @@ -1367,8 +1405,25 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, nfsva.na_vattr.va_gen = 1; nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE; nfsva.na_vattr.va_size = 512 * 1024; + lease = 60; } (void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1); + if (nmp->nm_minorvers > 0) { + NFSCL_DEBUG(3, "lease=%d\n", (int)lease); + NFSLOCKCLSTATE(); + clp->nfsc_renew = NFSCL_RENEW(lease); + clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew; + clp->nfsc_clientidrev++; + if (clp->nfsc_clientidrev == 0) + clp->nfsc_clientidrev++; + NFSUNLOCKCLSTATE(); + /* + * Mount will succeed, so the renew thread can be + * started now. 
+ */ + nfscl_start_renewthread(clp); + nfscl_clientrelease(clp); + } if (argp->flags & NFSMNT_NFSV3) ncl_fsinfo(nmp, *vpp, cred, td); @@ -1390,10 +1445,20 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, error = EIO; bad: + if (clp != NULL) + nfscl_clientrelease(clp); newnfs_disconnect(&nmp->nm_sockreq); crfree(nmp->nm_sockreq.nr_cred); mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); + if (nmp->nm_clp != NULL) { + NFSLOCKCLSTATE(); + LIST_REMOVE(nmp->nm_clp, nfsc_list); + NFSUNLOCKCLSTATE(); + free(nmp->nm_clp, M_NFSCLCLIENT); + } + TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) + nfscl_freenfsclds(dsp); FREE(nmp, M_NEWNFSMNT); FREE(nam, M_SONAME); return (error); @@ -1408,6 +1473,7 @@ nfs_unmount(struct mount *mp, int mntflags) struct thread *td; struct nfsmount *nmp; int error, flags = 0, trycnt = 0; + struct nfsclds *dsp, *tdsp; td = curthread; @@ -1448,6 +1514,8 @@ nfs_unmount(struct mount *mp, int mntflags) mtx_destroy(&nmp->nm_sockreq.nr_mtx); mtx_destroy(&nmp->nm_mtx); + TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) + nfscl_freenfsclds(dsp); FREE(nmp, M_NEWNFSMNT); out: return (error); diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 12e018c..3a898f2 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -103,6 +103,7 @@ uint32_t nfscl_accesscache_load_done_id; extern struct nfsstats newnfsstats; extern int nfsrv_useacl; +extern int nfscl_debuglevel; MALLOC_DECLARE(M_NEWNFSREQ); /* @@ -606,6 +607,10 @@ nfs_open(struct vop_open_args *ap) np->n_directio_opens++; } + /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */ + if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0) + np->n_flag |= NWRITEOPENED; + /* * If this is an open for writing, capture a reference to the * credentials, so they can be used by ncl_putpages(). 
Using @@ -619,6 +624,7 @@ nfs_open(struct vop_open_args *ap) } else cred = NULL; mtx_unlock(&np->n_mtx); + if (cred != NULL) crfree(cred); vnode_create_vobject(vp, vattr.va_size, ap->a_td); @@ -1362,9 +1368,18 @@ ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int error, ret, attrflag; struct nfsvattr nfsva; + struct nfsmount *nmp; - error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag, - NULL); + nmp = VFSTONFS(vnode_mount(vp)); + error = EIO; + attrflag = 0; + if (NFSHASPNFS(nmp)) + error = nfscl_doiods(vp, uiop, NULL, NULL, + NFSV4OPEN_ACCESSREAD, cred, uiop->uio_td); + NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error); + if (error != 0) + error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, + &attrflag, NULL); if (attrflag) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1); if (ret && !error) @@ -1383,10 +1398,20 @@ ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit, int called_from_strategy) { struct nfsvattr nfsva; - int error = 0, attrflag, ret; + int error, attrflag, ret; + struct nfsmount *nmp; - error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, - uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy); + nmp = VFSTONFS(vnode_mount(vp)); + error = EIO; + attrflag = 0; + if (NFSHASPNFS(nmp)) + error = nfscl_doiods(vp, uiop, iomode, must_commit, + NFSV4OPEN_ACCESSWRITE, cred, uiop->uio_td); + NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error); + if (error != 0) + error = nfsrpc_write(vp, uiop, iomode, must_commit, cred, + uiop->uio_td, &nfsva, &attrflag, NULL, + called_from_strategy); if (attrflag) { if (VTONFS(vp)->n_flag & ND_NFSV4) ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1, @@ -2534,7 +2559,6 @@ ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct nfsvattr nfsva; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error, attrflag; - u_char verf[NFSX_VERF]; mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & 
NFSSTA_HASWRITEVERF) == 0) { @@ -2542,21 +2566,13 @@ ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, return (0); } mtx_unlock(&nmp->nm_mtx); - error = nfsrpc_commit(vp, offset, cnt, cred, td, verf, &nfsva, + error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva, &attrflag, NULL); - if (!error) { - mtx_lock(&nmp->nm_mtx); - if (NFSBCMP((caddr_t)nmp->nm_verf, verf, NFSX_VERF)) { - NFSBCOPY(verf, (caddr_t)nmp->nm_verf, NFSX_VERF); - error = NFSERR_STALEWRITEVERF; - } - mtx_unlock(&nmp->nm_mtx); - if (!error && attrflag) - (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, - 0, 1); - } else if (NFS_ISV4(vp)) { + if (attrflag != 0) + (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, + 0, 1); + if (error != 0 && NFS_ISV4(vp)) error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); - } return (error); } @@ -2928,6 +2944,8 @@ loop: mtx_unlock(&np->n_mtx); } else BO_UNLOCK(bo); + if (NFSHASPNFS(nmp)) + nfscl_layoutcommit(vp, td); mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { error = np->n_error; diff --git a/sys/fs/nfsclient/nfsmount.h b/sys/fs/nfsclient/nfsmount.h index 8068c28..f5b56bf 100644 --- a/sys/fs/nfsclient/nfsmount.h +++ b/sys/fs/nfsclient/nfsmount.h @@ -70,10 +70,12 @@ struct nfsmount { int nm_negnametimeo; /* timeout for -ve entries (sec) */ /* Newnfs additions */ + TAILQ_HEAD(, nfsclds) nm_sess; /* Session(s) for NFSv4.1. */ struct nfsclclient *nm_clp; uid_t nm_uid; /* Uid for SetClientID etc. */ u_int64_t nm_clval; /* identifies which clientid */ u_int64_t nm_fsid[2]; /* NFSv4 fsid */ + int nm_minorvers; /* Minor version # for NFSv4 */ u_int16_t nm_krbnamelen; /* Krb5 host principal, if any */ u_int16_t nm_dirpathlen; /* and mount dirpath, for V4 */ u_int16_t nm_srvkrbnamelen; /* and the server's target name */ @@ -107,6 +109,12 @@ struct nfsmount { */ #define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data)) +/* + * Get a pointer to the MDS session, which is always the first element + * in the list. 
+ */ +#define NFSMNT_MDSSESSION(m) (&(TAILQ_FIRST(&((m)->nm_sess))->nfsclds_sess)) + #ifndef NFS_DEFAULT_NAMETIMEO #define NFS_DEFAULT_NAMETIMEO 60 #endif diff --git a/sys/fs/nfsclient/nfsnode.h b/sys/fs/nfsclient/nfsnode.h index 209945a..bbb67d7 100644 --- a/sys/fs/nfsclient/nfsnode.h +++ b/sys/fs/nfsclient/nfsnode.h @@ -155,6 +155,8 @@ struct nfsnode { #define NREMOVEWANT 0x00004000 /* Want notification that remove is done */ #define NLOCK 0x00008000 /* Sleep lock the node */ #define NLOCKWANT 0x00010000 /* Want the sleep lock */ +#define NNOLAYOUT 0x00020000 /* Can't get a layout for this file */ +#define NWRITEOPENED 0x00040000 /* Has been opened for writing */ /* * Convert between nfsnode pointers and vnode pointers |