From e851db5b05408b89b9a9429a66814b79fabee2a1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Jun 2008 18:45:30 -0400 Subject: SUNRPC: Add address family field to svc_serv data structure Introduce and initialize an address family field in the svc_serv structure. This field will determine what family to use for the service's listener sockets and what families are advertised via the local rpcbind daemon. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 80292ff..7f3d76a7 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -229,6 +229,7 @@ int nfsd_create_serv(void) atomic_set(&nfsd_busy, 0); nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, + AF_INET, nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; -- cgit v1.1 From 04716e6621ff4abb422d64ba7b48718f52716a3e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 7 Aug 2008 13:00:20 -0400 Subject: nfsd: permit unauthenticated stat of export root RFC 2623 section 2.3.2 permits the server to bypass gss authentication checks for certain operations that a client may perform when mounting. In the case of a client that doesn't have some form of credentials available to it on boot, this allows it to perform the mount unattended. (Presumably real file access won't be needed until a user with credentials logs in.) Being slightly more lenient allows lots of old clients to access krb5-only exports, with the only loss being a small amount of information leaked about the root directory of the export. This affects only v2 and v3; v4 still requires authentication for all access. Thanks to Peter Staubach testing against a Solaris client, which suggesting addition of v3 getattr, to the list, and to Trond for noting that doing so exposes no additional information. Signed-off-by: J. Bruce Fields Cc: Peter Staubach Cc: Trond Myklebust --- fs/nfsd/nfs3proc.c | 8 +++++--- fs/nfsd/nfsfh.c | 30 ++++++++++++++++++++---------- fs/nfsd/nfsproc.c | 6 ++++-- fs/nfsd/vfs.c | 4 ++-- 4 files changed, 31 insertions(+), 17 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 4d617ea..9dbd2eb 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -63,7 +63,8 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); if (nfserr) RETURN_STATUS(nfserr); @@ -530,7 +531,7 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: FSSTAT(3) %s\n", SVCFH_fmt(&argp->fh)); - nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); fh_put(&argp->fh); RETURN_STATUS(nfserr); } @@ -558,7 +559,8 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, resp->f_maxfilesize = ~(u32) 0; resp->f_properties = NFS3_FSF_DEFAULT; - nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); + nfserr = fh_verify(rqstp, &argp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); /* Check special features of the file system. May request * different read/write sizes for file systems known to have diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ea37c96..cd25d91 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -302,17 +302,27 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) if (error) goto out; - if (!(access & NFSD_MAY_LOCK)) { - /* - * pseudoflavor restrictions are not enforced on NLM, - * which clients virtually always use auth_sys for, - * even while using RPCSEC_GSS for NFS. - */ - error = check_nfsd_access(exp, rqstp); - if (error) - goto out; - } + /* + * pseudoflavor restrictions are not enforced on NLM, + * which clients virtually always use auth_sys for, + * even while using RPCSEC_GSS for NFS. + */ + if (access & NFSD_MAY_LOCK) + goto skip_pseudoflavor_check; + /* + * Clients may expect to be able to use auth_sys during mount, + * even if they use gss for everything else; see section 2.3.2 + * of rfc 2623. + */ + if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT + && exp->ex_path.dentry == dentry) + goto skip_pseudoflavor_check; + + error = check_nfsd_access(exp, rqstp); + if (error) + goto out; +skip_pseudoflavor_check: /* Finally, check access permissions. */ error = nfsd_permission(rqstp, exp, dentry, access); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 0766f95..5cffeca 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -65,7 +65,8 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); return nfsd_return_attrs(nfserr, resp); } @@ -521,7 +522,8 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); - nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, + NFSD_MAY_BYPASS_GSS_ON_ROOT); fh_put(&argp->fh); return nfserr; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 18060be..1319e80 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1866,9 +1866,9 @@ out: * N.B. After this call fhp needs an fh_put */ __be32 -nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) +nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) { - __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); + __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); if (!err && vfs_statfs(fhp->fh_dentry,stat)) err = nfserr_io; return err; -- cgit v1.1 From 8e40741494bd08a15af17b8cfd26a1f7ad1f5081 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 12 Aug 2008 20:42:51 +0300 Subject: nfsd: properly xdr-encode stateid4.seqid as uint32_t for cb_recall Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 702fa57..ab13e02 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -225,7 +225,8 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); WRITE32(OP_CB_RECALL); - WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t)); + WRITE32(cb_rec->cbr_stateid.si_generation); + WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); WRITE32(cb_rec->cbr_trunc); WRITE32(len); WRITEMEM(cb_rec->cbr_fhval, len); -- cgit v1.1 From c47b2ca42e848e2dce122acbefa0de59320f5683 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 12 Aug 2008 20:43:37 +0300 Subject: nfsd: properly xdr-encode deleg stateid returned from open Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 14ba4d9..d7e4bd5 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2149,7 +2149,9 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op break; case NFS4_OPEN_DELEGATE_READ: RESERVE_SPACE(20 + sizeof(stateid_t)); - WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); + WRITE32(open->op_delegate_stateid.si_generation); + WRITEMEM(&open->op_delegate_stateid.si_opaque, + sizeof(stateid_opaque_t)); WRITE32(open->op_recall); /* @@ -2163,7 +2165,9 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op break; case NFS4_OPEN_DELEGATE_WRITE: RESERVE_SPACE(32 + sizeof(stateid_t)); - WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); + WRITE32(open->op_delegate_stateid.si_generation); + WRITEMEM(&open->op_delegate_stateid.si_opaque, + sizeof(stateid_opaque_t)); WRITE32(0); /* -- cgit v1.1 From 5033b77a931a12bc7395c1834fa50f6d477be3ae Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 12 Aug 2008 20:44:41 +0300 Subject: nfsd: fix nfsd4_encode_open buffer space reservation nfsd4_encode_open first reservation is currently for 36 + sizeof(stateid_t) while it writes after the stateid a cinfo (20 bytes) and 5 more 4-bytes words, for a total of 40 + sizeof(stateid_t). Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d7e4bd5..ace3934 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2133,7 +2133,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op if (nfserr) goto out; - RESERVE_SPACE(36 + sizeof(stateid_t)); + RESERVE_SPACE(40 + sizeof(stateid_t)); WRITE32(open->op_stateid.si_generation); WRITEMEM(&open->op_stateid.si_opaque, sizeof(stateid_opaque_t)); WRITECINFO(open->op_cinfo); -- cgit v1.1 From e2f282b9f0538e4f63255ffa35bf3b902f5fbde2 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 12 Aug 2008 20:45:07 +0300 Subject: nfsd: nfs4xdr encode_stateid helper function Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 76 +++++++++++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 44 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ace3934..c560af8 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1950,6 +1950,17 @@ fail: return -EINVAL; } +static void +nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) +{ + ENCODE_HEAD; + + RESERVE_SPACE(sizeof(stateid_t)); + WRITE32(sid->si_generation); + WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + ADJUST_ARGS(); +} + static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { @@ -1969,12 +1980,9 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c { ENCODE_SEQID_OP_HEAD; - if (!nfserr) { - RESERVE_SPACE(sizeof(stateid_t)); - WRITE32(close->cl_stateid.si_generation); - WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); - } + if (!nfserr) + nfsd4_encode_stateid(resp, &close->cl_stateid); + ENCODE_SEQID_OP_TAIL(close->cl_stateowner); return nfserr; } @@ -2074,12 +2082,9 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo { ENCODE_SEQID_OP_HEAD; - if (!nfserr) { - RESERVE_SPACE(4 + sizeof(stateid_t)); - WRITE32(lock->lk_resp_stateid.si_generation); - WRITEMEM(&lock->lk_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); - } else if (nfserr == nfserr_denied) + if (!nfserr) + nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); + else if (nfserr == nfserr_denied) nfsd4_encode_lock_denied(resp, &lock->lk_denied); ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); @@ -2099,13 +2104,9 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l { ENCODE_SEQID_OP_HEAD; - if (!nfserr) { - RESERVE_SPACE(sizeof(stateid_t)); - WRITE32(locku->lu_stateid.si_generation); - WRITEMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); - } - + if (!nfserr) + nfsd4_encode_stateid(resp, &locku->lu_stateid); + ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); return nfserr; } @@ -2133,9 +2134,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op if (nfserr) goto out; - RESERVE_SPACE(40 + sizeof(stateid_t)); - WRITE32(open->op_stateid.si_generation); - WRITEMEM(&open->op_stateid.si_opaque, sizeof(stateid_opaque_t)); + nfsd4_encode_stateid(resp, &open->op_stateid); + RESERVE_SPACE(40); WRITECINFO(open->op_cinfo); WRITE32(open->op_rflags); WRITE32(2); @@ -2148,10 +2148,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op case NFS4_OPEN_DELEGATE_NONE: break; case NFS4_OPEN_DELEGATE_READ: - RESERVE_SPACE(20 + sizeof(stateid_t)); - WRITE32(open->op_delegate_stateid.si_generation); - WRITEMEM(&open->op_delegate_stateid.si_opaque, - sizeof(stateid_opaque_t)); + nfsd4_encode_stateid(resp, &open->op_delegate_stateid); + RESERVE_SPACE(20); WRITE32(open->op_recall); /* @@ -2164,10 +2162,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op ADJUST_ARGS(); break; case NFS4_OPEN_DELEGATE_WRITE: - RESERVE_SPACE(32 + sizeof(stateid_t)); - WRITE32(open->op_delegate_stateid.si_generation); - WRITEMEM(&open->op_delegate_stateid.si_opaque, - sizeof(stateid_opaque_t)); + nfsd4_encode_stateid(resp, &open->op_delegate_stateid); + RESERVE_SPACE(32); WRITE32(0); /* @@ -2199,13 +2195,9 @@ static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { ENCODE_SEQID_OP_HEAD; - - if (!nfserr) { - RESERVE_SPACE(sizeof(stateid_t)); - WRITE32(oc->oc_resp_stateid.si_generation); - WRITEMEM(&oc->oc_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); - } + + if (!nfserr) + nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); return nfserr; @@ -2215,13 +2207,9 @@ static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { ENCODE_SEQID_OP_HEAD; - - if (!nfserr) { - RESERVE_SPACE(sizeof(stateid_t)); - WRITE32(od->od_stateid.si_generation); - WRITEMEM(&od->od_stateid.si_opaque, sizeof(stateid_opaque_t)); - ADJUST_ARGS(); - } + + if (!nfserr) + nfsd4_encode_stateid(resp, &od->od_stateid); ENCODE_SEQID_OP_TAIL(od->od_stateowner); return nfserr; -- cgit v1.1 From 1b6b2257dc5a88aae375ff44485f8d6c4ee800e4 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 12 Aug 2008 20:45:28 +0300 Subject: nfsd: don't declare p in ENCODE_SEQID_OP_HEAD After using the encode_stateid helper the "p" pointer declared by ENCODE_SEQID_OP_HEAD is warned as unused. In the single site where it is still needed it can be declared separately using the ENCODE_HEAD macro. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c560af8..1b81f16 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1183,7 +1183,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) * Header routine to setup seqid operation replay cache */ #define ENCODE_SEQID_OP_HEAD \ - __be32 *p; \ __be32 *save; \ \ save = resp->p; @@ -2129,6 +2128,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { + ENCODE_HEAD; ENCODE_SEQID_OP_HEAD; if (nfserr) -- cgit v1.1 From 5bf8c6911fe88bea4f9850007796fbacf9fbfb88 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 12 Aug 2008 20:45:51 +0300 Subject: nfsd: properly xdr-decode NFS4_OPEN_CLAIM_DELEGATE_CUR stateid Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1b81f16..48a0cc17 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -679,7 +679,9 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) break; case NFS4_OPEN_CLAIM_DELEGATE_CUR: READ_BUF(sizeof(stateid_t) + 4); - COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t)); + READ32(open->op_delegate_stateid.si_generation); + COPYMEM(&open->op_delegate_stateid.si_opaque, + sizeof(stateid_opaque_t)); READ32(open->op_fname.len); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); -- cgit v1.1 From e31a1b662f40fd460e982ef87582c66c51596cd0 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 12 Aug 2008 20:46:18 +0300 Subject: nfsd: nfs4xdr decode_stateid helper function Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 99 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 55 insertions(+), 44 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 48a0cc17..afcdf4b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -413,6 +413,18 @@ out_nfserr: } static __be32 +nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + READ32(sid->si_generation); + COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) { DECODE_HEAD; @@ -429,10 +441,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) DECODE_HEAD; close->cl_stateowner = NULL; - READ_BUF(4 + sizeof(stateid_t)); + READ_BUF(4); READ32(close->cl_seqid); - READ32(close->cl_stateid.si_generation); - COPYMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); + return nfsd4_decode_stateid(argp, &close->cl_stateid); DECODE_TAIL; } @@ -493,13 +504,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create static inline __be32 nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) { - DECODE_HEAD; - - READ_BUF(sizeof(stateid_t)); - READ32(dr->dr_stateid.si_generation); - COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t)); - - DECODE_TAIL; + return nfsd4_decode_stateid(argp, &dr->dr_stateid); } static inline __be32 @@ -542,20 +547,22 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) READ32(lock->lk_is_new); if (lock->lk_is_new) { - READ_BUF(36); + READ_BUF(4); READ32(lock->lk_new_open_seqid); - READ32(lock->lk_new_open_stateid.si_generation); - - COPYMEM(&lock->lk_new_open_stateid.si_opaque, sizeof(stateid_opaque_t)); + status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); + if (status) + return status; + READ_BUF(8 + sizeof(clientid_t)); READ32(lock->lk_new_lock_seqid); COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); READ32(lock->lk_new_owner.len); READ_BUF(lock->lk_new_owner.len); READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); } else { - READ_BUF(20); - READ32(lock->lk_old_lock_stateid.si_generation); - COPYMEM(&lock->lk_old_lock_stateid.si_opaque, sizeof(stateid_opaque_t)); + status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid); + if (status) + return status; + READ_BUF(4); READ32(lock->lk_old_lock_seqid); } @@ -587,13 +594,15 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) DECODE_HEAD; locku->lu_stateowner = NULL; - READ_BUF(24 + sizeof(stateid_t)); + READ_BUF(8); READ32(locku->lu_type); if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) goto xdr_error; READ32(locku->lu_seqid); - READ32(locku->lu_stateid.si_generation); - COPYMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); + status = nfsd4_decode_stateid(argp, &locku->lu_stateid); + if (status) + return status; + READ_BUF(16); READ64(locku->lu_offset); READ64(locku->lu_length); @@ -678,10 +687,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ32(open->op_delegate_type); break; case NFS4_OPEN_CLAIM_DELEGATE_CUR: - READ_BUF(sizeof(stateid_t) + 4); - READ32(open->op_delegate_stateid.si_generation); - COPYMEM(&open->op_delegate_stateid.si_opaque, - sizeof(stateid_opaque_t)); + status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); + if (status) + return status; + READ_BUF(4); READ32(open->op_fname.len); READ_BUF(open->op_fname.len); SAVEMEM(open->op_fname.data, open->op_fname.len); @@ -701,9 +710,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con DECODE_HEAD; open_conf->oc_stateowner = NULL; - READ_BUF(4 + sizeof(stateid_t)); - READ32(open_conf->oc_req_stateid.si_generation); - COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t)); + status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); + if (status) + return status; + READ_BUF(4); READ32(open_conf->oc_seqid); DECODE_TAIL; @@ -715,9 +725,10 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d DECODE_HEAD; open_down->od_stateowner = NULL; - READ_BUF(12 + sizeof(stateid_t)); - READ32(open_down->od_stateid.si_generation); - COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); + status = nfsd4_decode_stateid(argp, &open_down->od_stateid); + if (status) + return status; + READ_BUF(12); READ32(open_down->od_seqid); READ32(open_down->od_share_access); READ32(open_down->od_share_deny); @@ -745,9 +756,10 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) { DECODE_HEAD; - READ_BUF(sizeof(stateid_t) + 12); - READ32(read->rd_stateid.si_generation); - COPYMEM(&read->rd_stateid.si_opaque, sizeof(stateid_opaque_t)); + status = nfsd4_decode_stateid(argp, &read->rd_stateid); + if (status) + return status; + READ_BUF(12); READ64(read->rd_offset); READ32(read->rd_length); @@ -836,15 +848,13 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) { - DECODE_HEAD; - - READ_BUF(sizeof(stateid_t)); - READ32(setattr->sa_stateid.si_generation); - COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t)); - if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl))) - goto out; + __be32 status; - DECODE_TAIL; + status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); + if (status) + return status; + return nfsd4_decode_fattr(argp, setattr->sa_bmval, + &setattr->sa_iattr, &setattr->sa_acl); } static __be32 @@ -929,9 +939,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) int len; DECODE_HEAD; - READ_BUF(sizeof(stateid_opaque_t) + 20); - READ32(write->wr_stateid.si_generation); - COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t)); + status = nfsd4_decode_stateid(argp, &write->wr_stateid); + if (status) + return status; + READ_BUF(16); READ64(write->wr_offset); READ32(write->wr_stable_how); if (write->wr_stable_how > 2) -- cgit v1.1 From 54a66e548079f12a6f54c3cae96812a9ed9b54ae Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 13 Aug 2008 22:03:27 -0400 Subject: knfsd: allocate readahead cache in individual chunks I had a report from someone building a large NFS server that they were unable to start more than 585 nfsd threads. It was reported against an older kernel using the slab allocator, and I tracked it down to the large allocation in nfsd_racache_init failing. It appears that the slub allocator handles large allocations better, but large contiguous allocations can often be problematic. There doesn't seem to be any reason that the racache has to be allocated as a single large chunk. This patch breaks this up so that the racache is built up from separate allocations. (Thanks also to Takashi Iwai for a bugfix.) Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields Cc: Takashi Iwai --- fs/nfsd/vfs.c | 59 +++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 24 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 1319e80..aa1d0d6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -83,7 +83,6 @@ struct raparm_hbucket { spinlock_t pb_lock; } ____cacheline_aligned_in_smp; -static struct raparms * raparml; #define RAPARM_HASH_BITS 4 #define RAPARM_HASH_SIZE (1<p_next; + kfree(last_raparm); + } + raparm_hash[i].pb_head = NULL; + } } /* * Initialize readahead param cache @@ -1981,35 +1989,38 @@ nfsd_racache_init(int cache_size) int i; int j = 0; int nperbucket; + struct raparms **raparm = NULL; - if (raparml) + if (raparm_hash[0].pb_head) return 0; - if (cache_size < 2*RAPARM_HASH_SIZE) - cache_size = 2*RAPARM_HASH_SIZE; - raparml = kcalloc(cache_size, sizeof(struct raparms), GFP_KERNEL); - - if (!raparml) { - printk(KERN_WARNING - "nfsd: Could not allocate memory read-ahead cache.\n"); - return -ENOMEM; - } + nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); + if (nperbucket < 2) + nperbucket = 2; + cache_size = nperbucket * RAPARM_HASH_SIZE; dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); - for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) { - raparm_hash[i].pb_head = NULL; + + for (i = 0; i < RAPARM_HASH_SIZE; i++) { spin_lock_init(&raparm_hash[i].pb_lock); - } - nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); - for (i = 0; i < cache_size - 1; i++) { - if (i % nperbucket == 0) - raparm_hash[j++].pb_head = raparml + i; - if (i % nperbucket < nperbucket-1) - raparml[i].p_next = raparml + i + 1; + + raparm = &raparm_hash[i].pb_head; + for (j = 0; j < nperbucket; j++) { + *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL); + if (!*raparm) + goto out_nomem; + raparm = &(*raparm)->p_next; + } + *raparm = NULL; } nfsdstats.ra_size = cache_size; return 0; + +out_nomem: + dprintk("nfsd: kmalloc failed, freeing readahead buffers\n"); + nfsd_racache_shutdown(); + return -ENOMEM; } #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -- cgit v1.1 From 97eb89bb0e5d9ab20dbc677cb18fad1421473287 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 26 Sep 2008 15:14:13 +0300 Subject: nfsd: do_probe_callback should not clear rpc stats Now that cb_stats are static (since commit ff7d9756b501744540be65e172d27ee321d86103) there's no need to clear them. Initially I thought it might make sense to do that every callback probing but since the stats are per-program and they are shared between possibly several client callback instances, zeroing them out seems like the wrong thing to do. Note that that commit also introduced a bug since stats.program is also being cleared in the process and it is not restored after the memset as it used to be. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ab13e02..f7c793a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -397,9 +397,6 @@ static int do_probe_callback(void *data) addr.sin_port = htons(cb->cb_port); addr.sin_addr.s_addr = htonl(cb->cb_addr); - /* Initialize rpc_stat */ - memset(args.program->stats, 0, sizeof(struct rpc_stat)); - /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { -- cgit v1.1 From d5b337b4877f7c4e1d761434ee04d045b0201e03 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 28 Sep 2008 09:21:26 +0300 Subject: nfsd: use nfs client rpc callback program since commit ff7d9756b501744540be65e172d27ee321d86103 "nfsd: use static memory for callback program and stats" do_probe_callback uses a static callback program (NFS4_CALLBACK) rather than the one set in clp->cl_callback.cb_prog as passed in by the client in setclientid (4.0) or create_session (4.1). This patches introduces rpc_create_args.prognumber that allows overriding program->number when creating rpc_clnt. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index f7c793a..094747a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -380,6 +380,7 @@ static int do_probe_callback(void *data) .addrsize = sizeof(addr), .timeout = &timeparms, .program = &cb_program, + .prognumber = cb->cb_prog, .version = nfs_cb_version[1]->number, .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), -- cgit v1.1 From af558e33bedab672f5cfd3260bce7445e353fe21 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Sep 2007 12:34:25 -0400 Subject: nfsd: common grace period control Rewrite grace period code to unify management of grace period across lockd and nfsd. The current code has lockd and nfsd cooperate to compute a grace period which is satisfactory to them both, and then individually enforce it. This creates a slight race condition, since the enforcement is not coordinated. It's also more complicated than necessary. Here instead we have lockd and nfsd each inform common code when they enter the grace period, and when they're ready to leave the grace period, and allow normal locking only after both of them are ready to leave. We also expect the locks_start_grace()/locks_end_grace() interface here to be simpler to build on for future cluster/high-availability work, which may require (for example) putting individual filesystems into grace, or enforcing grace periods across multiple cluster nodes. Signed-off-by: J. Bruce Fields --- fs/nfsd/lockd.c | 1 - fs/nfsd/nfs4proc.c | 8 ++++---- fs/nfsd/nfs4state.c | 34 +++++++++++++++------------------- 3 files changed, 19 insertions(+), 24 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 15c6fae..b2786a5 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -70,7 +70,6 @@ nlm_fclose(struct file *filp) static struct nlmsvc_binding nfsd_nlm_ops = { .fopen = nlm_fopen, /* open file for locking */ .fclose = nlm_fclose, /* close file */ - .get_grace_period = get_nfs4_grace_period, }; void diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e5b51ff..669461e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -201,10 +201,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; - if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) goto out; status = nfserr_no_grace; - if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) goto out; switch (open->op_claim_type) { @@ -575,7 +575,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; - if (nfs4_in_grace()) + if (locks_in_grace()) return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); @@ -596,7 +596,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!cstate->save_fh.fh_dentry) return status; - if (nfs4_in_grace() && !(cstate->save_fh.fh_export->ex_flags + if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) return nfserr_grace; status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1578d7a..0cc7ff5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -61,7 +61,6 @@ static time_t lease_time = 90; /* default lease time */ static time_t user_lease_time = 90; static time_t boot_time; -static int in_grace = 1; static u32 current_ownerid = 1; static u32 current_fileid = 1; static u32 current_delegid = 1; @@ -1640,7 +1639,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta case NFS4_OPEN_CLAIM_NULL: /* Let's not give out any delegations till everyone's * had the chance to reclaim theirs.... */ - if (nfs4_in_grace()) + if (locks_in_grace()) goto out; if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) goto out; @@ -1816,12 +1815,15 @@ out: return status; } +struct lock_manager nfsd4_manager = { +}; + static void -end_grace(void) +nfsd4_end_grace(void) { dprintk("NFSD: end of grace period\n"); nfsd4_recdir_purge_old(); - in_grace = 0; + locks_end_grace(&nfsd4_manager); } static time_t @@ -1838,8 +1840,8 @@ nfs4_laundromat(void) nfs4_lock_state(); dprintk("NFSD: laundromat service - starting\n"); - if (in_grace) - end_grace(); + if (locks_in_grace()) + nfsd4_end_grace(); list_for_each_safe(pos, next, &client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { @@ -1974,7 +1976,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) return nfserr_bad_stateid; else if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; - else if (nfs4_in_grace()) { + else if (locks_in_grace()) { /* Answer in remaining cases depends on existance of * conflicting state; so we must wait out the grace period. */ return nfserr_grace; @@ -1993,7 +1995,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) static inline int io_during_grace_disallowed(struct inode *inode, int flags) { - return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) + return locks_in_grace() && (flags & (RD_STATE | WR_STATE)) && mandatory_lock(inode); } @@ -2693,10 +2695,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, filp = lock_stp->st_vfs_file; status = nfserr_grace; - if (nfs4_in_grace() && !lock->lk_reclaim) + if (locks_in_grace() && !lock->lk_reclaim) goto out; status = nfserr_no_grace; - if (!nfs4_in_grace() && lock->lk_reclaim) + if (!locks_in_grace() && lock->lk_reclaim) goto out; locks_init_lock(&file_lock); @@ -2779,7 +2781,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, int error; __be32 status; - if (nfs4_in_grace()) + if (locks_in_grace()) return nfserr_grace; if (check_lock_length(lockt->lt_offset, lockt->lt_length)) @@ -3192,9 +3194,9 @@ __nfs4_state_start(void) unsigned long grace_time; boot_time = get_seconds(); - grace_time = get_nfs_grace_period(); + grace_time = get_nfs4_grace_period(); lease_time = user_lease_time; - in_grace = 1; + locks_start_grace(&nfsd4_manager); printk(KERN_INFO "NFSD: starting %ld-second grace period\n", grace_time/HZ); laundry_wq = create_singlethread_workqueue("nfsd4"); @@ -3213,12 +3215,6 @@ nfs4_state_start(void) return; } -int -nfs4_in_grace(void) -{ - return in_grace; -} - time_t nfs4_lease_time(void) { -- cgit v1.1 From 26a414092353590ceaa5955bcb53f863d6ea7549 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Oct 2008 17:15:30 -0400 Subject: NLM: Remove "proto" argument from lockd_up() Clean up: Now that lockd_up() starts listeners for both transports, the "proto" argument is no longer needed. Signed-off-by: Chuck Lever Cc: Neil Brown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 5 ++--- fs/nfsd/nfssvc.c | 19 +++++++------------ 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c53e65f..862dff5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -614,10 +614,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) return -EINVAL; err = nfsd_create_serv(); if (!err) { - int proto = 0; - err = svc_addsock(nfsd_serv, fd, buf, &proto); + err = svc_addsock(nfsd_serv, fd, buf, NULL); if (err >= 0) { - err = lockd_up(proto); + err = lockd_up(); if (err < 0) svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 7f3d76a7..59eeb46 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -244,25 +244,20 @@ static int nfsd_init_socks(int port) if (!list_empty(&nfsd_serv->sv_permsocks)) return 0; - error = lockd_up(IPPROTO_UDP); - if (error >= 0) { - error = svc_create_xprt(nfsd_serv, "udp", port, + error = svc_create_xprt(nfsd_serv, "udp", port, SVC_SOCK_DEFAULTS); - if (error < 0) - lockd_down(); - } if (error < 0) return error; - error = lockd_up(IPPROTO_TCP); - if (error >= 0) { - error = svc_create_xprt(nfsd_serv, "tcp", port, + error = svc_create_xprt(nfsd_serv, "tcp", port, SVC_SOCK_DEFAULTS); - if (error < 0) - lockd_down(); - } if (error < 0) return error; + + error = lockd_up(); + if (error < 0) + return error; + return 0; } -- cgit v1.1 From 2937391385807b3da9cd7a39345259caf550b032 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Oct 2008 17:15:38 -0400 Subject: NLM: Remove unused argument from svc_addsock() function Clean up: The svc_addsock() function no longer uses its "proto" argument, so remove it. Signed-off-by: Chuck Lever Cc: Neil Brown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 862dff5..97543df 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -614,7 +614,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) return -EINVAL; err = nfsd_create_serv(); if (!err) { - err = svc_addsock(nfsd_serv, fd, buf, NULL); + err = svc_addsock(nfsd_serv, fd, buf); if (err >= 0) { err = lockd_up(); if (err < 0) -- cgit v1.1