diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-18 11:22:04 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-18 11:22:04 -0800 |
commit | 4dd3c2e5a4225e3df85afc6033e62ce8b09f0ed2 (patch) | |
tree | 3d1dac5206550994b161eaab8ac73828f410228a | |
parent | 07c455ee222f3ad219c2835d05a175a326a138fb (diff) | |
parent | 22700f3c6df55387cec2ee27c533a7b23c76dc51 (diff) | |
download | op-kernel-dev-4dd3c2e5a4225e3df85afc6033e62ce8b09f0ed2.zip op-kernel-dev-4dd3c2e5a4225e3df85afc6033e62ce8b09f0ed2.tar.gz |
Merge tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
"Lots of good bugfixes, including:
- fix a number of races in the NFSv4+ state code
- fix some shutdown crashes in multiple-network-namespace cases
- relax our 4.1 session limits; if you've an artificially low limit
to the number of 4.1 clients that can mount simultaneously, try
upgrading"
* tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux: (22 commits)
SUNRPC: Improve ordering of transport processing
nfsd: deal with revoked delegations appropriately
svcrdma: Enqueue after setting XPT_CLOSE in completion handlers
nfsd: use nfs->ns.inum as net ID
rpc: remove some BUG()s
svcrdma: Preserve CB send buffer across retransmits
nfds: avoid gettimeofday for nfssvc_boot time
fs, nfsd: convert nfs4_file.fi_ref from atomic_t to refcount_t
fs, nfsd: convert nfs4_cntl_odstate.co_odcount from atomic_t to refcount_t
fs, nfsd: convert nfs4_stid.sc_count from atomic_t to refcount_t
lockd: double unregister of inetaddr notifiers
nfsd4: catch some false session retries
nfsd4: fix cached replies to solo SEQUENCE compounds
sunrcp: make function _svc_create_xprt static
SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status
nfsd: use ARRAY_SIZE
nfsd: give out fewer session slots as limit approaches
nfsd: increase DRC cache limit
nfsd: remove unnecessary nofilehandle checks
nfs_common: convert int to bool
...
-rw-r--r-- | fs/lockd/svc.c | 20 | ||||
-rw-r--r-- | fs/nfs_common/grace.c | 24 | ||||
-rw-r--r-- | fs/nfsd/fault_inject.c | 5 | ||||
-rw-r--r-- | fs/nfsd/netns.h | 2 | ||||
-rw-r--r-- | fs/nfsd/nfs3xdr.c | 10 | ||||
-rw-r--r-- | fs/nfsd/nfs4layouts.c | 4 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 19 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 127 | ||||
-rw-r--r-- | fs/nfsd/nfssvc.c | 4 | ||||
-rw-r--r-- | fs/nfsd/state.h | 11 | ||||
-rw-r--r-- | fs/nfsd/xdr4.h | 13 | ||||
-rw-r--r-- | include/linux/fs.h | 4 | ||||
-rw-r--r-- | include/linux/sunrpc/svc.h | 1 | ||||
-rw-r--r-- | include/trace/events/sunrpc.h | 17 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/svcauth_gss.c | 14 | ||||
-rw-r--r-- | net/sunrpc/svc_xprt.c | 106 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 6 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 11 |
18 files changed, 225 insertions, 173 deletions
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index b837fb7..a8e3777 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -369,6 +369,7 @@ static int lockd_start_svc(struct svc_serv *serv) printk(KERN_WARNING "lockd_up: svc_rqst allocation failed, error=%d\n", error); + lockd_unregister_notifiers(); goto out_rqst; } @@ -459,13 +460,16 @@ int lockd_up(struct net *net) } error = lockd_up_net(serv, net); - if (error < 0) - goto err_net; + if (error < 0) { + lockd_unregister_notifiers(); + goto err_put; + } error = lockd_start_svc(serv); - if (error < 0) - goto err_start; - + if (error < 0) { + lockd_down_net(serv, net); + goto err_put; + } nlmsvc_users++; /* * Note: svc_serv structures have an initial use count of 1, @@ -476,12 +480,6 @@ err_put: err_create: mutex_unlock(&nlmsvc_mutex); return error; - -err_start: - lockd_down_net(serv, net); -err_net: - lockd_unregister_notifiers(); - goto err_put; } EXPORT_SYMBOL_GPL(lockd_up); diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 420d3a0..897b299 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -55,14 +55,7 @@ locks_end_grace(struct lock_manager *lm) } EXPORT_SYMBOL_GPL(locks_end_grace); -/** - * locks_in_grace - * - * Lock managers call this function to determine when it is OK for them - * to answer ordinary lock requests, and when they should accept only - * lock reclaims. - */ -int +static bool __state_in_grace(struct net *net, bool open) { struct list_head *grace_list = net_generic(net, grace_net_id); @@ -78,15 +71,22 @@ __state_in_grace(struct net *net, bool open) return false; } -int locks_in_grace(struct net *net) +/** + * locks_in_grace + * + * Lock managers call this function to determine when it is OK for them + * to answer ordinary lock requests, and when they should accept only + * lock reclaims. + */ +bool locks_in_grace(struct net *net) { - return __state_in_grace(net, 0); + return __state_in_grace(net, false); } EXPORT_SYMBOL_GPL(locks_in_grace); -int opens_in_grace(struct net *net) +bool opens_in_grace(struct net *net) { - return __state_in_grace(net, 1); + return __state_in_grace(net, true); } EXPORT_SYMBOL_GPL(opens_in_grace); diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 6dfede6d..8483125 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -12,6 +12,7 @@ #include <linux/nsproxy.h> #include <linux/sunrpc/addr.h> #include <linux/uaccess.h> +#include <linux/kernel.h> #include "state.h" #include "netns.h" @@ -126,8 +127,6 @@ static struct nfsd_fault_inject_op inject_ops[] = { }, }; -#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op)) - int nfsd_fault_inject_init(void) { unsigned int i; @@ -138,7 +137,7 @@ int nfsd_fault_inject_init(void) if (!debug_dir) goto fail; - for (i = 0; i < NUM_INJECT_OPS; i++) { + for (i = 0; i < ARRAY_SIZE(inject_ops); i++) { op = &inject_ops[i]; if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) goto fail; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 3714231..1c91391 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -107,7 +107,7 @@ struct nfsd_net { bool lockd_up; /* Time of server startup */ - struct timeval nfssvc_boot; + struct timespec64 nfssvc_boot; /* * Max number of connections this nfsd container will allow. Defaults diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f38acd9..2758480 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -748,8 +748,9 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) if (resp->status == 0) { *p++ = htonl(resp->count); *p++ = htonl(resp->committed); - *p++ = htonl(nn->nfssvc_boot.tv_sec); - *p++ = htonl(nn->nfssvc_boot.tv_usec); + /* unique identifier, y2038 overflow can be ignored */ + *p++ = htonl((u32)nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_nsec); } return xdr_ressize_check(rqstp, p); } @@ -1119,8 +1120,9 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) p = encode_wcc_data(rqstp, p, &resp->fh); /* Write verifier */ if (resp->status == 0) { - *p++ = htonl(nn->nfssvc_boot.tv_sec); - *p++ = htonl(nn->nfssvc_boot.tv_usec); + /* unique identifier, y2038 overflow can be ignored */ + *p++ = htonl((u32)nn->nfssvc_boot.tv_sec); + *p++ = htonl(nn->nfssvc_boot.tv_nsec); } return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index ea45d95..7d88836 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -336,7 +336,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) trace_layout_recall(&ls->ls_stid.sc_stateid); - atomic_inc(&ls->ls_stid.sc_count); + refcount_inc(&ls->ls_stid.sc_count); nfsd4_run_cb(&ls->ls_recall); out_unlock: @@ -441,7 +441,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) goto done; } - atomic_inc(&ls->ls_stid.sc_count); + refcount_inc(&ls->ls_stid.sc_count); list_add_tail(&new->lo_perstate, &ls->ls_layouts); new = NULL; done: diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8487486..008ea0b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -485,9 +485,6 @@ static __be32 nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - if (!cstate->current_fh.fh_dentry) - return nfserr_nofilehandle; - u->getfh = &cstate->current_fh; return nfs_ok; } @@ -535,9 +532,6 @@ static __be32 nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - if (!cstate->current_fh.fh_dentry) - return nfserr_nofilehandle; - fh_dup2(&cstate->save_fh, &cstate->current_fh); if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) { memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t)); @@ -570,10 +564,11 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) /* * This is opaque to client, so no need to byte-swap. Use - * __force to keep sparse happy + * __force to keep sparse happy. y2038 time_t overflow is + * irrelevant in this usage. */ verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; - verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec; + verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec; memcpy(verifier->data, verf, sizeof(verifier->data)); } @@ -703,10 +698,8 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_link *link = &u->link; - __be32 status = nfserr_nofilehandle; + __be32 status; - if (!cstate->save_fh.fh_dentry) - return status; status = nfsd_link(rqstp, &cstate->current_fh, link->li_name, link->li_namelen, &cstate->save_fh); if (!status) @@ -850,10 +843,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_rename *rename = &u->rename; - __be32 status = nfserr_nofilehandle; + __be32 status; - if (!cstate->save_fh.fh_dentry) - return status; if (opens_in_grace(SVC_NET(rqstp)) && !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) return nfserr_grace; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0c04f81..b8281776 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -359,7 +359,7 @@ put_nfs4_file(struct nfs4_file *fi) { might_lock(&state_lock); - if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { + if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { hlist_del_rcu(&fi->fi_hash); spin_unlock(&state_lock); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); @@ -568,7 +568,7 @@ alloc_clnt_odstate(struct nfs4_client *clp) co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL); if (co) { co->co_client = clp; - atomic_set(&co->co_odcount, 1); + refcount_set(&co->co_odcount, 1); } return co; } @@ -586,7 +586,7 @@ static inline void get_clnt_odstate(struct nfs4_clnt_odstate *co) { if (co) - atomic_inc(&co->co_odcount); + refcount_inc(&co->co_odcount); } static void @@ -598,7 +598,7 @@ put_clnt_odstate(struct nfs4_clnt_odstate *co) return; fp = co->co_file; - if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) { + if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) { list_del(&co->co_perfile); spin_unlock(&fp->fi_lock); @@ -656,7 +656,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ - atomic_set(&stid->sc_count, 1); + refcount_set(&stid->sc_count, 1); spin_lock_init(&stid->sc_lock); /* @@ -813,7 +813,7 @@ nfs4_put_stid(struct nfs4_stid *s) might_lock(&clp->cl_lock); - if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) { + if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) { wake_up_all(&close_wq); return; } @@ -913,7 +913,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) if (status) return status; ++fp->fi_delegees; - atomic_inc(&dp->dl_stid.sc_count); + refcount_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &clp->cl_delegations); @@ -1214,7 +1214,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, WARN_ON_ONCE(!list_empty(&stp->st_locks)); - if (!atomic_dec_and_test(&s->sc_count)) { + if (!refcount_dec_and_test(&s->sc_count)) { wake_up_all(&close_wq); return; } @@ -1439,8 +1439,10 @@ free_session_slots(struct nfsd4_session *ses) { int i; - for (i = 0; i < ses->se_fchannel.maxreqs; i++) + for (i = 0; i < ses->se_fchannel.maxreqs; i++) { + free_svc_cred(&ses->se_slots[i]->sl_cred); kfree(ses->se_slots[i]); + } } /* @@ -1472,6 +1474,11 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) spin_lock(&nfsd_drc_lock); avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, nfsd_drc_max_mem - nfsd_drc_mem_used); + /* + * Never use more than a third of the remaining memory, + * unless it's the only way to give this client a slot: + */ + avail = clamp_t(int, avail, slotsize, avail/3); num = min_t(int, num, avail / slotsize); nfsd_drc_mem_used += num * slotsize; spin_unlock(&nfsd_drc_lock); @@ -2072,7 +2079,7 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) s = find_stateid_locked(cl, t); if (s != NULL) { if (typemask & s->sc_type) - atomic_inc(&s->sc_count); + refcount_inc(&s->sc_count); else s = NULL; } @@ -2287,14 +2294,18 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) dprintk("--> %s slot %p\n", __func__, slot); + slot->sl_flags |= NFSD4_SLOT_INITIALIZED; slot->sl_opcnt = resp->opcnt; slot->sl_status = resp->cstate.status; + free_svc_cred(&slot->sl_cred); + copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred); - slot->sl_flags |= NFSD4_SLOT_INITIALIZED; - if (nfsd4_not_cached(resp)) { - slot->sl_datalen = 0; + if (!nfsd4_cache_this(resp)) { + slot->sl_flags &= ~NFSD4_SLOT_CACHED; return; } + slot->sl_flags |= NFSD4_SLOT_CACHED; + base = resp->cstate.data_offset; slot->sl_datalen = buf->len - base; if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen)) @@ -2321,8 +2332,16 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, op = &args->ops[resp->opcnt - 1]; nfsd4_encode_operation(resp, op); - /* Return nfserr_retry_uncached_rep in next operation. */ - if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) { + if (slot->sl_flags & NFSD4_SLOT_CACHED) + return op->status; + if (args->opcnt == 1) { + /* + * The original operation wasn't a solo sequence--we + * always cache those--so this retry must not match the + * original: + */ + op->status = nfserr_seq_false_retry; + } else { op = &args->ops[resp->opcnt++]; op->status = nfserr_retry_uncached_rep; nfsd4_encode_operation(resp, op); @@ -2986,6 +3005,34 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp, return xb->len > session->se_fchannel.maxreq_sz; } +static bool replay_matches_cache(struct svc_rqst *rqstp, + struct nfsd4_sequence *seq, struct nfsd4_slot *slot) +{ + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + + if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) != + (bool)seq->cachethis) + return false; + /* + * If there's an error than the reply can have fewer ops than + * the call. But if we cached a reply with *more* ops than the + * call you're sending us now, then this new call is clearly not + * really a replay of the old one: + */ + if (slot->sl_opcnt < argp->opcnt) + return false; + /* This is the only check explicitly called by spec: */ + if (!same_creds(&rqstp->rq_cred, &slot->sl_cred)) + return false; + /* + * There may be more comparisons we could actually do, but the + * spec doesn't require us to catch every case where the calls + * don't match (that would require caching the call as well as + * the reply), so we don't bother. + */ + return true; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -3045,6 +3092,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_seq_misordered; if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) goto out_put_session; + status = nfserr_seq_false_retry; + if (!replay_matches_cache(rqstp, seq, slot)) + goto out_put_session; cstate->slot = slot; cstate->session = session; cstate->clp = clp; @@ -3351,7 +3401,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, { lockdep_assert_held(&state_lock); - atomic_set(&fp->fi_ref, 1); + refcount_set(&fp->fi_ref, 1); spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); @@ -3514,7 +3564,7 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) continue; if (local->st_stateowner == &oo->oo_owner) { ret = local; - atomic_inc(&ret->st_stid.sc_count); + refcount_inc(&ret->st_stid.sc_count); break; } } @@ -3573,7 +3623,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open) goto out_unlock; open->op_stp = NULL; - atomic_inc(&stp->st_stid.sc_count); + refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner); @@ -3621,7 +3671,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) * there should be no danger of the refcount going back up again at * this point. */ - wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2); + wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2); release_all_access(s); if (s->st_stid.sc_file) { @@ -3647,7 +3697,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval) hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { if (fh_match(&fp->fi_fhandle, fh)) { - if (atomic_inc_not_zero(&fp->fi_ref)) + if (refcount_inc_not_zero(&fp->fi_ref)) return fp; } } @@ -3783,7 +3833,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * lock) we know the server hasn't removed the lease yet, we know * it's safe to take a reference. */ - atomic_inc(&dp->dl_stid.sc_count); + refcount_inc(&dp->dl_stid.sc_count); nfsd4_run_cb(&dp->dl_recall); } @@ -3966,7 +4016,8 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei { struct nfs4_stid *ret; - ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID); + ret = find_stateid_by_type(cl, s, + NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID); if (!ret) return NULL; return delegstateid(ret); @@ -3989,6 +4040,12 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, deleg = find_deleg_stateid(cl, &open->op_delegate_stateid); if (deleg == NULL) goto out; + if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) { + nfs4_put_stid(&deleg->dl_stid); + if (cl->cl_minorversion) + status = nfserr_deleg_revoked; + goto out; + } flags = share_access_to_flags(open->op_share_access); status = nfs4_check_delegmode(deleg, flags); if (status) { @@ -4858,6 +4915,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, struct nfs4_stid **s, struct nfsd_net *nn) { __be32 status; + bool return_revoked = false; + + /* + * only return revoked delegations if explicitly asked. + * otherwise we report revoked or bad_stateid status. + */ + if (typemask & NFS4_REVOKED_DELEG_STID) + return_revoked = true; + else if (typemask & NFS4_DELEG_STID) + typemask |= NFS4_REVOKED_DELEG_STID; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; @@ -4872,6 +4939,12 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, *s = find_stateid_by_type(cstate->clp, stateid, typemask); if (!*s) return nfserr_bad_stateid; + if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { + nfs4_put_stid(*s); + if (cstate->minorversion) + return nfserr_deleg_revoked; + return nfserr_bad_stateid; + } return nfs_ok; } @@ -5071,7 +5144,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ret = nfserr_locks_held; break; case NFS4_LOCK_STID: - atomic_inc(&s->sc_count); + refcount_inc(&s->sc_count); spin_unlock(&cl->cl_lock); ret = nfsd4_free_lock_stateid(stateid, s); goto out; @@ -5578,7 +5651,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, lockdep_assert_held(&clp->cl_lock); - atomic_inc(&stp->st_stid.sc_count); + refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); @@ -5604,7 +5677,7 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { if (lst->st_stid.sc_file == fp) { - atomic_inc(&lst->st_stid.sc_count); + refcount_inc(&lst->st_stid.sc_count); return lst; } } @@ -7006,8 +7079,8 @@ nfs4_state_start_net(struct net *net) nn->nfsd4_manager.block_opens = true; locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); - printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", - nn->nfsd4_grace, net); + printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", + nn->nfsd4_grace, net->ns.inum); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e02bd27..33117d4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -447,7 +447,7 @@ void nfsd_reset_versions(void) */ static void set_max_drc(void) { - #define NFSD_DRC_SIZE_SHIFT 10 + #define NFSD_DRC_SIZE_SHIFT 7 nfsd_drc_max_mem = (nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; nfsd_drc_mem_used = 0; @@ -517,7 +517,7 @@ int nfsd_create_serv(struct net *net) register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - do_gettimeofday(&nn->nfssvc_boot); /* record boot time */ + ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */ return 0; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 005c911..f3772ea 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -36,6 +36,7 @@ #define _NFSD4_STATE_H #include <linux/idr.h> +#include <linux/refcount.h> #include <linux/sunrpc/svc_xprt.h> #include "nfsfh.h" @@ -83,7 +84,7 @@ struct nfsd4_callback_ops { * fields that are of general use to any stateid. */ struct nfs4_stid { - atomic_t sc_count; + refcount_t sc_count; #define NFS4_OPEN_STID 1 #define NFS4_LOCK_STID 2 #define NFS4_DELEG_STID 4 @@ -169,11 +170,13 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) struct nfsd4_slot { u32 sl_seqid; __be32 sl_status; + struct svc_cred sl_cred; u32 sl_datalen; u16 sl_opcnt; #define NFSD4_SLOT_INUSE (1 << 0) #define NFSD4_SLOT_CACHETHIS (1 << 1) #define NFSD4_SLOT_INITIALIZED (1 << 2) +#define NFSD4_SLOT_CACHED (1 << 3) u8 sl_flags; char sl_data[]; }; @@ -465,7 +468,7 @@ struct nfs4_clnt_odstate { struct nfs4_client *co_client; struct nfs4_file *co_file; struct list_head co_perfile; - atomic_t co_odcount; + refcount_t co_odcount; }; /* @@ -481,7 +484,7 @@ struct nfs4_clnt_odstate { * the global state_lock spinlock. */ struct nfs4_file { - atomic_t fi_ref; + refcount_t fi_ref; spinlock_t fi_lock; struct hlist_node fi_hash; /* hash on fi_fhandle */ struct list_head fi_stateids; @@ -634,7 +637,7 @@ struct nfs4_file *find_file(struct knfsd_fh *fh); void put_nfs4_file(struct nfs4_file *fi); static inline void get_nfs4_file(struct nfs4_file *fi) { - atomic_inc(&fi->fi_ref); + refcount_inc(&fi->fi_ref); } struct file *find_any_file(struct nfs4_file *f); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 1e4edbf..bc29511 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -649,9 +649,18 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; } -static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) +/* + * The session reply cache only needs to cache replies that the client + * actually asked us to. But it's almost free for us to cache compounds + * consisting of only a SEQUENCE op, so we may as well cache those too. + * Also, the protocol doesn't give us a convenient response in the case + * of a replay of a solo SEQUENCE op that wasn't cached + * (RETRY_UNCACHED_REP can only be returned in the second op of a + * compound). + */ +static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp) { - return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) + return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) || nfsd4_is_solo_sequence(resp); } diff --git a/include/linux/fs.h b/include/linux/fs.h index e9379e2..2995a27 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -971,8 +971,8 @@ struct lock_manager { struct net; void locks_start_grace(struct net *, struct lock_manager *); void locks_end_grace(struct lock_manager *); -int locks_in_grace(struct net *); -int opens_in_grace(struct net *); +bool locks_in_grace(struct net *); +bool opens_in_grace(struct net *); /* that will die - we need it for nfs_lock_info */ #include <linux/nfs_fs_i.h> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3b9f0d1..786ae22 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -47,6 +47,7 @@ struct svc_pool { struct svc_pool_stats sp_stats; /* statistics on pool operation */ #define SP_TASK_PENDING (0) /* still work to do even if no * xprt is queued. */ +#define SP_CONGESTED (1) unsigned long sp_flags; } ____cacheline_aligned_in_smp; diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ecbdbfe..8c153f6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -486,20 +486,22 @@ TRACE_EVENT(svc_recv, TP_ARGS(rqst, status), TP_STRUCT__entry( - __field(struct sockaddr *, addr) __field(u32, xid) __field(int, status) __field(unsigned long, flags) + __dynamic_array(unsigned char, addr, rqst->rq_addrlen) ), TP_fast_assign( - __entry->addr = (struct sockaddr *)&rqst->rq_addr; __entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0; __entry->status = status; __entry->flags = rqst->rq_flags; + memcpy(__get_dynamic_array(addr), + &rqst->rq_addr, rqst->rq_addrlen); ), - TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", __entry->addr, + TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", + (struct sockaddr *)__get_dynamic_array(addr), __entry->xid, __entry->status, show_rqstp_flags(__entry->flags)) ); @@ -544,22 +546,23 @@ DECLARE_EVENT_CLASS(svc_rqst_status, TP_ARGS(rqst, status), TP_STRUCT__entry( - __field(struct sockaddr *, addr) __field(u32, xid) - __field(int, dropme) __field(int, status) __field(unsigned long, flags) + __dynamic_array(unsigned char, addr, rqst->rq_addrlen) ), TP_fast_assign( - __entry->addr = (struct sockaddr *)&rqst->rq_addr; __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->status = status; __entry->flags = rqst->rq_flags; + memcpy(__get_dynamic_array(addr), + &rqst->rq_addr, rqst->rq_addrlen); ), TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s", - __entry->addr, __entry->xid, + (struct sockaddr *)__get_dynamic_array(addr), + __entry->xid, __entry->status, show_rqstp_flags(__entry->flags)) ); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 7b1ee5a..73165e9 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g return stat; if (integ_len > buf->len) return stat; - if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) - BUG(); + if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) { + WARN_ON_ONCE(1); + return stat; + } /* copy out mic... */ if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) - BUG(); + return stat; if (mic.len > RPC_MAX_AUTH_SIZE) return stat; mic.data = kmalloc(mic.len, GFP_KERNEL); @@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) BUG_ON(integ_len % 4); *p++ = htonl(integ_len); *p++ = htonl(gc->gc_seq); - if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) - BUG(); + if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) { + WARN_ON_ONCE(1); + goto out_err; + } if (resbuf->tail[0].iov_base == NULL) { if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) goto out_err; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 71de77b..e8e0831 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) svc_xprt_received(new); } -int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, - struct net *net, const int family, - const unsigned short port, int flags) +static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, + struct net *net, const int family, + const unsigned short port, int flags) { struct svc_xprt_class *xcl; @@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) struct svc_pool *pool; struct svc_rqst *rqstp = NULL; int cpu; - bool queued = false; if (!svc_xprt_has_something_to_do(xprt)) goto out; @@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) atomic_long_inc(&pool->sp_stats.packets); -redo_search: + dprintk("svc: transport %p put into queue\n", xprt); + spin_lock_bh(&pool->sp_lock); + list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); + pool->sp_stats.sockets_queued++; + spin_unlock_bh(&pool->sp_lock); + /* find a thread for this xprt */ rcu_read_lock(); list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - /* Do a lockless check first */ - if (test_bit(RQ_BUSY, &rqstp->rq_flags)) + if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) continue; - - /* - * Once the xprt has been queued, it can only be dequeued by - * the task that intends to service it. All we can do at that - * point is to try to wake this thread back up so that it can - * do so. - */ - if (!queued) { - spin_lock_bh(&rqstp->rq_lock); - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) { - /* already busy, move on... */ - spin_unlock_bh(&rqstp->rq_lock); - continue; - } - - /* this one will do */ - rqstp->rq_xprt = xprt; - svc_xprt_get(xprt); - spin_unlock_bh(&rqstp->rq_lock); - } - rcu_read_unlock(); - atomic_long_inc(&pool->sp_stats.threads_woken); wake_up_process(rqstp->rq_task); - put_cpu(); - goto out; - } - rcu_read_unlock(); - - /* - * We didn't find an idle thread to use, so we need to queue the xprt. - * Do so and then search again. If we find one, we can't hook this one - * up to it directly but we can wake the thread up in the hopes that it - * will pick it up once it searches for a xprt to service. - */ - if (!queued) { - queued = true; - dprintk("svc: transport %p put into queue\n", xprt); - spin_lock_bh(&pool->sp_lock); - list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); - pool->sp_stats.sockets_queued++; - spin_unlock_bh(&pool->sp_lock); - goto redo_search; + goto out_unlock; } + set_bit(SP_CONGESTED, &pool->sp_flags); rqstp = NULL; +out_unlock: + rcu_read_unlock(); put_cpu(); out: trace_svc_xprt_do_enqueue(xprt, rqstp); @@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp) static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { - struct svc_xprt *xprt; struct svc_pool *pool = rqstp->rq_pool; long time_left = 0; /* rq_xprt should be clear on entry */ WARN_ON_ONCE(rqstp->rq_xprt); - /* Normally we will wait up to 5 seconds for any required - * cache information to be provided. - */ - rqstp->rq_chandle.thread_wait = 5*HZ; - - xprt = svc_xprt_dequeue(pool); - if (xprt) { - rqstp->rq_xprt = xprt; - - /* As there is a shortage of threads and this request - * had to be queued, don't allow the thread to wait so - * long for cache updates. - */ - rqstp->rq_chandle.thread_wait = 1*HZ; - clear_bit(SP_TASK_PENDING, &pool->sp_flags); - return xprt; - } + rqstp->rq_xprt = svc_xprt_dequeue(pool); + if (rqstp->rq_xprt) + goto out_found; /* * We have to be able to interrupt this wait * to bring down the daemons ... */ set_current_state(TASK_INTERRUPTIBLE); + smp_mb__before_atomic(); + clear_bit(SP_CONGESTED, &pool->sp_flags); clear_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb(); + smp_mb__after_atomic(); if (likely(rqst_should_sleep(rqstp))) time_left = schedule_timeout(timeout); @@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) try_to_freeze(); - spin_lock_bh(&rqstp->rq_lock); set_bit(RQ_BUSY, &rqstp->rq_flags); - spin_unlock_bh(&rqstp->rq_lock); - - xprt = rqstp->rq_xprt; - if (xprt != NULL) - return xprt; + smp_mb__after_atomic(); + rqstp->rq_xprt = svc_xprt_dequeue(pool); + if (rqstp->rq_xprt) + goto out_found; if (!time_left) atomic_long_inc(&pool->sp_stats.threads_timedout); @@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (signalled() || kthread_should_stop()) return ERR_PTR(-EINTR); return ERR_PTR(-EAGAIN); +out_found: + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. + */ + if (!test_bit(SP_CONGESTED, &pool->sp_flags)) + rqstp->rq_chandle.thread_wait = 5*HZ; + else + rqstp->rq_chandle.thread_wait = 1*HZ; + return rqstp->rq_xprt; } static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 992594b..af78935 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -133,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, if (ret) goto out_err; + /* Bump page refcnt so Send completion doesn't release + * the rq_buffer before all retransmits are complete. + */ + get_page(virt_to_page(rqst->rq_buffer)); ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0); if (ret) goto out_unmap; @@ -165,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task) return -EINVAL; } - /* svc_rdma_sendto releases this page */ page = alloc_page(RPCRDMA_DEF_GFP); if (!page) return -ENOMEM; @@ -184,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; + put_page(virt_to_page(rqst->rq_buffer)); kfree(rqst->rq_rbuffer); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 5caf8e7..46ec069 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -290,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context) ib_event_msg(event->event), event->event, event->element.qp); set_bit(XPT_CLOSE, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); break; } } @@ -322,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) goto out; - svc_xprt_enqueue(&xprt->sc_xprt); - goto out; + goto out_enqueue; flushed: if (wc->status != IB_WC_WR_FLUSH_ERR) @@ -333,6 +333,8 @@ flushed: set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 1); +out_enqueue: + svc_xprt_enqueue(&xprt->sc_xprt); out: svc_xprt_put(&xprt->sc_xprt); } @@ -358,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) { set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_xprt_enqueue(&xprt->sc_xprt); if (wc->status != IB_WC_WR_FLUSH_ERR) pr_err("svcrdma: Send: %s (%u/0x%x)\n", ib_wc_status_msg(wc->status), @@ -569,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, case RDMA_CM_EVENT_DEVICE_REMOVAL: dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n", xprt, cma_id); - if (xprt) + if (xprt) { set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_xprt_enqueue(&xprt->sc_xprt); + } break; default: |