diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-15 18:47:31 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-15 18:47:31 -0800 |
commit | 73e2e0c9b13c97df1c8565f6e158caac3c481b44 (patch) | |
tree | f3c561172579175a37a3b7908e8af05f4d6a70be /net/sunrpc | |
parent | ed3c5a0be38c180ab0899a0f52719e81f36b87a1 (diff) | |
parent | 2549f307b5997bf5dd91071428e8090d9faa8b1b (diff) | |
download | op-kernel-dev-73e2e0c9b13c97df1c8565f6e158caac3c481b44.zip op-kernel-dev-73e2e0c9b13c97df1c8565f6e158caac3c481b44.tar.gz |
Merge tag 'nfs-for-4.10-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
Stable bugfixes:
- Fix a pnfs deadlock between read resends and layoutreturn
- Don't invalidate the layout stateid while a layout return is
outstanding
- Don't schedule a layoutreturn if the layout stateid is marked as
invalid
- On a pNFS error, do not send LAYOUTGET until the LAYOUTRETURN is
complete
- SUNRPC: fix refcounting problems with auth_gss messages.
Features:
- Add client support for the NFSv4 umask attribute.
- NFSv4: Correct support for flock() stateids.
- Add a LAYOUTRETURN operation to CLOSE and DELEGRETURN when
return-on-close is specified
- Allow the pNFS/flexfiles layoutstat information to piggyback on
LAYOUTRETURN
- Optimise away redundant GETATTR calls when doing state recovery
and/or when not required by cache revalidation rules or
close-to-open cache consistency.
- Attribute cache improvements
- RPC/RDMA support for SG_GAP devices
Bugfixes:
- NFS: Fix performance regressions in readdir
- pNFS/flexfiles: Fix a deadlock on LAYOUTGET
- NFSv4: Add missing nfs_put_lock_context()
- NFSv4.1: Fix regression in callback retry handling
- Fix false positive NFSv4.0 trunking detection.
- pNFS/flexfiles: Only send layoutstats updates for mirrors that were
updated
- Various layout stateid related bugfixes
- RPC/RDMA bugfixes"
* tag 'nfs-for-4.10-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (82 commits)
SUNRPC: fix refcounting problems with auth_gss messages.
nfs: add support for the umask attribute
pNFS/flexfiles: Ensure we have enough buffer for layoutreturn
pNFS/flexfiles: Remove a redundant parameter in ff_layout_encode_ioerr()
pNFS/flexfiles: Fix a deadlock on LAYOUTGET
pNFS: Layoutreturn must free the layout after the layout-private data
pNFS/flexfiles: Fix ff_layout_add_ds_error_locked()
NFSv4: Add missing nfs_put_lock_context()
pNFS: Release NFS_LAYOUT_RETURN when invalidating the layout stateid
NFSv4.1: Don't schedule lease recovery in nfs4_schedule_session_recovery()
NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION replies to OP_SEQUENCE
NFS: Only look at the change attribute cache state in nfs_check_verifier
NFS: Fix incorrect size revalidation when holding a delegation
NFS: Fix incorrect mapping revalidation when holding a delegation
pNFS/flexfiles: Support sending layoutstats in layoutreturn
pNFS/flexfiles: Minor refactoring before adding iostats to layoutreturn
NFS: Fix up read of mirror stats
pNFS/flexfiles: Clean up layoutstats
pNFS/flexfiles: Refactor encoding of the layoutreturn payload
pNFS: Add a layoutreturn callback to performa layout-private setup
...
Diffstat (limited to 'net/sunrpc')
-rw-r--r-- | net/sunrpc/auth_gss/auth_gss.c | 7 | ||||
-rw-r--r-- | net/sunrpc/clnt.c | 2 | ||||
-rw-r--r-- | net/sunrpc/stats.c | 10 | ||||
-rw-r--r-- | net/sunrpc/xprt.c | 3 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/backchannel.c | 4 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/frwr_ops.c | 94 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 36 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/transport.c | 34 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 37 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h | 31 |
10 files changed, 138 insertions, 120 deletions
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 3dfd769..16cea00 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -541,9 +541,13 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { - int res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); + int res; + atomic_inc(&gss_msg->count); + res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); + atomic_dec(&gss_msg->count); + gss_release_msg(gss_new); gss_msg = ERR_PTR(res); } } else @@ -836,6 +840,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) warn_gssd(); gss_release_msg(gss_msg); } + gss_release_msg(gss_msg); } static void gss_pipe_dentry_destroy(struct dentry *dir, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 62a4827..1efbe48 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1926,6 +1926,8 @@ call_connect_status(struct rpc_task *task) case -EADDRINUSE: case -ENOBUFS: case -EPIPE: + xprt_conditional_disconnect(task->tk_rqstp->rq_xprt, + task->tk_rqstp->rq_connect_cookie); if (RPC_IS_SOFTCONN(task)) break; /* retry with existing socket, after a delay */ diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 2ecb994..caeb01a 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -157,15 +157,17 @@ void rpc_count_iostats_metrics(const struct rpc_task *task, spin_lock(&op_metrics->om_lock); op_metrics->om_ops++; - op_metrics->om_ntrans += req->rq_ntrans; + /* kernel API: om_ops must never become larger than om_ntrans */ + op_metrics->om_ntrans += max(req->rq_ntrans, 1); op_metrics->om_timeouts += task->tk_timeouts; op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent; op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; - delta = ktime_sub(req->rq_xtime, task->tk_start); - op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta); - + if (ktime_to_ns(req->rq_xtime)) { + delta = ktime_sub(req->rq_xtime, task->tk_start); + op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta); + } op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); delta = ktime_sub(now, task->tk_start); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 685e6d2..9a6be03 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -669,7 +669,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) spin_lock_bh(&xprt->transport_lock); if (cookie != xprt->connect_cookie) goto out; - if (test_bit(XPRT_CLOSING, &xprt->state) || !xprt_connected(xprt)) + if (test_bit(XPRT_CLOSING, &xprt->state)) goto out; set_bit(XPRT_CLOSE_WAIT, &xprt->state); /* Try to schedule an autoclose RPC call */ @@ -772,6 +772,7 @@ void xprt_connect(struct rpc_task *task) if (!xprt_connected(xprt)) { task->tk_rqstp->rq_bytes_sent = 0; task->tk_timeout = task->tk_rqstp->rq_timeout; + task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; rpc_sleep_on(&xprt->pending, task, xprt_connect_status); if (test_bit(XPRT_CLOSING, &xprt->state)) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 2c472e1..24fedd4 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -55,7 +55,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, if (IS_ERR(rb)) goto out_fail; req->rl_sendbuf = rb; - xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, size); + xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, + min_t(size_t, size, PAGE_SIZE)); rpcrdma_set_xprtdata(rqst, req); return 0; @@ -191,6 +192,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) size_t maxmsg; maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize); + maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE); return maxmsg - RPCRDMA_HDRLEN_MIN; } diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 26b26be..47bed53 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -101,7 +101,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) struct rpcrdma_frmr *f = &r->frmr; int rc; - f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, depth); + f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); if (IS_ERR(f->fr_mr)) goto out_mr_err; @@ -157,7 +157,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) return rc; } - f->fr_mr = ib_alloc_mr(ia->ri_pd, IB_MR_TYPE_MEM_REG, + f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, ia->ri_max_frmr_depth); if (IS_ERR(f->fr_mr)) { pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", @@ -171,10 +171,6 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) } /* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. - * - * There's no recovery if this fails. The FRMR is abandoned, but - * remains in rb_all. It will be cleaned up when the transport is - * destroyed. */ static void frwr_op_recover_mr(struct rpcrdma_mw *mw) @@ -210,11 +206,16 @@ static int frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { + struct ib_device_attr *attrs = &ia->ri_device->attrs; int depth, delta; + ia->ri_mrtype = IB_MR_TYPE_MEM_REG; + if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) + ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; + ia->ri_max_frmr_depth = min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - ia->ri_device->attrs.max_fast_reg_page_list_len); + attrs->max_fast_reg_page_list_len); dprintk("RPC: %s: device's max FR page list len = %u\n", __func__, ia->ri_max_frmr_depth); @@ -241,8 +242,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, } ep->rep_attr.cap.max_send_wr *= depth; - if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) { - cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth; + if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) { + cdata->max_requests = attrs->max_qp_wr / depth; if (!cdata->max_requests) return -EINVAL; ep->rep_attr.cap.max_send_wr = cdata->max_requests * @@ -348,6 +349,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing, struct rpcrdma_mw **out) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; + bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; struct rpcrdma_mw *mw; struct rpcrdma_frmr *frmr; struct ib_mr *mr; @@ -383,8 +385,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ++seg; ++i; - - /* Check for holes */ + if (holes_ok) + continue; if ((i < nsegs && offset_in_page(seg->mr_offset)) || offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; @@ -421,7 +423,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; - DECR_CQCOUNT(&r_xprt->rx_ep); + rpcrdma_set_signaled(&r_xprt->rx_ep, ®_wr->wr); rc = ib_post_send(ia->ri_id->qp, ®_wr->wr, &bad_wr); if (rc) goto out_senderr; @@ -451,26 +453,6 @@ out_senderr: return -ENOTCONN; } -static struct ib_send_wr * -__frwr_prepare_linv_wr(struct rpcrdma_mw *mw) -{ - struct rpcrdma_frmr *f = &mw->frmr; - struct ib_send_wr *invalidate_wr; - - dprintk("RPC: %s: invalidating frmr %p\n", __func__, f); - - f->fr_state = FRMR_IS_INVALID; - invalidate_wr = &f->fr_invwr; - - memset(invalidate_wr, 0, sizeof(*invalidate_wr)); - f->fr_cqe.done = frwr_wc_localinv; - invalidate_wr->wr_cqe = &f->fr_cqe; - invalidate_wr->opcode = IB_WR_LOCAL_INV; - invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey; - - return invalidate_wr; -} - /* Invalidate all memory regions that were registered for "req". * * Sleeps until it is safe for the host CPU to access the @@ -481,12 +463,12 @@ __frwr_prepare_linv_wr(struct rpcrdma_mw *mw) static void frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) { - struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr; + struct ib_send_wr *first, **prev, *last, *bad_wr; struct rpcrdma_rep *rep = req->rl_reply; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mw *mw, *tmp; struct rpcrdma_frmr *f; - int rc; + int count, rc; dprintk("RPC: %s: req %p\n", __func__, req); @@ -496,22 +478,29 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * a single ib_post_send() call. */ f = NULL; - invalidate_wrs = pos = prev = NULL; + count = 0; + prev = &first; list_for_each_entry(mw, &req->rl_registered, mw_list) { + mw->frmr.fr_state = FRMR_IS_INVALID; + if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) && - (mw->mw_handle == rep->rr_inv_rkey)) { - mw->frmr.fr_state = FRMR_IS_INVALID; + (mw->mw_handle == rep->rr_inv_rkey)) continue; - } - - pos = __frwr_prepare_linv_wr(mw); - if (!invalidate_wrs) - invalidate_wrs = pos; - else - prev->next = pos; - prev = pos; f = &mw->frmr; + dprintk("RPC: %s: invalidating frmr %p\n", + __func__, f); + + f->fr_cqe.done = frwr_wc_localinv; + last = &f->fr_invwr; + memset(last, 0, sizeof(*last)); + last->wr_cqe = &f->fr_cqe; + last->opcode = IB_WR_LOCAL_INV; + last->ex.invalidate_rkey = mw->mw_handle; + count++; + + *prev = last; + prev = &last->next; } if (!f) goto unmap; @@ -520,17 +509,22 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * last WR in the chain completes, all WRs in the chain * are complete. */ - f->fr_invwr.send_flags = IB_SEND_SIGNALED; + last->send_flags = IB_SEND_SIGNALED; f->fr_cqe.done = frwr_wc_localinv_wake; reinit_completion(&f->fr_linv_done); - INIT_CQCOUNT(&r_xprt->rx_ep); + + /* Initialize CQ count, since there is always a signaled + * WR being posted here. The new cqcount depends on how + * many SQEs are about to be consumed. + */ + rpcrdma_init_cqcount(&r_xprt->rx_ep, count); /* Transport disconnect drains the receive CQ before it * replaces the QP. The RPC reply handler won't call us * unless ri_id->qp is a valid pointer. */ r_xprt->rx_stats.local_inv_needed++; - rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); if (rc) goto reset_mrs; @@ -541,7 +535,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ unmap: list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { - dprintk("RPC: %s: unmapping frmr %p\n", + dprintk("RPC: %s: DMA unmapping frmr %p\n", __func__, &mw->frmr); list_del_init(&mw->mw_list); ib_dma_unmap_sg(ia->ri_device, @@ -559,7 +553,7 @@ reset_mrs: */ list_for_each_entry(mw, &req->rl_registered, mw_list) { f = &mw->frmr; - if (mw->frmr.fr_mr->rkey == bad_wr->ex.invalidate_rkey) { + if (mw->mw_handle == bad_wr->ex.invalidate_rkey) { __frwr_reset_mr(ia, mw); bad_wr = bad_wr->next; } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index d987c2d..c52e0f2 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -786,7 +786,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, int wrchunk, __be32 **iptrp) ifdebug(FACILITY) { u64 off; xdr_decode_hyper((__be32 *)&seg->rs_offset, &off); - dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", + dprintk("RPC: %s: chunk %d@0x%016llx:0x%08x\n", __func__, be32_to_cpu(seg->rs_length), (unsigned long long)off, @@ -906,28 +906,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) return fixup_copy_count; } -void -rpcrdma_connect_worker(struct work_struct *work) -{ - struct rpcrdma_ep *ep = - container_of(work, struct rpcrdma_ep, rep_connect_worker.work); - struct rpcrdma_xprt *r_xprt = - container_of(ep, struct rpcrdma_xprt, rx_ep); - struct rpc_xprt *xprt = &r_xprt->rx_xprt; - - spin_lock_bh(&xprt->transport_lock); - if (++xprt->connect_cookie == 0) /* maintain a reserved value */ - ++xprt->connect_cookie; - if (ep->rep_connected > 0) { - if (!xprt_test_and_set_connected(xprt)) - xprt_wake_pending_tasks(xprt, 0); - } else { - if (xprt_test_and_clear_connected(xprt)) - xprt_wake_pending_tasks(xprt, -ENOTCONN); - } - spin_unlock_bh(&xprt->transport_lock); -} - #if defined(CONFIG_SUNRPC_BACKCHANNEL) /* By convention, backchannel calls arrive via rdma_msg type * messages, and never populate the chunk lists. This makes @@ -959,18 +937,6 @@ rpcrdma_is_bcall(struct rpcrdma_msg *headerp) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -/* - * This function is called when an async event is posted to - * the connection which changes the connection state. All it - * does at this point is mark the connection up/down, the rpc - * timers do the rest. - */ -void -rpcrdma_conn_func(struct rpcrdma_ep *ep) -{ - schedule_delayed_work(&ep->rep_connect_worker, 0); -} - /* Process received RPC/RDMA messages. * * Errors must result in the RPC task either being awakened, or diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ed5e285..534c178 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -219,6 +219,34 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt) } } +void +rpcrdma_conn_func(struct rpcrdma_ep *ep) +{ + schedule_delayed_work(&ep->rep_connect_worker, 0); +} + +void +rpcrdma_connect_worker(struct work_struct *work) +{ + struct rpcrdma_ep *ep = + container_of(work, struct rpcrdma_ep, rep_connect_worker.work); + struct rpcrdma_xprt *r_xprt = + container_of(ep, struct rpcrdma_xprt, rx_ep); + struct rpc_xprt *xprt = &r_xprt->rx_xprt; + + spin_lock_bh(&xprt->transport_lock); + if (++xprt->connect_cookie == 0) /* maintain a reserved value */ + ++xprt->connect_cookie; + if (ep->rep_connected > 0) { + if (!xprt_test_and_set_connected(xprt)) + xprt_wake_pending_tasks(xprt, 0); + } else { + if (xprt_test_and_clear_connected(xprt)) + xprt_wake_pending_tasks(xprt, -ENOTCONN); + } + spin_unlock_bh(&xprt->transport_lock); +} + static void xprt_rdma_connect_worker(struct work_struct *work) { @@ -621,7 +649,8 @@ xprt_rdma_free(struct rpc_task *task) dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); - ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); + if (unlikely(!list_empty(&req->rl_registered))) + ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); rpcrdma_unmap_sges(ia, req); rpcrdma_buffer_put(req); } @@ -657,7 +686,8 @@ xprt_rdma_send_request(struct rpc_task *task) int rc = 0; /* On retransmit, remove any previously registered chunks */ - r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); + if (unlikely(!list_empty(&req->rl_registered))) + r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); rc = rpcrdma_marshal_req(rqst); if (rc < 0) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index ec74289..11d0774 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -103,9 +103,9 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) { struct rpcrdma_ep *ep = context; - pr_err("RPC: %s: %s on device %s ep %p\n", - __func__, ib_event_msg(event->event), - event->device->name, context); + pr_err("rpcrdma: %s on device %s ep %p\n", + ib_event_msg(event->event), event->device->name, context); + if (ep->rep_connected == 1) { ep->rep_connected = -EIO; rpcrdma_conn_func(ep); @@ -223,8 +223,8 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, cdata->inline_rsize = rsize; if (wsize < cdata->inline_wsize) cdata->inline_wsize = wsize; - pr_info("rpcrdma: max send %u, max recv %u\n", - cdata->inline_wsize, cdata->inline_rsize); + dprintk("RPC: %s: max send %u, max recv %u\n", + __func__, cdata->inline_wsize, cdata->inline_rsize); rpcrdma_set_max_header_sizes(r_xprt); } @@ -331,6 +331,7 @@ static struct rdma_cm_id * rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia, struct sockaddr *addr) { + unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; struct rdma_cm_id *id; int rc; @@ -352,8 +353,12 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, __func__, rc); goto out; } - wait_for_completion_interruptible_timeout(&ia->ri_done, - msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); + rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); + if (rc < 0) { + dprintk("RPC: %s: wait() exited: %i\n", + __func__, rc); + goto out; + } /* FIXME: * Until xprtrdma supports DEVICE_REMOVAL, the provider must @@ -376,8 +381,12 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, __func__, rc); goto put; } - wait_for_completion_interruptible_timeout(&ia->ri_done, - msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); + rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); + if (rc < 0) { + dprintk("RPC: %s: wait() exited: %i\n", + __func__, rc); + goto put; + } rc = ia->ri_async_rc; if (rc) goto put; @@ -532,7 +541,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; /* always signal? */ - INIT_CQCOUNT(ep); + rpcrdma_init_cqcount(ep, 0); init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); @@ -1311,13 +1320,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, dprintk("RPC: %s: posting %d s/g entries\n", __func__, send_wr->num_sge); - if (DECR_CQCOUNT(ep) > 0) - send_wr->send_flags = 0; - else { /* Provider must take a send completion every now and then */ - INIT_CQCOUNT(ep); - send_wr->send_flags = IB_SEND_SIGNALED; - } - + rpcrdma_set_signaled(ep, send_wr); rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); if (rc) goto out_postsend_err; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 6e1bba3..e35efd4 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -75,6 +75,7 @@ struct rpcrdma_ia { unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; bool ri_reminv_expected; + enum ib_mr_type ri_mrtype; struct ib_qp_attr ri_qp_attr; struct ib_qp_init_attr ri_qp_init_attr; }; @@ -95,8 +96,24 @@ struct rpcrdma_ep { struct delayed_work rep_connect_worker; }; -#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) -#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) +static inline void +rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count) +{ + atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count); +} + +/* To update send queue accounting, provider must take a + * send completion every now and then. + */ +static inline void +rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr) +{ + send_wr->send_flags = 0; + if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) { + rpcrdma_init_cqcount(ep, 0); + send_wr->send_flags = IB_SEND_SIGNALED; + } +} /* Pre-allocate extra Work Requests for handling backward receives * and sends. This is a fixed value because the Work Queues are @@ -473,6 +490,7 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, struct rpcrdma_create_data_internal *); void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); +void rpcrdma_conn_func(struct rpcrdma_ep *ep); void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, @@ -532,13 +550,6 @@ rpcrdma_data_dir(bool writing) } /* - * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c - */ -void rpcrdma_connect_worker(struct work_struct *); -void rpcrdma_conn_func(struct rpcrdma_ep *); -void rpcrdma_reply_handler(struct work_struct *); - -/* * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c */ @@ -555,12 +566,14 @@ bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *, void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); int rpcrdma_marshal_req(struct rpc_rqst *); void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); +void rpcrdma_reply_handler(struct work_struct *work); /* RPC/RDMA module init - xprtrdma/transport.c */ extern unsigned int xprt_rdma_max_inline_read; void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); void xprt_rdma_free_addresses(struct rpc_xprt *xprt); +void rpcrdma_connect_worker(struct work_struct *work); void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); int xprt_rdma_init(void); void xprt_rdma_cleanup(void); |