summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2016-05-02 14:43:03 -0400
committerAnna Schumaker <Anna.Schumaker@Netapp.com>2016-05-17 15:48:04 -0400
commitb2dde94bfa374b9e851756ab8191209f1a604e0a (patch)
tree431a60c0c877faa819b9c6892fc5c49f492f23e4
parent0b043b9fb5dabcb6f187136cc685b26a7f8bcdb1 (diff)
downloadop-kernel-dev-b2dde94bfa374b9e851756ab8191209f1a604e0a.zip
op-kernel-dev-b2dde94bfa374b9e851756ab8191209f1a604e0a.tar.gz
xprtrdma: Faster server reboot recovery
In a cluster failover scenario, it is desirable for the client to attempt to reconnect quickly, as an alternate NFS server is already waiting to take over for the down server. The client can't see that a server IP address has moved to a new server until the existing connection is gone. For fabrics and devices where it is meaningful, set a definite upper bound on the amount of time before it is determined that a connection is no longer valid. This allows the RPC client to detect connection loss in a timely matter, then perform a fresh resolution of the server GUID in case it has changed (cluster failover). Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Tested-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
-rw-r--r--net/sunrpc/xprtrdma/verbs.c12
1 files changed, 11 insertions, 1 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b7a5bc1..be66f65 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -554,6 +554,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.recv_cq = recvcq;
/* Initialize cma parameters */
+ memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));
/* RPC/RDMA does not use private data */
ep->rep_remote_cma.private_data = NULL;
@@ -567,7 +568,16 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_remote_cma.responder_resources =
ia->ri_device->attrs.max_qp_rd_atom;
- ep->rep_remote_cma.retry_count = 7;
+ /* Limit transport retries so client can detect server
+ * GID changes quickly. RPC layer handles re-establishing
+ * transport connection and retransmission.
+ */
+ ep->rep_remote_cma.retry_count = 6;
+
+ /* RPC-over-RDMA handles its own flow control. In addition,
+ * make all RNR NAKs visible so we know that RPC-over-RDMA
+ * flow control is working correctly (no NAKs should be seen).
+ */
ep->rep_remote_cma.flow_control = 0;
ep->rep_remote_cma.rnr_retry_count = 0;
OpenPOWER on IntegriCloud