From 6f18dc893981e4daab29221d6a9771f3ce2dd8c5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 12 Nov 2015 09:44:33 -0500 Subject: svcrdma: Do not send XDR roundup bytes for a write chunk Minor optimization: when dealing with write chunk XDR roundup, do not post a Write WR for the zero bytes in the pad. Simply update the write segment in the RPC-over-RDMA header to reflect the extra pad bytes. The Reply chunk is also a write chunk, but the server does not use send_write_chunks() to send the Reply chunk. That's OK in this case: the server Upper Layer typically marshals the Reply chunk contents in a single contiguous buffer, without a separate tail for the XDR pad. The comments and the variable naming refer to "chunks" but what is really meant is "segments." The existing code sends only one xdr_write_chunk per RPC reply. The fix assumes this as well. When the XDR pad in the first write chunk is reached, the assumption is the Write list is complete and send_write_chunks() returns. That will remain a valid assumption until the server Upper Layer can support multiple bulk payload results per RPC. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 969a1ab..bad5eaa 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -342,6 +342,13 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, arg_ch->rs_handle, arg_ch->rs_offset, write_len); + + /* Do not send XDR pad bytes */ + if (chunk_no && write_len < 4) { + chunk_no++; + break; + } + chunk_off = 0; while (write_len) { ret = send_write(xprt, rqstp, -- cgit v1.1 From 6496500cf15f29ac8afc565e2e4b6f92a1324860 Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Fri, 20 Nov 2015 15:45:35 -0500 Subject: svcrpc: move some initialization to common code Minor cleanup, no change in behavior. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth.c | 2 ++ net/sunrpc/svcauth_unix.c | 8 -------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 79c0f34..69841db 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -55,6 +55,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) spin_unlock(&authtab_lock); rqstp->rq_auth_slack = 0; + init_svc_cred(&rqstp->rq_cred); rqstp->rq_authop = aops; return aops->accept(rqstp, authp); @@ -63,6 +64,7 @@ EXPORT_SYMBOL_GPL(svc_authenticate); int svc_set_client(struct svc_rqst *rqstp) { + rqstp->rq_client = NULL; return rqstp->rq_authop->set_client(rqstp); } EXPORT_SYMBOL_GPL(svc_set_client); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 621ca7b..dfacdc9 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -728,10 +728,6 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) struct kvec *resv = &rqstp->rq_res.head[0]; struct svc_cred *cred = &rqstp->rq_cred; - cred->cr_group_info = NULL; - cred->cr_principal = NULL; - rqstp->rq_client = NULL; - if (argv->iov_len < 3*4) return SVC_GARBAGE; @@ -794,10 +790,6 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) u32 slen, i; int len = argv->iov_len; - cred->cr_group_info = NULL; - cred->cr_principal = NULL; - rqstp->rq_client = NULL; - if ((len -= 3*4) < 0) return SVC_GARBAGE; -- cgit v1.1 From 414ca017a54d26c3a58ed1504884e51448d22ae1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 20 Nov 2015 10:48:02 -0500 Subject: nfsd4: fix gss-proxy 4.1 mounts for some AD principals The principal name on a gss cred is used to setup the NFSv4.0 callback, which has to have a client principal name to authenticate to. 
That code wants the name to be in the form servicetype@hostname. rpc.svcgssd passes down such names (and passes down no principal name at all in the case where the principal isn't a service principal). gss-proxy always passes down the principal name, and passes it down in the form servicetype/hostname@REALM. So we've been munging the name gss-proxy passes down into the format the NFSv4.0 callback code expects, or throwing away the name if we can't. Since the introduction of the MACH_CRED enforcement in NFSv4.1, we've also been using the principal name to verify that certain operations are done as the same principal as was used on the original EXCHANGE_ID call. For that application, the original name passed down by gss-proxy is also useful. Lack of that name in some cases was causing some Kerberized NFSv4.1 mount failures in an Active Directory environment. So also keep an unmodified copy of the name gss-proxy passes down, in cr_raw_principal. This fix only works in the gss-proxy case. The fix for legacy rpc.svcgssd would be more involved, and rpc.svcgssd already has other problems in the AD case. Reported-and-tested-by: James Ralston Acked-by: Simo Sorce Signed-off-by: J. 
Bruce Fields --- net/sunrpc/auth_gss/gss_rpc_upcall.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 59eeed4..f0c6a8c 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -326,6 +326,9 @@ int gssp_accept_sec_context_upcall(struct net *net, if (data->found_creds && client_name.data != NULL) { char *c; + data->creds.cr_raw_principal = kstrndup(client_name.data, + client_name.len, GFP_KERNEL); + data->creds.cr_principal = kstrndup(client_name.data, client_name.len, GFP_KERNEL); if (data->creds.cr_principal) { -- cgit v1.1 From c3d4879e01bec484f50a78c108341f039d470e96 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 11 Dec 2015 16:45:58 -0500 Subject: sunrpc: Add a function to close temporary transports immediately Add a function svc_age_temp_xprts_now() to close temporary transports whose xpt_local matches the address passed in server_addr immediately instead of waiting for them to be closed by the timer function. The function is intended to be used by notifier_blocks that will be added to nfsd and lockd that will run when an ip address is deleted. This will eliminate the ACK storms and client hangs that occur in HA-NFS configurations where nfsd & lockd is left running on the cluster nodes all the time and the NFS 'service' is migrated back and forth within a short timeframe. Signed-off-by: Scott Mayhew Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svc_xprt.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index a6cbb21..7422f28 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -10,11 +10,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -938,6 +940,49 @@ static void svc_age_temp_xprts(unsigned long closure) mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } +/* Close temporary transports whose xpt_local matches server_addr immediately + * instead of waiting for them to be picked up by the timer. + * + * This is meant to be called from a notifier_block that runs when an ip + * address is deleted. + */ +void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) +{ + struct svc_xprt *xprt; + struct svc_sock *svsk; + struct socket *sock; + struct list_head *le, *next; + LIST_HEAD(to_be_closed); + struct linger no_linger = { + .l_onoff = 1, + .l_linger = 0, + }; + + spin_lock_bh(&serv->sv_lock); + list_for_each_safe(le, next, &serv->sv_tempsocks) { + xprt = list_entry(le, struct svc_xprt, xpt_list); + if (rpc_cmp_addr(server_addr, (struct sockaddr *) + &xprt->xpt_local)) { + dprintk("svc_age_temp_xprts_now: found %p\n", xprt); + list_move(le, &to_be_closed); + } + } + spin_unlock_bh(&serv->sv_lock); + + while (!list_empty(&to_be_closed)) { + le = to_be_closed.next; + list_del_init(le); + xprt = list_entry(le, struct svc_xprt, xpt_list); + dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); + svsk = container_of(xprt, struct svc_sock, sk_xprt); + sock = svsk->sk_sock; + kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, + (char *)&no_linger, sizeof(no_linger)); + svc_close_xprt(xprt); + } +} +EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now); + static void call_xpt_users(struct svc_xprt *xprt) { struct svc_xpt_user *u; -- cgit 
v1.1 From 3daa020f9bf803c03c6b6c895921e2b09fcd494a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 23 Dec 2015 15:08:08 -0500 Subject: Revert "svcrdma: Do not send XDR roundup bytes for a write chunk" This reverts commit 6f18dc893981e4daab29221d6a9771f3ce2dd8c5. Just as one example, it appears this code could do the wrong thing in the case of a two-byte NFS READ that crosses a page boundary. Chuck says: "In that case, nfsd would pass down an xdr_buf that has one byte in a page, one byte in another page, and a two-byte XDR pad. The logic introduced by this optimization would be fooled, and neither the second byte nor the XDR pad would be written to the client." Cc: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index bad5eaa..969a1ab 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -342,13 +342,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, arg_ch->rs_handle, arg_ch->rs_offset, write_len); - - /* Do not send XDR pad bytes */ - if (chunk_no && write_len < 4) { - chunk_no++; - break; - } - chunk_off = 0; while (write_len) { ret = send_write(xprt, rqstp, -- cgit v1.1