From 3601c4a91ebbbf1cf69f66a2abeffc6c64a4fe64 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 13:42:19 -0400 Subject: SUNRPC: Ensure that we handle ENOBUFS errors correctly. Currently, an ENOBUFS error will result in a fatal error for the RPC call. Normally, we will just want to wait and then retry. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 ++++ net/sunrpc/xprtsock.c | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2e6ab10..575e63f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1746,6 +1746,7 @@ call_bind_status(struct rpc_task *task) case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + case -ENOBUFS: case -EPIPE: dprintk("RPC: %5u remote rpcbind unreachable: %d\n", task->tk_pid, task->tk_status); @@ -1812,6 +1813,7 @@ call_connect_status(struct rpc_task *task) case -ECONNABORTED: case -ENETUNREACH: case -EHOSTUNREACH: + case -ENOBUFS: if (RPC_IS_SOFTCONN(task)) break; /* retry with existing socket, after a delay */ @@ -1918,6 +1920,7 @@ call_transmit_status(struct rpc_task *task) case -ECONNRESET: case -ECONNABORTED: case -ENOTCONN: + case -ENOBUFS: case -EPIPE: rpc_task_force_reencode(task); } @@ -2034,6 +2037,7 @@ call_status(struct rpc_task *task) case -ECONNRESET: case -ECONNABORTED: rpc_force_rebind(clnt); + case -ENOBUFS: rpc_delay(task, 3*HZ); case -EPIPE: case -ENOTCONN: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index be8bbd5..8f8589f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -594,6 +594,7 @@ static int xs_local_send_request(struct rpc_task *task) } switch (status) { + case -ENOBUFS: case -EAGAIN: status = xs_nospace(task); break; @@ -661,6 +662,7 @@ static int xs_udp_send_request(struct rpc_task *task) dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); case -ENETUNREACH: + case -ENOBUFS: case -EPIPE: case -ECONNREFUSED: /* When the server has died, an ICMP port unreachable message @@ -758,6 +760,7 @@ static int xs_tcp_send_request(struct rpc_task *task) status = -ENOTCONN; /* Should we call xs_close() here? */ break; + case -ENOBUFS: case -EAGAIN: status = xs_nospace(task); break; @@ -1946,6 +1949,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport) dprintk("RPC: xprt %p connected to %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); xprt_set_connected(xprt); + case -ENOBUFS: break; case -ENOENT: dprintk("RPC: xprt %p: socket %s does not exist\n", @@ -2281,6 +2285,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + case -ENOBUFS: /* retry with existing socket, after a delay */ goto out; } -- cgit v1.1 From 2fc193cf924ea6eb74f6a0cf73b94b2e62938ae5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Jul 2014 00:02:57 -0400 Subject: SUNRPC: Handle EPIPE in xprt_connect_status The callback handler xs_error_report() can end up propagating an EPIPE error by means of the call to xprt_wake_pending_tasks(). Ensure that xprt_connect_status() does not automatically convert this into an EIO error. Reported-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 1 + net/sunrpc/xprt.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 575e63f..488ddee 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1814,6 +1814,7 @@ call_connect_status(struct rpc_task *task) case -ENETUNREACH: case -EHOSTUNREACH: case -ENOBUFS: + case -EPIPE: if (RPC_IS_SOFTCONN(task)) break; /* retry with existing socket, after a delay */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c3b2b33..c5b2916 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -744,6 +744,7 @@ static void xprt_connect_status(struct rpc_task *task) case -ECONNABORTED: case -ENETUNREACH: case -EHOSTUNREACH: + case -EPIPE: case -EAGAIN: dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid); break; -- cgit v1.1 From 2004c726b9d9a9670b7f837190be9c8dfa7a0e9d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 21 Jun 2014 20:52:15 -0400 Subject: auth_gss: fetch the acceptor name out of the downcall If rpc.gssd sends us an acceptor name string trailing the context token, stash it as part of the context. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index b6e440b..e34af68 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -262,9 +262,22 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct p = ERR_PTR(ret); goto err; } - dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u\n", - __func__, ctx->gc_expiry, now, timeout); - return q; + + /* is there any trailing data? */ + if (q == end) { + p = q; + goto done; + } + + /* pull in acceptor name (if there is one) */ + p = simple_get_netobj(q, end, &ctx->gc_acceptor); + if (IS_ERR(p)) + goto err; +done: + dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n", + __func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len, + ctx->gc_acceptor.data); + return p; err: dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p)); return p; @@ -1225,6 +1238,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx) gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); + kfree(ctx->gc_acceptor.data); kfree(ctx); } -- cgit v1.1 From a0337d1ddb5a4bd609e3ff0955551cb240340340 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 21 Jun 2014 20:52:16 -0400 Subject: sunrpc: add a new "stringify_acceptor" rpc_credop ...and add an new rpc_auth function to call it when it exists. This is only applicable for AUTH_GSS mechanisms, so we only specify this for those sorts of credentials. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 9 ++++++ net/sunrpc/auth_gss/auth_gss.c | 62 ++++++++++++++++++++++++++++-------------- 2 files changed, 51 insertions(+), 20 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index f773667..1481eff 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -363,6 +363,15 @@ rpcauth_cred_key_to_expire(struct rpc_cred *cred) } EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire); +char * +rpcauth_stringify_acceptor(struct rpc_cred *cred) +{ + if (!cred->cr_ops->crstringify_acceptor) + return NULL; + return cred->cr_ops->crstringify_acceptor(cred); +} +EXPORT_SYMBOL_GPL(rpcauth_stringify_acceptor); + /* * Destroy a list of credentials */ diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index e34af68..7385431 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1346,6 +1346,26 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred) return err; } +static char * +gss_stringify_acceptor(struct rpc_cred *cred) +{ + char *string; + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); + struct xdr_netobj *acceptor = &gss_cred->gc_ctx->gc_acceptor; + + /* no point if there's no string */ + if (!acceptor->len) + return NULL; + + string = kmalloc(acceptor->len + 1, GFP_KERNEL); + if (!string) + return string; + + memcpy(string, acceptor->data, acceptor->len); + string[acceptor->len] = '\0'; + return string; +} + /* * Returns -EACCES if GSS context is NULL or will expire within the * timeout (miliseconds) @@ -1923,29 +1943,31 @@ static const struct rpc_authops authgss_ops = { }; static const struct rpc_credops gss_credops = { - .cr_name = "AUTH_GSS", - .crdestroy = gss_destroy_cred, - .cr_init = gss_cred_init, - .crbind = rpcauth_generic_bind_cred, - .crmatch = gss_match, - .crmarshal = gss_marshal, - .crrefresh = gss_refresh, - .crvalidate = gss_validate, - .crwrap_req = gss_wrap_req, - .crunwrap_resp = gss_unwrap_resp, - .crkey_timeout = gss_key_timeout, + .cr_name = "AUTH_GSS", + .crdestroy = gss_destroy_cred, + .cr_init = gss_cred_init, + .crbind = rpcauth_generic_bind_cred, + .crmatch = gss_match, + .crmarshal = gss_marshal, + .crrefresh = gss_refresh, + .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, + .crkey_timeout = gss_key_timeout, + .crstringify_acceptor = gss_stringify_acceptor, }; static const struct rpc_credops gss_nullops = { - .cr_name = "AUTH_GSS", - .crdestroy = gss_destroy_nullcred, - .crbind = rpcauth_generic_bind_cred, - .crmatch = gss_match, - .crmarshal = gss_marshal, - .crrefresh = gss_refresh_null, - .crvalidate = gss_validate, - .crwrap_req = gss_wrap_req, - .crunwrap_resp = gss_unwrap_resp, + .cr_name = "AUTH_GSS", + .crdestroy = gss_destroy_nullcred, + .crbind = rpcauth_generic_bind_cred, + .crmatch = gss_match, + .crmarshal = gss_marshal, + .crrefresh = gss_refresh_null, + .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, + .crstringify_acceptor = gss_stringify_acceptor, }; static const struct rpc_pipe_ops gss_upcall_ops_v0 = { -- cgit v1.1 From adcda652c92c4881f74cca7f9ca3388bb498d997 Mon Sep 17 00:00:00 2001 From: Himangi Saraogi Date: Tue, 10 Jun 2014 20:19:17 +0530 Subject: rpc_pipe: Drop memory allocation cast Drop cast on the result of kmalloc and similar functions. The semantic patch that makes this change is as follows: // @@ type T; @@ - (T *) (\(kmalloc\|kzalloc\|kcalloc\|kmem_cache_alloc\|kmem_cache_zalloc\| kmem_cache_alloc_node\|kmalloc_node\|kzalloc_node\)(...)) // Signed-off-by: Himangi Saraogi Acked-by: Julia Lawall Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index b185548..2d12b76 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -195,7 +195,7 @@ static struct inode * rpc_alloc_inode(struct super_block *sb) { struct rpc_inode *rpci; - rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL); + rpci = kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL); if (!rpci) return NULL; return &rpci->vfs_inode; -- cgit v1.1 From 00cfaa943ec30abbc7109b0b918e0b6a0eef07dc Mon Sep 17 00:00:00 2001 From: Daniel Walter Date: Sat, 21 Jun 2014 13:06:38 +0100 Subject: replace strict_strto calls Replace obsolete strict_strto calls with appropriate kstrto calls Signed-off-by: Daniel Walter Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 16 ++++++---------- net/sunrpc/auth.c | 2 +- net/sunrpc/xprtsock.c | 4 ++-- 3 files changed, 9 insertions(+), 13 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index a622ad6..2e0a6f9 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -176,7 +176,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, len = (buf + buflen) - delim - 1; p = kstrndup(delim + 1, len, GFP_KERNEL); if (p) { - unsigned long scope_id = 0; + u32 scope_id = 0; struct net_device *dev; dev = dev_get_by_name(net, p); @@ -184,7 +184,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, scope_id = dev->ifindex; dev_put(dev); } else { - if (strict_strtoul(p, 10, &scope_id) == 0) { + if (kstrtou32(p, 10, &scope_id) == 0) { kfree(p); return 0; } @@ -304,7 +304,7 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags) * @sap: buffer into which to plant socket address * @salen: size of buffer * - * @uaddr does not have to be '\0'-terminated, but strict_strtoul() and + * @uaddr does not have to be '\0'-terminated, but kstrtou8() and * rpc_pton() require proper string termination to be successful. * * Returns the size of the socket address if successful; otherwise @@ -315,7 +315,7 @@ size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr, const size_t salen) { char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')]; - unsigned long portlo, porthi; + u8 portlo, porthi; unsigned short port; if (uaddr_len > RPCBIND_MAXUADDRLEN) @@ -327,18 +327,14 @@ size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr, c = strrchr(buf, '.'); if (unlikely(c == NULL)) return 0; - if (unlikely(strict_strtoul(c + 1, 10, &portlo) != 0)) - return 0; - if (unlikely(portlo > 255)) + if (unlikely(kstrtou8(c + 1, 10, &portlo) != 0)) return 0; *c = '\0'; c = strrchr(buf, '.'); if (unlikely(c == NULL)) return 0; - if (unlikely(strict_strtoul(c + 1, 10, &porthi) != 0)) - return 0; - if (unlikely(porthi > 255)) + if (unlikely(kstrtou8(c + 1, 10, &porthi) != 0)) return 0; port = (unsigned short)((porthi << 8) | portlo); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 1481eff..2bc7bb8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -48,7 +48,7 @@ static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp) if (!val) goto out_inval; - ret = strict_strtoul(val, 0, &num); + ret = kstrtoul(val, 0, &num); if (ret == -EINVAL) goto out_inval; nbits = fls(num); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8f8589f..43cd89e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -3059,12 +3059,12 @@ static int param_set_uint_minmax(const char *val, const struct kernel_param *kp, unsigned int min, unsigned int max) { - unsigned long num; + unsigned int num; int ret; if (!val) return -EINVAL; - ret = strict_strtoul(val, 0, &num); + ret = kstrtouint(val, 0, &num); if (ret == -EINVAL || num < min || num > max) return -EINVAL; *((unsigned int *)kp->arg) = num; -- cgit v1.1 From bf858ab0adcb36fc60a1d941749a7dd0a9bf3d8a Mon Sep 17 00:00:00 2001 From: Yan Burman Date: Thu, 19 Jun 2014 16:06:30 +0300 Subject: xprtrdma: Fix DMA-API-DEBUG warning by checking dma_map result Fix the following warning when DMA-API debug is enabled by checking ib_dma_map_single result: [ 1455.345548] ------------[ cut here ]------------ [ 1455.346863] WARNING: CPU: 3 PID: 3929 at /home/yanb/kernel/net-next/lib/dma-debug.c:1140 check_unmap+0x4e5/0x990() [ 1455.349350] mlx4_core 0000:00:07.0: DMA-API: device driver failed to check map error[device address=0x000000007c9f2090] [size=2656 bytes] [mapped as single] [ 1455.349350] Modules linked in: xprtrdma netconsole configfs nfsv3 nfs_acl ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm autofs4 auth_rpcgss oid_registry nfsv4 nfs fscache lockd sunrpc dm_mirror dm_region_hash dm_log microcode pcspkr mlx4_ib ib_sa ib_mad ib_core ib_addr mlx4_en ipv6 ptp pps_core vxlan mlx4_core virtio_balloon cirrus ttm drm_kms_helper drm sysimgblt sysfillrect syscopyarea i2c_piix4 i2c_core button ext3 jbd virtio_blk virtio_net virtio_pci virtio_ring virtio uhci_hcd ata_generic ata_piix libata [ 1455.349350] CPU: 3 PID: 3929 Comm: mount.nfs Not tainted 3.15.0-rc1-dbg+ #13 [ 1455.349350] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007 [ 1455.349350] 0000000000000474 ffff880069dcf628 ffffffff8151c341 ffffffff817b69d8 [ 1455.349350] ffff880069dcf678 ffff880069dcf668 ffffffff8105b5fc 0000000069dcf658 [ 1455.349350] ffff880069dcf778 ffff88007b0c9f00 ffffffff8255ec40 0000000000000a60 [ 1455.349350] Call Trace: [ 1455.349350] [] dump_stack+0x52/0x81 [ 1455.349350] [] warn_slowpath_common+0x8c/0xc0 [ 1455.349350] [] warn_slowpath_fmt+0x46/0x50 [ 1455.349350] [] check_unmap+0x4e5/0x990 [ 1455.349350] [] ? _raw_spin_unlock_irq+0x30/0x60 [ 1455.349350] [] debug_dma_unmap_page+0x5a/0x60 [ 1455.349350] [] rpcrdma_deregister_internal+0xb3/0xd0 [xprtrdma] [ 1455.349350] [] rpcrdma_buffer_destroy+0x69/0x170 [xprtrdma] [ 1455.349350] [] xprt_rdma_destroy+0x3f/0xb0 [xprtrdma] [ 1455.349350] [] xprt_destroy+0x6f/0x80 [sunrpc] [ 1455.349350] [] xprt_put+0x15/0x20 [sunrpc] [ 1455.349350] [] rpc_free_client+0x8a/0xe0 [sunrpc] [ 1455.349350] [] rpc_release_client+0x68/0xa0 [sunrpc] [ 1455.349350] [] rpc_shutdown_client+0xb0/0xc0 [sunrpc] [ 1455.349350] [] ? rpc_ping+0x5d/0x70 [sunrpc] [ 1455.349350] [] rpc_create_xprt+0xbb/0xd0 [sunrpc] [ 1455.349350] [] rpc_create+0xb3/0x160 [sunrpc] [ 1455.349350] [] ? __probe_kernel_read+0x69/0xb0 [ 1455.349350] [] nfs_create_rpc_client+0xdc/0x100 [nfs] [ 1455.349350] [] nfs_init_client+0x3a/0x90 [nfs] [ 1455.349350] [] nfs_get_client+0x478/0x5b0 [nfs] [ 1455.349350] [] ? nfs_get_client+0x100/0x5b0 [nfs] [ 1455.349350] [] ? kmem_cache_alloc_trace+0x24d/0x260 [ 1455.349350] [] nfs_create_server+0xf3/0x4c0 [nfs] [ 1455.349350] [] ? nfs_request_mount+0xf0/0x1a0 [nfs] [ 1455.349350] [] nfs3_create_server+0x13/0x30 [nfsv3] [ 1455.349350] [] nfs_try_mount+0x1f3/0x230 [nfs] [ 1455.349350] [] ? get_parent_ip+0x11/0x50 [ 1455.349350] [] ? __this_cpu_preempt_check+0x13/0x20 [ 1455.349350] [] ? try_module_get+0x6b/0x190 [ 1455.349350] [] nfs_fs_mount+0x187/0x9d0 [nfs] [ 1455.349350] [] ? nfs_clone_super+0x140/0x140 [nfs] [ 1455.349350] [] ? nfs_auth_info_match+0x40/0x40 [nfs] [ 1455.349350] [] mount_fs+0x20/0xe0 [ 1455.349350] [] vfs_kern_mount+0x76/0x160 [ 1455.349350] [] do_mount+0x428/0xae0 [ 1455.349350] [] SyS_mount+0x90/0xe0 [ 1455.349350] [] system_call_fastpath+0x16/0x1b [ 1455.349350] ---[ end trace f1f31572972e211d ]--- Signed-off-by: Yan Burman Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 13dbd1c..176dafc 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1388,6 +1388,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, */ iov->addr = ib_dma_map_single(ia->ri_id->device, va, len, DMA_BIDIRECTIONAL); + if (ib_dma_mapping_error(ia->ri_id->device, iov->addr)) + return -ENOMEM; + iov->length = len; if (ia->ri_have_dma_lkey) { -- cgit v1.1 From 5fc83f470d8ada25927701512cf94a53dab6c4c8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:23:17 -0400 Subject: xprtrdma: Fix panic in rpcrdma_register_frmr_external() seg1->mr_nsegs is not yet initialized when it is used to unmap segments during an error exit. Use the same unmapping logic for all error exits. "if (frmr_wr.wr.fast_reg.length < len) {" used to be a BUG_ON check. The broken code will never be executed under normal operation. Fixes: c977dea (xprtrdma: Remove BUG_ON() call sites) Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 176dafc..f337bda 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1548,9 +1548,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; if (frmr_wr.wr.fast_reg.length < len) { - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia, seg++); - return -EIO; + rc = -EIO; + goto out_err; } /* Bump the key */ @@ -1568,8 +1567,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, if (rc) { dprintk("RPC: %s: failed ib_post_send for register," " status %i\n", __func__, rc); - while (i--) - rpcrdma_unmap_one(ia, --seg); + goto out_err; } else { seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; @@ -1577,6 +1575,10 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, seg1->mr_len = len; } *nsegs = i; + return 0; +out_err: + while (i--) + rpcrdma_unmap_one(ia, --seg); return rc; } -- cgit v1.1 From 73806c8832b3438ef0439603dab1f3cfc61cb6cd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:23:25 -0400 Subject: xprtrdma: Protect ia->ri_id when unmapping/invalidating MRs Ensure ia->ri_id remains valid while invoking dma_unmap_page() or posting LOCAL_INV during a transport reconnect. Otherwise, ia->ri_id->device or ia->ri_id->qp is NULL, which triggers a panic. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=259 Fixes: ec62f40 'xprtrdma: Ensure ia->ri_id->qp is not NULL when reconnecting' Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 23 +++++++++++++++++------ net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index f337bda..aa08de89 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -613,6 +613,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) /* Else will do memory reg/dereg for each chunk */ ia->ri_memreg_strategy = memreg; + rwlock_init(&ia->ri_qplock); return 0; out2: rdma_destroy_id(ia->ri_id); @@ -859,7 +860,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) int rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { - struct rdma_cm_id *id; + struct rdma_cm_id *id, *old; int rc = 0; int retry_count = 0; @@ -905,9 +906,14 @@ retry: rc = -ENETUNREACH; goto out; } - rdma_destroy_qp(ia->ri_id); - rdma_destroy_id(ia->ri_id); + + write_lock(&ia->ri_qplock); + old = ia->ri_id; ia->ri_id = id; + write_unlock(&ia->ri_qplock); + + rdma_destroy_qp(old); + rdma_destroy_id(old); } else { dprintk("RPC: %s: connecting...\n", __func__); rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); @@ -1590,9 +1596,6 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, struct ib_send_wr invalidate_wr, *bad_wr; int rc; - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia, seg++); - memset(&invalidate_wr, 0, sizeof invalidate_wr); invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; @@ -1600,7 +1603,11 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); + read_lock(&ia->ri_qplock); + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); + read_unlock(&ia->ri_qplock); if (rc) dprintk("RPC: %s: failed ib_post_send for invalidate," " status %i\n", __func__, rc); @@ -1661,8 +1668,10 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); rc = ib_unmap_fmr(&l); + read_lock(&ia->ri_qplock); while (seg1->mr_nsegs--) rpcrdma_unmap_one(ia, seg++); + read_unlock(&ia->ri_qplock); if (rc) dprintk("RPC: %s: failed ib_unmap_fmr," " status %i\n", __func__, rc); @@ -1718,7 +1727,9 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, #if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: + read_lock(&ia->ri_qplock); rpcrdma_unmap_one(ia, seg); + read_unlock(&ia->ri_qplock); break; #endif diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 89e7cd4..97ca516 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -59,6 +59,7 @@ * Interface Adapter -- one per transport instance */ struct rpcrdma_ia { + rwlock_t ri_qplock; struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; struct ib_mr *ri_bind_mem; -- cgit v1.1 From 43e95988178ed70a878a5be6be9ad248342dbf7d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:23:34 -0400 Subject: xprtrdma: Limit data payload size for ALLPHYSICAL When the client uses physical memory registration, each page in the payload gets its own array entry in the RPC/RDMA header's chunk list. Therefore, don't advertise a maximum payload size that would require more array entries than can fit in the RPC buffer where RPC/RDMA headers are built. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=248 Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 4 +++- net/sunrpc/xprtrdma/verbs.c | 41 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 3 files changed, 45 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 66f91f0..4185102 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -296,7 +296,6 @@ xprt_setup_rdma(struct xprt_create *args) xprt->resvport = 0; /* privileged port not needed */ xprt->tsh_size = 0; /* RPC-RDMA handles framing */ - xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE; xprt->ops = &xprt_rdma_procs; /* @@ -382,6 +381,9 @@ xprt_setup_rdma(struct xprt_create *args) new_ep->rep_xprt = xprt; xprt_rdma_format_addresses(xprt); + xprt->max_payload = rpcrdma_max_payload(new_xprt); + dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", + __func__, xprt->max_payload); if (!try_module_get(THIS_MODULE)) goto out4; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index aa08de89..13ff874 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1825,3 +1825,44 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, rc); return rc; } + +/* Physical mapping means one Read/Write list entry per-page. + * All list entries must fit within an inline buffer + * + * NB: The server must return a Write list for NFS READ, + * which has the same constraint. Factor in the inline + * rsize as well. + */ +static size_t +rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + unsigned int inline_size, pages; + + inline_size = min_t(unsigned int, + cdata->inline_wsize, cdata->inline_rsize); + inline_size -= RPCRDMA_HDRLEN_MIN; + pages = inline_size / sizeof(struct rpcrdma_segment); + return pages << PAGE_SHIFT; +} + +static size_t +rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt) +{ + return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; +} + +size_t +rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt) +{ + size_t result; + + switch (r_xprt->rx_ia.ri_memreg_strategy) { + case RPCRDMA_ALLPHYSICAL: + result = rpcrdma_physical_max_payload(r_xprt); + break; + default: + result = rpcrdma_mr_max_payload(r_xprt); + } + return result; +} diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 97ca516..f3d86b2 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -348,6 +348,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c */ int rpcrdma_marshal_req(struct rpc_rqst *); +size_t rpcrdma_max_payload(struct rpcrdma_xprt *); /* Temporary NFS request map cache. Created in svc_rdma.c */ extern struct kmem_cache *svc_rdma_map_cachep; -- cgit v1.1 From 6ab59945f292a5c6cbc4a6c2011f1a732a116af2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:23:43 -0400 Subject: xprtrdma: Update rkeys after transport reconnect Various reports of: rpcrdma_qp_async_error_upcall: QP error 3 on device mlx4_0 ep ffff8800bfd3e848 Ensure that rkeys in already-marshalled RPC/RDMA headers are refreshed after the QP has been replaced by a reconnect. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=249 Suggested-by: Selvin Xavier Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 75 +++++++++++++++++++++-------------------- net/sunrpc/xprtrdma/transport.c | 11 +++--- net/sunrpc/xprtrdma/xprt_rdma.h | 10 ++++++ 3 files changed, 54 insertions(+), 42 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 693966d..54422f7 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -53,14 +53,6 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif -enum rpcrdma_chunktype { - rpcrdma_noch = 0, - rpcrdma_readch, - rpcrdma_areadch, - rpcrdma_writech, - rpcrdma_replych -}; - #ifdef RPC_DEBUG static const char transfertypes[][12] = { "pure inline", /* no chunks */ @@ -286,6 +278,28 @@ out: } /* + * Marshal chunks. This routine returns the header length + * consumed by marshaling. + * + * Returns positive RPC/RDMA header size, or negative errno. + */ + +ssize_t +rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result) +{ + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base; + + if (req->rl_rtype != rpcrdma_noch) + result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, + headerp, req->rl_rtype); + else if (req->rl_wtype != rpcrdma_noch) + result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf, + headerp, req->rl_wtype); + return result; +} + +/* * Copy write data inline. * This function is used for "small" requests. Data which is passed * to RPC via iovecs (or page list) is copied directly into the @@ -377,7 +391,6 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) char *base; size_t rpclen, padlen; ssize_t hdrlen; - enum rpcrdma_chunktype rtype, wtype; struct rpcrdma_msg *headerp; /* @@ -415,13 +428,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * into pages; otherwise use reply chunks. */ if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) - wtype = rpcrdma_noch; + req->rl_wtype = rpcrdma_noch; else if (rqst->rq_rcv_buf.page_len == 0) - wtype = rpcrdma_replych; + req->rl_wtype = rpcrdma_replych; else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) - wtype = rpcrdma_writech; + req->rl_wtype = rpcrdma_writech; else - wtype = rpcrdma_replych; + req->rl_wtype = rpcrdma_replych; /* * Chunks needed for arguments? @@ -438,16 +451,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * TBD check NFSv4 setacl */ if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) - rtype = rpcrdma_noch; + req->rl_rtype = rpcrdma_noch; else if (rqst->rq_snd_buf.page_len == 0) - rtype = rpcrdma_areadch; + req->rl_rtype = rpcrdma_areadch; else - rtype = rpcrdma_readch; + req->rl_rtype = rpcrdma_readch; /* The following simplification is not true forever */ - if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) - wtype = rpcrdma_noch; - if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { + if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych) + req->rl_wtype = rpcrdma_noch; + if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) { dprintk("RPC: %s: cannot marshal multiple chunk lists\n", __func__); return -EIO; @@ -461,7 +474,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * When padding is in use and applies to the transfer, insert * it and change the message type. */ - if (rtype == rpcrdma_noch) { + if (req->rl_rtype == rpcrdma_noch) { padlen = rpcrdma_inline_pullup(rqst, RPCRDMA_INLINE_PAD_VALUE(rqst)); @@ -476,7 +489,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ - if (wtype != rpcrdma_noch) { + if (req->rl_wtype != rpcrdma_noch) { dprintk("RPC: %s: invalid chunk list\n", __func__); return -EIO; @@ -497,30 +510,18 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * on receive. Therefore, we request a reply chunk * for non-writes wherever feasible and efficient. */ - if (wtype == rpcrdma_noch) - wtype = rpcrdma_replych; + if (req->rl_wtype == rpcrdma_noch) + req->rl_wtype = rpcrdma_replych; } } - /* - * Marshal chunks. This routine will return the header length - * consumed by marshaling. - */ - if (rtype != rpcrdma_noch) { - hdrlen = rpcrdma_create_chunks(rqst, - &rqst->rq_snd_buf, headerp, rtype); - wtype = rtype; /* simplify dprintk */ - - } else if (wtype != rpcrdma_noch) { - hdrlen = rpcrdma_create_chunks(rqst, - &rqst->rq_rcv_buf, headerp, wtype); - } + hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen); if (hdrlen < 0) return hdrlen; dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" " headerp 0x%p base 0x%p lkey 0x%x\n", - __func__, transfertypes[wtype], hdrlen, rpclen, padlen, + __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen, headerp, base, req->rl_iov.lkey); /* diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 4185102..f6d280b 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -597,13 +597,14 @@ xprt_rdma_send_request(struct rpc_task *task) struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - int rc; + int rc = 0; - if (req->rl_niovs == 0) { + if (req->rl_niovs == 0) rc = rpcrdma_marshal_req(rqst); - if (rc < 0) - goto failed_marshal; - } + else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) + rc = rpcrdma_marshal_chunks(rqst, 0); + if (rc < 0) + goto failed_marshal; if (req->rl_reply == NULL) /* e.g. reconnection */ rpcrdma_recv_buffer_get(req); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f3d86b2..c270e59 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -99,6 +99,14 @@ struct rpcrdma_ep { #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) +enum rpcrdma_chunktype { + rpcrdma_noch = 0, + rpcrdma_readch, + rpcrdma_areadch, + rpcrdma_writech, + rpcrdma_replych +}; + /* * struct rpcrdma_rep -- this structure encapsulates state required to recv * and complete a reply, asychronously. It needs several pieces of @@ -192,6 +200,7 @@ struct rpcrdma_req { unsigned int rl_niovs; /* 0, 2 or 4 */ unsigned int rl_nchunks; /* non-zero if chunks */ unsigned int rl_connect_cookie; /* retry detection */ + enum rpcrdma_chunktype rl_rtype, rl_wtype; struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ @@ -347,6 +356,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); /* * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c */ +ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t); int rpcrdma_marshal_req(struct rpc_rqst *); size_t rpcrdma_max_payload(struct rpcrdma_xprt *); -- cgit v1.1 From a7bc211ac926172ad20463afcf00ae7b9ebcd950 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:23:52 -0400 Subject: xprtrdma: On disconnect, don't ignore pending CQEs xprtrdma is currently throwing away queued completions during a reconnect. RPC replies posted just before connection loss, or successful completions that change the state of an FRMR, can be missed. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 13ff874..e49cdc9 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -310,6 +310,13 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) rpcrdma_recvcq_poll(cq, ep); } +static void +rpcrdma_flush_cqs(struct rpcrdma_ep *ep) +{ + rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep); + rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep); +} + #ifdef RPC_DEBUG static const char * const conn[] = { "address resolved", @@ -872,9 +879,7 @@ retry: if (rc && rc != -ENOTCONN) dprintk("RPC: %s: rpcrdma_ep_disconnect" " status %i\n", __func__, rc); - - rpcrdma_clean_cq(ep->rep_attr.recv_cq); - rpcrdma_clean_cq(ep->rep_attr.send_cq); + rpcrdma_flush_cqs(ep); xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); id = rpcrdma_create_id(xprt, ia, @@ -985,8 +990,7 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { int rc; - rpcrdma_clean_cq(ep->rep_attr.recv_cq); - rpcrdma_clean_cq(ep->rep_attr.send_cq); + rpcrdma_flush_cqs(ep); rc = rdma_disconnect(ia->ri_id); if (!rc) { /* returns without wait if not connected */ -- cgit v1.1 From 539431a437d2e5d6d94016184dfc0aab263c01e1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:24:01 -0400 Subject: xprtrdma: Don't invalidate FRMRs if registration fails If FRMR registration fails, it's likely to transition the QP to the error state. Or, registration may have failed because the QP is _already_ in ERROR. Thus calling rpcrdma_deregister_external() in rpcrdma_create_chunks() is useless in FRMR mode: the LOCAL_INVs just get flushed. It is safe to leave existing registrations: when FRMR registration is tried again, rpcrdma_register_frmr_external() checks if each FRMR is already/still VALID, and knocks it down first if it is. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 54422f7..6166c98 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -271,9 +271,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, return (unsigned char *)iptr - (unsigned char *)headerp; out: - for (pos = 0; nchunks--;) - pos += rpcrdma_deregister_external( - &req->rl_segments[pos], r_xprt); + if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) { + for (pos = 0; nchunks--;) + pos += rpcrdma_deregister_external( + &req->rl_segments[pos], r_xprt); + } return n; } -- cgit v1.1 From 0dbb4108a6a615589751de2aaf468d3ddbcef24c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:24:09 -0400 Subject: xprtrdma: Unclutter struct rpcrdma_mr_seg Clean ups: - make it obvious that the rl_mw field is a pointer -- allocated separately, not as part of struct rpcrdma_mr_seg - promote "struct {} frmr;" to a named type - promote the state enum to a named type - name the MW state field the same way other fields in rpcrdma_mw are named Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 34 +++++++++++++++---------------- net/sunrpc/xprtrdma/xprt_rdma.h | 44 ++++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 28 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index e49cdc9..dd1dabc 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -156,9 +156,9 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc) return; if (wc->opcode == IB_WC_FAST_REG_MR) - frmr->r.frmr.state = FRMR_IS_VALID; + frmr->r.frmr.fr_state = FRMR_IS_VALID; else if (wc->opcode == IB_WC_LOCAL_INV) - frmr->r.frmr.state = FRMR_IS_INVALID; + frmr->r.frmr.fr_state = FRMR_IS_INVALID; } static int @@ -1496,6 +1496,9 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, struct rpcrdma_xprt *r_xprt) { struct rpcrdma_mr_seg *seg1 = seg; + struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw; + struct rpcrdma_frmr *frmr = &mw->r.frmr; + struct ib_mr *mr = frmr->fr_mr; struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr; u8 key; @@ -1515,8 +1518,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, rpcrdma_map_one(ia, seg, writing); pa = seg->mr_dma; for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { - seg1->mr_chunk.rl_mw->r.frmr.fr_pgl-> - page_list[page_no++] = pa; + frmr->fr_pgl->page_list[page_no++] = pa; pa += PAGE_SIZE; } len += seg->mr_len; @@ -1528,20 +1530,18 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, break; } dprintk("RPC: %s: Using frmr %p to map %d segments\n", - __func__, seg1->mr_chunk.rl_mw, i); + __func__, mw, i); - if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) { + if (unlikely(frmr->fr_state == FRMR_IS_VALID)) { dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n", - __func__, - seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey); + __func__, mr->rkey); /* Invalidate before using. */ memset(&invalidate_wr, 0, sizeof invalidate_wr); - invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; + invalidate_wr.wr_id = (unsigned long)(void *)mw; invalidate_wr.next = &frmr_wr; invalidate_wr.opcode = IB_WR_LOCAL_INV; invalidate_wr.send_flags = IB_SEND_SIGNALED; - invalidate_wr.ex.invalidate_rkey = - seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); post_wr = &invalidate_wr; } else @@ -1549,11 +1549,11 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, /* Prepare FRMR WR */ memset(&frmr_wr, 0, sizeof frmr_wr); - frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; + frmr_wr.wr_id = (unsigned long)(void *)mw; frmr_wr.opcode = IB_WR_FAST_REG_MR; frmr_wr.send_flags = IB_SEND_SIGNALED; frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; - frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; + frmr_wr.wr.fast_reg.page_list = frmr->fr_pgl; frmr_wr.wr.fast_reg.page_list_len = page_no; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; @@ -1563,13 +1563,13 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, } /* Bump the key */ - key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); - ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); + key = (u8)(mr->rkey & 0x000000FF); + ib_update_fast_reg_key(mr, ++key); frmr_wr.wr.fast_reg.access_flags = (writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ); - frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + frmr_wr.wr.fast_reg.rkey = mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr); @@ -1579,7 +1579,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, " status %i\n", __func__, rc); goto out_err; } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + seg1->mr_rkey = mr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; seg1->mr_len = len; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c270e59..84c3455 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -146,6 +146,38 @@ struct rpcrdma_rep { }; /* + * struct rpcrdma_mw - external memory region metadata + * + * An external memory region is any buffer or page that is registered + * on the fly (ie, not pre-registered). + * + * Each rpcrdma_buffer has a list of these anchored in rb_mws. During + * call_allocate, rpcrdma_buffer_get() assigns one to each segment in + * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep + * track of registration metadata while each RPC is pending. + * rpcrdma_deregister_external() uses this metadata to unmap and + * release these resources when an RPC is complete. + */ +enum rpcrdma_frmr_state { + FRMR_IS_INVALID, /* ready to be used */ + FRMR_IS_VALID, /* in use */ +}; + +struct rpcrdma_frmr { + struct ib_fast_reg_page_list *fr_pgl; + struct ib_mr *fr_mr; + enum rpcrdma_frmr_state fr_state; +}; + +struct rpcrdma_mw { + union { + struct ib_fmr *fmr; + struct rpcrdma_frmr frmr; + } r; + struct list_head mw_list; +}; + +/* * struct rpcrdma_req -- structure central to the request/reply sequence. * * N of these are associated with a transport instance, and stored in @@ -172,17 +204,7 @@ struct rpcrdma_rep { struct rpcrdma_mr_seg { /* chunk descriptors */ union { /* chunk memory handles */ struct ib_mr *rl_mr; /* if registered directly */ - struct rpcrdma_mw { /* if registered from region */ - union { - struct ib_fmr *fmr; - struct { - struct ib_fast_reg_page_list *fr_pgl; - struct ib_mr *fr_mr; - enum { FRMR_IS_INVALID, FRMR_IS_VALID } state; - } frmr; - } r; - struct list_head mw_list; - } *rl_mw; + struct rpcrdma_mw *rl_mw; /* if registered from region */ } mr_chunk; u64 mr_base; /* registration result */ u32 mr_rkey; /* registration result */ -- cgit v1.1 From c93e986a295d537589efd0504f36ca952bd1a5be Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:24:19 -0400 Subject: xprtrdma: Back off rkey when FAST_REG_MR fails If posting a FAST_REG_MR Work Reqeust fails, revert the rkey update to avoid subsequent IB_WC_MW_BIND_ERR completions. Suggested-by: Steve Wise Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index dd1dabc..b670f4d 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1577,6 +1577,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, if (rc) { dprintk("RPC: %s: failed ib_post_send for register," " status %i\n", __func__, rc); + ib_update_fast_reg_key(mr, --key); goto out_err; } else { seg1->mr_rkey = mr->rkey; -- cgit v1.1 From 3111d72c7ced444b1034f6e365e0e02444c68aa8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:24:28 -0400 Subject: xprtrdma: Chain together all MWs in same buffer pool During connection loss recovery, need to visit every MW in a buffer pool. Any MW that is in use by an RPC will not be on the rb_mws list. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 ++++ net/sunrpc/xprtrdma/xprt_rdma.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index b670f4d..0ad7d10 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1074,6 +1074,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, p += cdata->padding; INIT_LIST_HEAD(&buf->rb_mws); + INIT_LIST_HEAD(&buf->rb_all); r = (struct rpcrdma_mw *)p; switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: @@ -1098,6 +1099,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, ib_dereg_mr(r->r.frmr.fr_mr); goto out; } + list_add(&r->mw_all, &buf->rb_all); list_add(&r->mw_list, &buf->rb_mws); ++r; } @@ -1116,6 +1118,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, " failed %i\n", __func__, rc); goto out; } + list_add(&r->mw_all, &buf->rb_all); list_add(&r->mw_list, &buf->rb_mws); ++r; } @@ -1225,6 +1228,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) while (!list_empty(&buf->rb_mws)) { r = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list); + list_del(&r->mw_all); list_del(&r->mw_list); switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 84c3455..c1d8652 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -151,7 +151,7 @@ struct rpcrdma_rep { * An external memory region is any buffer or page that is registered * on the fly (ie, not pre-registered). * - * Each rpcrdma_buffer has a list of these anchored in rb_mws. During + * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During * call_allocate, rpcrdma_buffer_get() assigns one to each segment in * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep * track of registration metadata while each RPC is pending. @@ -175,6 +175,7 @@ struct rpcrdma_mw { struct rpcrdma_frmr frmr; } r; struct list_head mw_list; + struct list_head mw_all; }; /* @@ -246,6 +247,7 @@ struct rpcrdma_buffer { atomic_t rb_credits; /* most recent server credits */ int rb_max_requests;/* client max requests */ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ + struct list_head rb_all; int rb_send_index; struct rpcrdma_req **rb_send_bufs; int rb_recv_index; -- cgit v1.1 From c2922c0235aac1c787fa81e24d7d7e93c2202275 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:24:36 -0400 Subject: xprtrdma: Properly handle exhaustion of the rb_mws list If the rb_mws list is exhausted, clean up and return NULL so that call_allocate() will delay and try again. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 103 ++++++++++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 32 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 0ad7d10..017f0ab 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1256,6 +1256,67 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) kfree(buf->rb_pool); } +/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving + * some req segments uninitialized. + */ +static void +rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf) +{ + if (*mw) { + list_add_tail(&(*mw)->mw_list, &buf->rb_mws); + *mw = NULL; + } +} + +/* Cycle mw's back in reverse order, and "spin" them. + * This delays and scrambles reuse as much as possible. + */ +static void +rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) +{ + struct rpcrdma_mr_seg *seg = req->rl_segments; + struct rpcrdma_mr_seg *seg1 = seg; + int i; + + for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++) + rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf); + rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf); +} + +static void +rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) +{ + buf->rb_send_bufs[--buf->rb_send_index] = req; + req->rl_niovs = 0; + if (req->rl_reply) { + buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply; + req->rl_reply->rr_func = NULL; + req->rl_reply = NULL; + } +} + +static struct rpcrdma_req * +rpcrdma_buffer_get_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) +{ + struct rpcrdma_mw *r; + int i; + + i = RPCRDMA_MAX_SEGS - 1; + while (!list_empty(&buf->rb_mws)) { + r = list_entry(buf->rb_mws.next, + struct rpcrdma_mw, mw_list); + list_del(&r->mw_list); + req->rl_segments[i].mr_chunk.rl_mw = r; + if (unlikely(i-- == 0)) + return req; /* Success */ + } + + /* Not enough entries on rb_mws for this req */ + rpcrdma_buffer_put_sendbuf(req, buf); + rpcrdma_buffer_put_mrs(req, buf); + return NULL; +} + /* * Get a set of request/reply buffers. * @@ -1268,10 +1329,9 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) { + struct rpcrdma_ia *ia = rdmab_to_ia(buffers); struct rpcrdma_req *req; unsigned long flags; - int i; - struct rpcrdma_mw *r; spin_lock_irqsave(&buffers->rb_lock, flags); if (buffers->rb_send_index == buffers->rb_max_requests) { @@ -1291,14 +1351,13 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; } buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; - if (!list_empty(&buffers->rb_mws)) { - i = RPCRDMA_MAX_SEGS - 1; - do { - r = list_entry(buffers->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_list); - req->rl_segments[i].mr_chunk.rl_mw = r; - } while (--i >= 0); + switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + case RPCRDMA_MTHCAFMR: + req = rpcrdma_buffer_get_mrs(req, buffers); + break; + default: + break; } spin_unlock_irqrestore(&buffers->rb_lock, flags); return req; @@ -1313,34 +1372,14 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) { struct rpcrdma_buffer *buffers = req->rl_buffer; struct rpcrdma_ia *ia = rdmab_to_ia(buffers); - int i; unsigned long flags; spin_lock_irqsave(&buffers->rb_lock, flags); - buffers->rb_send_bufs[--buffers->rb_send_index] = req; - req->rl_niovs = 0; - if (req->rl_reply) { - buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; - req->rl_reply->rr_func = NULL; - req->rl_reply = NULL; - } + rpcrdma_buffer_put_sendbuf(req, buffers); switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: case RPCRDMA_MTHCAFMR: - /* - * Cycle mw's back in reverse order, and "spin" them. - * This delays and scrambles reuse as much as possible. - */ - i = 1; - do { - struct rpcrdma_mw **mw; - mw = &req->rl_segments[i].mr_chunk.rl_mw; - list_add_tail(&(*mw)->mw_list, &buffers->rb_mws); - *mw = NULL; - } while (++i < RPCRDMA_MAX_SEGS); - list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list, - &buffers->rb_mws); - req->rl_segments[0].mr_chunk.rl_mw = NULL; + rpcrdma_buffer_put_mrs(req, buffers); break; default: break; -- cgit v1.1 From 9f9d802a28a107937ecda4ff78de2ab5cedd439d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:24:45 -0400 Subject: xprtrdma: Reset FRMRs when FAST_REG_MR is flushed by a disconnect FAST_REG_MR Work Requests update a Memory Region's rkey. Rkey's are used to block unwanted access to the memory controlled by an MR. The rkey is passed to the receiver (the NFS server, in our case), and is also used by xprtrdma to invalidate the MR when the RPC is complete. When a FAST_REG_MR Work Request is flushed after a transport disconnect, xprtrdma cannot tell whether the WR actually hit the adapter or not. So it is indeterminant at that point whether the existing rkey is still valid. After the transport connection is re-established, the next FAST_REG_MR or LOCAL_INV Work Request against that MR can sometimes fail because the rkey value does not match what xprtrdma expects. The only reliable way to recover in this case is to deregister and register the MR before it is used again. These operations can be done only in a process context, so handle it in the transport connect worker. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 67 +++++++++++++++++++++++++++++++++++++++-- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 2 files changed, 66 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 017f0ab..3a6376a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -61,6 +61,8 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif +static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); + /* * internal functions */ @@ -152,8 +154,10 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc) if (wc->wr_id == 0ULL) return; - if (wc->status != IB_WC_SUCCESS) + if (wc->status != IB_WC_SUCCESS) { + frmr->r.frmr.fr_state = FRMR_IS_STALE; return; + } if (wc->opcode == IB_WC_FAST_REG_MR) frmr->r.frmr.fr_state = FRMR_IS_VALID; @@ -881,6 +885,9 @@ retry: " status %i\n", __func__, rc); rpcrdma_flush_cqs(ep); + if (ia->ri_memreg_strategy == RPCRDMA_FRMR) + rpcrdma_reset_frmrs(ia); + xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); id = rpcrdma_create_id(xprt, ia, (struct sockaddr *)&xprt->rx_data.addr); @@ -1256,6 +1263,62 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) kfree(buf->rb_pool); } +/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in + * an unusable state. Find FRMRs in this state and dereg / reg + * each. FRMRs that are VALID and attached to an rpcrdma_req are + * also torn down. + * + * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. + * + * This is invoked only in the transport connect worker in order + * to serialize with rpcrdma_register_frmr_external(). + */ +static void +rpcrdma_reset_frmrs(struct rpcrdma_ia *ia) +{ + struct rpcrdma_xprt *r_xprt = + container_of(ia, struct rpcrdma_xprt, rx_ia); + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct list_head *pos; + struct rpcrdma_mw *r; + int rc; + + list_for_each(pos, &buf->rb_all) { + r = list_entry(pos, struct rpcrdma_mw, mw_all); + + if (r->r.frmr.fr_state == FRMR_IS_INVALID) + continue; + + rc = ib_dereg_mr(r->r.frmr.fr_mr); + if (rc) + dprintk("RPC: %s: ib_dereg_mr failed %i\n", + __func__, rc); + ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); + + r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, + ia->ri_max_frmr_depth); + if (IS_ERR(r->r.frmr.fr_mr)) { + rc = PTR_ERR(r->r.frmr.fr_mr); + dprintk("RPC: %s: ib_alloc_fast_reg_mr" + " failed %i\n", __func__, rc); + continue; + } + r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( + ia->ri_id->device, + ia->ri_max_frmr_depth); + if (IS_ERR(r->r.frmr.fr_pgl)) { + rc = PTR_ERR(r->r.frmr.fr_pgl); + dprintk("RPC: %s: " + "ib_alloc_fast_reg_page_list " + "failed %i\n", __func__, rc); + + ib_dereg_mr(r->r.frmr.fr_mr); + continue; + } + r->r.frmr.fr_state = FRMR_IS_INVALID; + } +} + /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving * some req segments uninitialized. */ @@ -1575,7 +1638,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, dprintk("RPC: %s: Using frmr %p to map %d segments\n", __func__, mw, i); - if (unlikely(frmr->fr_state == FRMR_IS_VALID)) { + if (unlikely(frmr->fr_state != FRMR_IS_INVALID)) { dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n", __func__, mr->rkey); /* Invalidate before using. */ diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c1d8652..1ee6db3 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -161,6 +161,7 @@ struct rpcrdma_rep { enum rpcrdma_frmr_state { FRMR_IS_INVALID, /* ready to be used */ FRMR_IS_VALID, /* in use */ + FRMR_IS_STALE, /* failed completion */ }; struct rpcrdma_frmr { -- cgit v1.1 From ddb6bebcc64678fcf73eb9e21f80c6dacfa093a7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:24:54 -0400 Subject: xprtrdma: Reset FRMRs after a flushed LOCAL_INV Work Request When a LOCAL_INV Work Request is flushed, it leaves an FRMR in the VALID state. This FRMR can be returned by rpcrdma_buffer_get(), and must be knocked down in rpcrdma_register_frmr_external() before it can be re-used. Instead, capture these in rpcrdma_buffer_get(), and reset them. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 94 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3a6376a..ca55acf 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1358,8 +1358,91 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) } } +/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external(). + * Redo only the ib_post_send(). + */ +static void +rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia) +{ + struct rpcrdma_xprt *r_xprt = + container_of(ia, struct rpcrdma_xprt, rx_ia); + struct ib_send_wr invalidate_wr, *bad_wr; + int rc; + + dprintk("RPC: %s: FRMR %p is stale\n", __func__, r); + + /* When this FRMR is re-inserted into rb_mws, it is no longer stale */ + r->r.frmr.fr_state = FRMR_IS_VALID; + + memset(&invalidate_wr, 0, sizeof(invalidate_wr)); + invalidate_wr.wr_id = (unsigned long)(void *)r; + invalidate_wr.opcode = IB_WR_LOCAL_INV; + invalidate_wr.send_flags = IB_SEND_SIGNALED; + invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey; + DECR_CQCOUNT(&r_xprt->rx_ep); + + dprintk("RPC: %s: frmr %p invalidating rkey %08x\n", + __func__, r, r->r.frmr.fr_mr->rkey); + + read_lock(&ia->ri_qplock); + rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); + read_unlock(&ia->ri_qplock); + if (rc) { + /* Force rpcrdma_buffer_get() to retry */ + r->r.frmr.fr_state = FRMR_IS_STALE; + dprintk("RPC: %s: ib_post_send failed, %i\n", + __func__, rc); + } +} + +static void +rpcrdma_retry_flushed_linv(struct list_head *stale, + struct rpcrdma_buffer *buf) +{ + struct rpcrdma_ia *ia = rdmab_to_ia(buf); + struct list_head *pos; + struct rpcrdma_mw *r; + unsigned long flags; + + list_for_each(pos, stale) { + r = list_entry(pos, struct rpcrdma_mw, mw_list); + rpcrdma_retry_local_inv(r, ia); + } + + spin_lock_irqsave(&buf->rb_lock, flags); + list_splice_tail(stale, &buf->rb_mws); + spin_unlock_irqrestore(&buf->rb_lock, flags); +} + static struct rpcrdma_req * -rpcrdma_buffer_get_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) +rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf, + struct list_head *stale) +{ + struct rpcrdma_mw *r; + int i; + + i = RPCRDMA_MAX_SEGS - 1; + while (!list_empty(&buf->rb_mws)) { + r = list_entry(buf->rb_mws.next, + struct rpcrdma_mw, mw_list); + list_del(&r->mw_list); + if (r->r.frmr.fr_state == FRMR_IS_STALE) { + list_add(&r->mw_list, stale); + continue; + } + req->rl_segments[i].mr_chunk.rl_mw = r; + if (unlikely(i-- == 0)) + return req; /* Success */ + } + + /* Not enough entries on rb_mws for this req */ + rpcrdma_buffer_put_sendbuf(req, buf); + rpcrdma_buffer_put_mrs(req, buf); + return NULL; +} + +static struct rpcrdma_req * +rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) { struct rpcrdma_mw *r; int i; @@ -1393,6 +1476,7 @@ struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) { struct rpcrdma_ia *ia = rdmab_to_ia(buffers); + struct list_head stale; struct rpcrdma_req *req; unsigned long flags; @@ -1414,15 +1498,21 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; } buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; + + INIT_LIST_HEAD(&stale); switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: + req = rpcrdma_buffer_get_frmrs(req, buffers, &stale); + break; case RPCRDMA_MTHCAFMR: - req = rpcrdma_buffer_get_mrs(req, buffers); + req = rpcrdma_buffer_get_fmrs(req, buffers); break; default: break; } spin_unlock_irqrestore(&buffers->rb_lock, flags); + if (!list_empty(&stale)) + rpcrdma_retry_flushed_linv(&stale, buffers); return req; } -- cgit v1.1 From 440ddad51b821a8ab9099addcc29d4d18d02f6ac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:25:03 -0400 Subject: xprtrdma: Don't post a LOCAL_INV in rpcrdma_register_frmr_external() Any FRMR arriving in rpcrdma_register_frmr_external() is now guaranteed to be either invalid, or to be targeted by a queued LOCAL_INV that will invalidate it before the adapter processes the FAST_REG_MR being built here. The problem with current arrangement of chaining a LOCAL_INV to the FAST_REG_MR is that if the transport is not connected, the LOCAL_INV is flushed and the FAST_REG_MR is flushed. This leaves the FRMR valid with the old rkey. But rpcrdma_register_frmr_external() has already bumped the in-memory rkey. Next time through rpcrdma_register_frmr_external(), a LOCAL_INV and FAST_REG_MR is attempted again because the FRMR is still valid. But the rkey no longer matches the hardware's rkey, and a memory management operation error occurs. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index ca55acf..7459b86 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1695,8 +1695,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw; struct rpcrdma_frmr *frmr = &mw->r.frmr; struct ib_mr *mr = frmr->fr_mr; - struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr; - + struct ib_send_wr frmr_wr, *bad_wr; u8 key; int len, pageoff; int i, rc; @@ -1728,22 +1727,6 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, dprintk("RPC: %s: Using frmr %p to map %d segments\n", __func__, mw, i); - if (unlikely(frmr->fr_state != FRMR_IS_INVALID)) { - dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n", - __func__, mr->rkey); - /* Invalidate before using. */ - memset(&invalidate_wr, 0, sizeof invalidate_wr); - invalidate_wr.wr_id = (unsigned long)(void *)mw; - invalidate_wr.next = &frmr_wr; - invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.send_flags = IB_SEND_SIGNALED; - invalidate_wr.ex.invalidate_rkey = mr->rkey; - DECR_CQCOUNT(&r_xprt->rx_ep); - post_wr = &invalidate_wr; - } else - post_wr = &frmr_wr; - - /* Prepare FRMR WR */ memset(&frmr_wr, 0, sizeof frmr_wr); frmr_wr.wr_id = (unsigned long)(void *)mw; frmr_wr.opcode = IB_WR_FAST_REG_MR; @@ -1768,8 +1751,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr_wr.wr.fast_reg.rkey = mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr); - + rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); if (rc) { dprintk("RPC: %s: failed ib_post_send for register," " status %i\n", __func__, rc); -- cgit v1.1 From 050557220e34ed5acc830c9bf6cd993f6b4ea33e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:25:12 -0400 Subject: xprtrdma: Disable completions for FAST_REG_MR Work Requests Instead of relying on a completion to change the state of an FRMR to FRMR_IS_VALID, set it in advance. If an error occurs, a completion will fire anyway and mark the FRMR FRMR_IS_STALE. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 7459b86..3e8b388 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -159,10 +159,7 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc) return; } - if (wc->opcode == IB_WC_FAST_REG_MR) - frmr->r.frmr.fr_state = FRMR_IS_VALID; - else if (wc->opcode == IB_WC_LOCAL_INV) - frmr->r.frmr.fr_state = FRMR_IS_INVALID; + frmr->r.frmr.fr_state = FRMR_IS_INVALID; } static int @@ -1727,10 +1724,11 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, dprintk("RPC: %s: Using frmr %p to map %d segments\n", __func__, mw, i); + frmr->fr_state = FRMR_IS_VALID; + memset(&frmr_wr, 0, sizeof frmr_wr); frmr_wr.wr_id = (unsigned long)(void *)mw; frmr_wr.opcode = IB_WR_FAST_REG_MR; - frmr_wr.send_flags = IB_SEND_SIGNALED; frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = frmr->fr_pgl; frmr_wr.wr.fast_reg.page_list_len = page_no; @@ -1766,6 +1764,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, *nsegs = i; return 0; out_err: + frmr->fr_state = FRMR_IS_INVALID; while (i--) rpcrdma_unmap_one(ia, --seg); return rc; -- cgit v1.1 From dab7e3b8da5ef76143a7e609612c306898f8f8fc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:25:20 -0400 Subject: xprtrdma: Disable completions for LOCAL_INV Work Requests Instead of relying on a completion to change the state of an FRMR to FRMR_IS_INVALID, set it in advance. If an error occurs, a completion will fire anyway and mark the FRMR FRMR_IS_STALE. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3e8b388..08c9235 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -154,12 +154,8 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc) if (wc->wr_id == 0ULL) return; - if (wc->status != IB_WC_SUCCESS) { + if (wc->status != IB_WC_SUCCESS) frmr->r.frmr.fr_state = FRMR_IS_STALE; - return; - } - - frmr->r.frmr.fr_state = FRMR_IS_INVALID; } static int @@ -1369,12 +1365,11 @@ rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia) dprintk("RPC: %s: FRMR %p is stale\n", __func__, r); /* When this FRMR is re-inserted into rb_mws, it is no longer stale */ - r->r.frmr.fr_state = FRMR_IS_VALID; + r->r.frmr.fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof(invalidate_wr)); invalidate_wr.wr_id = (unsigned long)(void *)r; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.send_flags = IB_SEND_SIGNALED; invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); @@ -1778,10 +1773,11 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, struct ib_send_wr invalidate_wr, *bad_wr; int rc; + seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; + memset(&invalidate_wr, 0, sizeof invalidate_wr); invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.send_flags = IB_SEND_SIGNALED; invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); @@ -1790,9 +1786,12 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, rpcrdma_unmap_one(ia, seg++); rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); read_unlock(&ia->ri_qplock); - if (rc) + if (rc) { + /* Force rpcrdma_buffer_get() to retry */ + seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE; dprintk("RPC: %s: failed ib_post_send for invalidate," " status %i\n", __func__, rc); + } return rc; } -- cgit v1.1 From f590e878c52c38046fd7cfa5a742ddae68717484 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:25:29 -0400 Subject: xprtrdma: Rename frmr_wr Clean up: Name frmr_wr after the opcode of the Work Request, consistent with the send and local invalidation paths. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 08c9235..80c0163 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1687,7 +1687,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw; struct rpcrdma_frmr *frmr = &mw->r.frmr; struct ib_mr *mr = frmr->fr_mr; - struct ib_send_wr frmr_wr, *bad_wr; + struct ib_send_wr fastreg_wr, *bad_wr; u8 key; int len, pageoff; int i, rc; @@ -1721,15 +1721,15 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr->fr_state = FRMR_IS_VALID; - memset(&frmr_wr, 0, sizeof frmr_wr); - frmr_wr.wr_id = (unsigned long)(void *)mw; - frmr_wr.opcode = IB_WR_FAST_REG_MR; - frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; - frmr_wr.wr.fast_reg.page_list = frmr->fr_pgl; - frmr_wr.wr.fast_reg.page_list_len = page_no; - frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; - if (frmr_wr.wr.fast_reg.length < len) { + memset(&fastreg_wr, 0, sizeof(fastreg_wr)); + fastreg_wr.wr_id = (unsigned long)(void *)mw; + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma; + fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; + fastreg_wr.wr.fast_reg.page_list_len = page_no; + fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; + if (fastreg_wr.wr.fast_reg.length < len) { rc = -EIO; goto out_err; } @@ -1738,13 +1738,13 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); - frmr_wr.wr.fast_reg.access_flags = (writing ? + fastreg_wr.wr.fast_reg.access_flags = (writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ); - frmr_wr.wr.fast_reg.rkey = mr->rkey; + fastreg_wr.wr.fast_reg.rkey = mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); if (rc) { dprintk("RPC: %s: failed ib_post_send for register," " status %i\n", __func__, rc); -- cgit v1.1 From 2e84522c2e0323a090fe1f7eeed6d5b6a68efe5f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:25:38 -0400 Subject: xprtrdma: Allocate each struct rpcrdma_mw separately Currently rpcrdma_buffer_create() allocates struct rpcrdma_mw's as a single contiguous area of memory. It amounts to quite a bit of memory, and there's no requirement for these to be carved from a single piece of contiguous memory. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 242 ++++++++++++++++++++++++++------------------ 1 file changed, 143 insertions(+), 99 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 80c0163..31c4fd3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1005,9 +1005,91 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) return rc; } -/* - * Initialize buffer memory - */ +static int +rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) +{ + int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; + struct ib_fmr_attr fmr_attr = { + .max_pages = RPCRDMA_MAX_DATA_SEGS, + .max_maps = 1, + .page_shift = PAGE_SHIFT + }; + struct rpcrdma_mw *r; + int i, rc; + + i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; + dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i); + + while (i--) { + r = kzalloc(sizeof(*r), GFP_KERNEL); + if (r == NULL) + return -ENOMEM; + + r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr); + if (IS_ERR(r->r.fmr)) { + rc = PTR_ERR(r->r.fmr); + dprintk("RPC: %s: ib_alloc_fmr failed %i\n", + __func__, rc); + goto out_free; + } + + list_add(&r->mw_list, &buf->rb_mws); + list_add(&r->mw_all, &buf->rb_all); + } + return 0; + +out_free: + kfree(r); + return rc; +} + +static int +rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) +{ + struct rpcrdma_frmr *f; + struct rpcrdma_mw *r; + int i, rc; + + i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; + dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i); + + while (i--) { + r = kzalloc(sizeof(*r), GFP_KERNEL); + if (r == NULL) + return -ENOMEM; + f = &r->r.frmr; + + f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, + ia->ri_max_frmr_depth); + if (IS_ERR(f->fr_mr)) { + rc = PTR_ERR(f->fr_mr); + dprintk("RPC: %s: ib_alloc_fast_reg_mr " + "failed %i\n", __func__, rc); + goto out_free; + } + + f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device, + ia->ri_max_frmr_depth); + if (IS_ERR(f->fr_pgl)) { + rc = PTR_ERR(f->fr_pgl); + dprintk("RPC: %s: ib_alloc_fast_reg_page_list " + "failed %i\n", __func__, rc); + + ib_dereg_mr(f->fr_mr); + goto out_free; + } + + list_add(&r->mw_list, &buf->rb_mws); + list_add(&r->mw_all, &buf->rb_all); + } + + return 0; + +out_free: + kfree(r); + return rc; +} + int rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) @@ -1015,7 +1097,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, char *p; size_t len, rlen, wlen; int i, rc; - struct rpcrdma_mw *r; buf->rb_max_requests = cdata->max_requests; spin_lock_init(&buf->rb_lock); @@ -1026,28 +1107,12 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, * 2. arrays of struct rpcrdma_req to fill in pointers * 3. array of struct rpcrdma_rep for replies * 4. padding, if any - * 5. mw's, fmr's or frmr's, if any * Send/recv buffers in req/rep need to be registered */ - len = buf->rb_max_requests * (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); len += cdata->padding; - switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: - len += buf->rb_max_requests * RPCRDMA_MAX_SEGS * - sizeof(struct rpcrdma_mw); - break; - case RPCRDMA_MTHCAFMR: - /* TBD we are perhaps overallocating here */ - len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * - sizeof(struct rpcrdma_mw); - break; - default: - break; - } - /* allocate 1, 4 and 5 in one shot */ p = kzalloc(len, GFP_KERNEL); if (p == NULL) { dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n", @@ -1075,53 +1140,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); - r = (struct rpcrdma_mw *)p; switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: - for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { - r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, - ia->ri_max_frmr_depth); - if (IS_ERR(r->r.frmr.fr_mr)) { - rc = PTR_ERR(r->r.frmr.fr_mr); - dprintk("RPC: %s: ib_alloc_fast_reg_mr" - " failed %i\n", __func__, rc); - goto out; - } - r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( - ia->ri_id->device, - ia->ri_max_frmr_depth); - if (IS_ERR(r->r.frmr.fr_pgl)) { - rc = PTR_ERR(r->r.frmr.fr_pgl); - dprintk("RPC: %s: " - "ib_alloc_fast_reg_page_list " - "failed %i\n", __func__, rc); - - ib_dereg_mr(r->r.frmr.fr_mr); - goto out; - } - list_add(&r->mw_all, &buf->rb_all); - list_add(&r->mw_list, &buf->rb_mws); - ++r; - } + rc = rpcrdma_init_frmrs(ia, buf); + if (rc) + goto out; break; case RPCRDMA_MTHCAFMR: - /* TBD we are perhaps overallocating here */ - for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { - static struct ib_fmr_attr fa = - { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT }; - r->r.fmr = ib_alloc_fmr(ia->ri_pd, - IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, - &fa); - if (IS_ERR(r->r.fmr)) { - rc = PTR_ERR(r->r.fmr); - dprintk("RPC: %s: ib_alloc_fmr" - " failed %i\n", __func__, rc); - goto out; - } - list_add(&r->mw_all, &buf->rb_all); - list_add(&r->mw_list, &buf->rb_mws); - ++r; - } + rc = rpcrdma_init_fmrs(ia, buf); + if (rc) + goto out; break; default: break; @@ -1189,24 +1217,57 @@ out: return rc; } -/* - * Unregister and destroy buffer memory. Need to deal with - * partial initialization, so it's callable from failed create. - * Must be called before destroying endpoint, as registrations - * reference it. - */ +static void +rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) +{ + struct rpcrdma_mw *r; + int rc; + + while (!list_empty(&buf->rb_all)) { + r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); + list_del(&r->mw_all); + list_del(&r->mw_list); + + rc = ib_dealloc_fmr(r->r.fmr); + if (rc) + dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", + __func__, rc); + + kfree(r); + } +} + +static void +rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf) +{ + struct rpcrdma_mw *r; + int rc; + + while (!list_empty(&buf->rb_all)) { + r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); + list_del(&r->mw_all); + list_del(&r->mw_list); + + rc = ib_dereg_mr(r->r.frmr.fr_mr); + if (rc) + dprintk("RPC: %s: ib_dereg_mr failed %i\n", + __func__, rc); + ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); + + kfree(r); + } +} + void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { - int rc, i; struct rpcrdma_ia *ia = rdmab_to_ia(buf); - struct rpcrdma_mw *r; + int i; /* clean up in reverse order from create * 1. recv mr memory (mr free, then kfree) * 2. send mr memory (mr free, then kfree) - * 3. padding (if any) [moved to rpcrdma_ep_destroy] - * 4. arrays + * 3. MWs */ dprintk("RPC: %s: entering\n", __func__); @@ -1225,32 +1286,15 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) } } - while (!list_empty(&buf->rb_mws)) { - r = list_entry(buf->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_all); - list_del(&r->mw_list); - switch (ia->ri_memreg_strategy) { - case RPCRDMA_FRMR: - rc = ib_dereg_mr(r->r.frmr.fr_mr); - if (rc) - dprintk("RPC: %s:" - " ib_dereg_mr" - " failed %i\n", - __func__, rc); - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); - break; - case RPCRDMA_MTHCAFMR: - rc = ib_dealloc_fmr(r->r.fmr); - if (rc) - dprintk("RPC: %s:" - " ib_dealloc_fmr" - " failed %i\n", - __func__, rc); - break; - default: - break; - } + switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + rpcrdma_destroy_frmrs(buf); + break; + case RPCRDMA_MTHCAFMR: + rpcrdma_destroy_fmrs(buf); + break; + default: + break; } kfree(buf->rb_pool); -- cgit v1.1 From bb96193d9104613cd87fb518f25db3fadc36432e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:25:46 -0400 Subject: xprtrdma: Schedule reply tasklet once per upcall Minor optimization: grab rpcrdma_tk_lock_g and disable hard IRQs just once after clearing the receive completion queue. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 31c4fd3..f124f04 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -105,17 +105,6 @@ rpcrdma_run_tasklet(unsigned long data) static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); -static inline void -rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep) -{ - unsigned long flags; - - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g); - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - tasklet_schedule(&rpcrdma_tasklet_g); -} - static void rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) { @@ -214,7 +203,7 @@ rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) } static void -rpcrdma_recvcq_process_wc(struct ib_wc *wc) +rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) { struct rpcrdma_rep *rep = (struct rpcrdma_rep *)(unsigned long)wc->wr_id; @@ -245,28 +234,38 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc) } out_schedule: - rpcrdma_schedule_tasklet(rep); + list_add_tail(&rep->rr_list, sched_list); } static int rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) { + struct list_head sched_list; struct ib_wc *wcs; int budget, count, rc; + unsigned long flags; + INIT_LIST_HEAD(&sched_list); budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; do { wcs = ep->rep_recv_wcs; rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); if (rc <= 0) - return rc; + goto out_schedule; count = rc; while (count-- > 0) - rpcrdma_recvcq_process_wc(wcs++); + rpcrdma_recvcq_process_wc(wcs++, &sched_list); } while (rc == RPCRDMA_POLLSIZE && --budget); - return 0; + rc = 0; + +out_schedule: + spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); + list_splice_tail(&sched_list, &rpcrdma_tasklets_g); + spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + tasklet_schedule(&rpcrdma_tasklet_g); + return rc; } /* -- cgit v1.1 From 282191cb725db9a1aa80269e8369b06e9270a948 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:25:55 -0400 Subject: xprtrdma: Make rpcrdma_ep_disconnect() return void Clean up: The return code is used only for dprintk's that are already redundant. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 14 ++++---------- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index f6d280b..2faac49 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -414,7 +414,7 @@ xprt_rdma_close(struct rpc_xprt *xprt) if (r_xprt->rx_ep.rep_connected > 0) xprt->reestablish_timeout = 0; xprt_disconnect_done(xprt); - (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); + rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); } static void diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index f124f04..1208ab2 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -830,10 +830,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) cancel_delayed_work_sync(&ep->rep_connect_worker); if (ia->ri_id->qp) { - rc = rpcrdma_ep_disconnect(ep, ia); - if (rc) - dprintk("RPC: %s: rpcrdma_ep_disconnect" - " returned %i\n", __func__, rc); + rpcrdma_ep_disconnect(ep, ia); rdma_destroy_qp(ia->ri_id); ia->ri_id->qp = NULL; } @@ -871,10 +868,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) struct rpcrdma_xprt *xprt; retry: dprintk("RPC: %s: reconnecting...\n", __func__); - rc = rpcrdma_ep_disconnect(ep, ia); - if (rc && rc != -ENOTCONN) - dprintk("RPC: %s: rpcrdma_ep_disconnect" - " status %i\n", __func__, rc); + + rpcrdma_ep_disconnect(ep, ia); rpcrdma_flush_cqs(ep); if (ia->ri_memreg_strategy == RPCRDMA_FRMR) @@ -984,7 +979,7 @@ out: * This call is not reentrant, and must not be made in parallel * on the same endpoint. */ -int +void rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { int rc; @@ -1001,7 +996,6 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); ep->rep_connected = rc; } - return rc; } static int diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 1ee6db3..c419498 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -341,7 +341,7 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, struct rpcrdma_create_data_internal *); void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); -int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); +void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, struct rpcrdma_req *); -- cgit v1.1 From a779ca5fa766e270b9e11c162d877295e2904f4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:26:04 -0400 Subject: xprtrdma: Remove RPCRDMA_PERSISTENT_REGISTRATION macro Clean up. RPCRDMA_PERSISTENT_REGISTRATION was a compile-time switch between RPCRDMA_REGISTER mode and RPCRDMA_ALLPHYSICAL mode. Since RPCRDMA_REGISTER has been removed, there's no need for the extra conditional compilation. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1208ab2..c2253d4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -561,12 +561,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) if (!ia->ri_id->device->alloc_fmr) { dprintk("RPC: %s: MTHCAFMR registration " "not supported by HCA\n", __func__); -#if RPCRDMA_PERSISTENT_REGISTRATION memreg = RPCRDMA_ALLPHYSICAL; -#else - rc = -ENOMEM; - goto out2; -#endif } } @@ -581,20 +576,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) switch (memreg) { case RPCRDMA_FRMR: break; -#if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: mem_priv = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; goto register_setup; -#endif case RPCRDMA_MTHCAFMR: if (ia->ri_have_dma_lkey) break; mem_priv = IB_ACCESS_LOCAL_WRITE; -#if RPCRDMA_PERSISTENT_REGISTRATION register_setup: -#endif ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); if (IS_ERR(ia->ri_bind_mem)) { printk(KERN_ALERT "%s: ib_get_dma_mr for " @@ -1905,7 +1896,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, switch (ia->ri_memreg_strategy) { -#if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: rpcrdma_map_one(ia, seg, writing); seg->mr_rkey = ia->ri_bind_mem->rkey; @@ -1913,7 +1903,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, seg->mr_nsegs = 1; nsegs = 1; break; -#endif /* Registration using frmr registration */ case RPCRDMA_FRMR: @@ -1943,13 +1932,11 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, switch (ia->ri_memreg_strategy) { -#if RPCRDMA_PERSISTENT_REGISTRATION case RPCRDMA_ALLPHYSICAL: read_lock(&ia->ri_qplock); rpcrdma_unmap_one(ia, seg); read_unlock(&ia->ri_qplock); break; -#endif case RPCRDMA_FRMR: rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); -- cgit v1.1 From 8079fb785e34de6dff34bd846b8b79c212861edf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 29 Jul 2014 17:26:12 -0400 Subject: xprtrdma: Handle additional connection events Commit 38ca83a5 added RDMA_CM_EVENT_TIMEWAIT_EXIT. But that status is relevant only for consumers that re-use their QPs on new connections. xprtrdma creates a fresh QP on reconnection, so that event should be explicitly ignored. Squelch the alarming "unexpected CM event" message. Signed-off-by: Chuck Lever Tested-by: Steve Wise Tested-by: Shirley Ma Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c2253d4..61c4129 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -326,8 +326,16 @@ static const char * const conn[] = { "rejected", "established", "disconnected", - "device removal" + "device removal", + "multicast join", + "multicast error", + "address change", + "timewait exit", }; + +#define CONNECTION_MSG(status) \ + ((status) < ARRAY_SIZE(conn) ? \ + conn[(status)] : "unrecognized connection error") #endif static int @@ -385,23 +393,18 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: connstate = -ENODEV; connected: - dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n", - __func__, - (event->event <= 11) ? conn[event->event] : - "unknown connection error", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port), - ep, event->event); atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1); dprintk("RPC: %s: %sconnected\n", __func__, connstate > 0 ? "" : "dis"); ep->rep_connected = connstate; ep->rep_func(ep); wake_up_all(&ep->rep_connect_wait); - break; + /*FALLTHROUGH*/ default: - dprintk("RPC: %s: unexpected CM event %d\n", - __func__, event->event); + dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n", + __func__, &addr->sin_addr.s_addr, + ntohs(addr->sin_port), ep, + CONNECTION_MSG(event->event)); break; } -- cgit v1.1 From bae6746ff356478e1a2706072edbfb514072e0ff Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Jul 2014 13:32:42 -0400 Subject: SUNRPC: Enforce an upper limit on the number of cached credentials In some cases where the credentials are not often reused, we may want to limit their total number just in order to make the negative lookups in the hash table more manageable. Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 2bc7bb8..360decd 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -80,6 +80,10 @@ static struct kernel_param_ops param_ops_hashtbl_sz = { module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644); MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size"); +static unsigned long auth_max_cred_cachesize = ULONG_MAX; +module_param(auth_max_cred_cachesize, ulong, 0644); +MODULE_PARM_DESC(auth_max_cred_cachesize, "RPC credential maximum total cache size"); + static u32 pseudoflavor_to_flavor(u32 flavor) { if (flavor > RPC_AUTH_MAXFLAVOR) @@ -481,6 +485,20 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) return freed; } +static unsigned long +rpcauth_cache_do_shrink(int nr_to_scan) +{ + LIST_HEAD(free); + unsigned long freed; + + spin_lock(&rpc_credcache_lock); + freed = rpcauth_prune_expired(&free, nr_to_scan); + spin_unlock(&rpc_credcache_lock); + rpcauth_destroy_credlist(&free); + + return freed; +} + /* * Run memory cache shrinker. */ @@ -488,9 +506,6 @@ static unsigned long rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - LIST_HEAD(free); - unsigned long freed; - if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL) return SHRINK_STOP; @@ -498,12 +513,7 @@ rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) if (list_empty(&cred_unused)) return SHRINK_STOP; - spin_lock(&rpc_credcache_lock); - freed = rpcauth_prune_expired(&free, sc->nr_to_scan); - spin_unlock(&rpc_credcache_lock); - rpcauth_destroy_credlist(&free); - - return freed; + return rpcauth_cache_do_shrink(sc->nr_to_scan); } static unsigned long @@ -513,6 +523,21 @@ rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; } +static void +rpcauth_cache_enforce_limit(void) +{ + unsigned long diff; + unsigned int nr_to_scan; + + if (number_cred_unused <= auth_max_cred_cachesize) + return; + diff = number_cred_unused - auth_max_cred_cachesize; + nr_to_scan = 100; + if (diff < nr_to_scan) + nr_to_scan = diff; + rpcauth_cache_do_shrink(nr_to_scan); +} + /* * Look up a process' credentials in the authentication cache */ @@ -566,6 +591,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, } else list_add_tail(&new->cr_lru, &free); spin_unlock(&cache->lock); + rpcauth_cache_enforce_limit(); found: if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && cred->cr_ops->cr_init != NULL && -- cgit v1.1 From c5e6aecd034e70e73af4d7a2b8a115239e3568b5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 06:52:19 -0400 Subject: sunrpc: fix RCU handling of gc_ctx field The handling of the gc_ctx pointer only seems to be partially RCU-safe. The assignment and freeing are done using RCU, but many places in the code seem to dereference that pointer without proper RCU safeguards. Fix them to use rcu_dereference and to rcu_read_lock/unlock, and to properly handle the case where the pointer is NULL. Cc: Arnd Bergmann Cc: Paul McKenney Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 52 ++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 17 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 7385431..afb292c 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -183,8 +183,9 @@ gss_cred_get_ctx(struct rpc_cred *cred) struct gss_cl_ctx *ctx = NULL; rcu_read_lock(); - if (gss_cred->gc_ctx) - ctx = gss_get_ctx(gss_cred->gc_ctx); + ctx = rcu_dereference(gss_cred->gc_ctx); + if (ctx) + gss_get_ctx(ctx); rcu_read_unlock(); return ctx; } @@ -1207,13 +1208,13 @@ gss_destroying_context(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); + struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); struct rpc_task *task; - if (gss_cred->gc_ctx == NULL || - test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) return 0; - gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY; + ctx->gc_proc = RPC_GSS_PROC_DESTROY; cred->cr_ops = &gss_nullops; /* Take a reference to ensure the cred will be destroyed either @@ -1274,7 +1275,7 @@ gss_destroy_nullcred(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); - struct gss_cl_ctx *ctx = gss_cred->gc_ctx; + struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); call_rcu(&cred->cr_rcu, gss_free_cred_callback); @@ -1349,20 +1350,30 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred) static char * gss_stringify_acceptor(struct rpc_cred *cred) { - char *string; + char *string = NULL; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); - struct xdr_netobj *acceptor = &gss_cred->gc_ctx->gc_acceptor; + struct gss_cl_ctx *ctx; + struct xdr_netobj *acceptor; + + rcu_read_lock(); + ctx = rcu_dereference(gss_cred->gc_ctx); + if (!ctx) + goto out; + + acceptor = &ctx->gc_acceptor; /* no point if there's no string */ if (!acceptor->len) - return NULL; + goto out; string = kmalloc(acceptor->len + 1, GFP_KERNEL); if (!string) - return string; + goto out; memcpy(string, acceptor->data, acceptor->len); string[acceptor->len] = '\0'; +out: + rcu_read_unlock(); return string; } @@ -1374,15 +1385,16 @@ static int gss_key_timeout(struct rpc_cred *rc) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); + struct gss_cl_ctx *ctx; unsigned long now = jiffies; unsigned long expire; - if (gss_cred->gc_ctx == NULL) - return -EACCES; - - expire = gss_cred->gc_ctx->gc_expiry - (gss_key_expire_timeo * HZ); - - if (time_after(now, expire)) + rcu_read_lock(); + ctx = rcu_dereference(gss_cred->gc_ctx); + if (ctx) + expire = ctx->gc_expiry - (gss_key_expire_timeo * HZ); + rcu_read_unlock(); + if (!ctx || time_after(now, expire)) return -EACCES; return 0; } @@ -1391,13 +1403,19 @@ static int gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); + struct gss_cl_ctx *ctx; int ret; if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) goto out; /* Don't match with creds that have expired. */ - if (time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) + rcu_read_lock(); + ctx = rcu_dereference(gss_cred->gc_ctx); + if (!ctx || time_after(jiffies, ctx->gc_expiry)) { + rcu_read_unlock(); return 0; + } + rcu_read_unlock(); if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags)) return 0; out: -- cgit v1.1 From 6ac0fbbfc141f758e2f331d1fee95bc105f87914 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 06:52:20 -0400 Subject: sunrpc: clean up sparse endianness warnings in gss_krb5_seal.c Use u16 pointer in setup_token and setup_token_v2. None of the fields are actually handled as __be16, so this simplifies the code a bit. Also get rid of some unneeded pointer increments. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_seal.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 62ae327..42768e5 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -70,31 +70,37 @@ DEFINE_SPINLOCK(krb5_seq_lock); -static char * +static void * setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token) { - __be16 *ptr, *krb5_hdr; + u16 *ptr; + void *krb5_hdr; int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength; token->len = g_token_size(&ctx->mech_used, body_size); - ptr = (__be16 *)token->data; + ptr = (u16 *)token->data; g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr); /* ptr now at start of header described in rfc 1964, section 1.2.1: */ krb5_hdr = ptr; *ptr++ = KG_TOK_MIC_MSG; - *ptr++ = cpu_to_le16(ctx->gk5e->signalg); + /* + * signalg is stored as if it were converted from LE to host endian, even + * though it's an opaque pair of bytes according to the RFC. + */ + *ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg); *ptr++ = SEAL_ALG_NONE; - *ptr++ = 0xffff; + *ptr = 0xffff; - return (char *)krb5_hdr; + return krb5_hdr; } static void * setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token) { - __be16 *ptr, *krb5_hdr; + u16 *ptr; + void *krb5_hdr; u8 *p, flags = 0x00; if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0) @@ -104,15 +110,15 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token) /* Per rfc 4121, sec 4.2.6.1, there is no header, * just start the token */ - krb5_hdr = ptr = (__be16 *)token->data; + krb5_hdr = ptr = (u16 *)token->data; *ptr++ = KG2_TOK_MIC; p = (u8 *)ptr; *p++ = flags; *p++ = 0xff; - ptr = (__be16 *)p; - *ptr++ = 0xffff; + ptr = (u16 *)p; *ptr++ = 0xffff; + *ptr = 0xffff; token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength; return krb5_hdr; @@ -181,7 +187,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, spin_lock(&krb5_seq_lock); seq_send = ctx->seq_send64++; spin_unlock(&krb5_seq_lock); - *((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send); + *((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send); if (ctx->initiate) { cksumkey = ctx->initiator_sign; -- cgit v1.1 From b36e9c44af00285ea33e78ce8ac91da3d8ac74da Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 06:52:21 -0400 Subject: sunrpc: clean up sparse endianness warnings in gss_krb5_wrap.c Fix the endianness handling in gss_wrap_kerberos_v1 and drop the memset call there in favor of setting the filler bytes directly. In gss_wrap_kerberos_v2, get rid of the "ec" variable which is always zero, and drop the endianness conversion of 0. Sparse handles 0 as a special case, so it's not necessary. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_wrap.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 42560e5..88cd24a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -201,9 +201,15 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength; - *(__be16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg); - memset(ptr + 4, 0xff, 4); - *(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg); + /* + * signalg and sealalg are stored as if they were converted from LE + * to host endian, even though they're opaque pairs of bytes according + * to the RFC. + */ + *(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg); + *(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg); + ptr[6] = 0xff; + ptr[7] = 0xff; gss_krb5_make_confounder(msg_start, conflen); @@ -438,7 +444,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, u8 *ptr, *plainhdr; s32 now; u8 flags = 0x00; - __be16 *be16ptr, ec = 0; + __be16 *be16ptr; __be64 *be64ptr; u32 err; @@ -468,16 +474,16 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, be16ptr = (__be16 *)ptr; blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc); - *be16ptr++ = cpu_to_be16(ec); + *be16ptr++ = 0; /* "inner" token header always uses 0 for RRC */ - *be16ptr++ = cpu_to_be16(0); + *be16ptr++ = 0; be64ptr = (__be64 *)be16ptr; spin_lock(&krb5_seq_lock); *be64ptr = cpu_to_be64(kctx->seq_send64++); spin_unlock(&krb5_seq_lock); - err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, ec, pages); + err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, 0, pages); if (err) return err; -- cgit v1.1 From ec25422c669d38f4e8a83da7f77950094349de48 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 06:52:22 -0400 Subject: sunrpc: remove "ec" argument from encrypt_v2 operation It's always 0. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 9 ++------- net/sunrpc/auth_gss/gss_krb5_wrap.c | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 0f43e89..f5ed9f6 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -641,7 +641,7 @@ out: u32 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, int ec, struct page **pages) + struct xdr_buf *buf, struct page **pages) { u32 err; struct xdr_netobj hmac; @@ -684,13 +684,8 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, ecptr = buf->tail[0].iov_base; } - memset(ecptr, 'X', ec); - buf->tail[0].iov_len += ec; - buf->len += ec; - /* copy plaintext gss token header after filler (if any) */ - memcpy(ecptr + ec, buf->head[0].iov_base + offset, - GSS_KRB5_TOK_HDR_LEN); + memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN); buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN; buf->len += GSS_KRB5_TOK_HDR_LEN; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 88cd24a..4b614c6 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -483,7 +483,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, *be64ptr = cpu_to_be64(kctx->seq_send64++); spin_unlock(&krb5_seq_lock); - err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, 0, pages); + err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages); if (err) return err; -- cgit v1.1 From bd95608053b7f7813351b0defc0e3e7ef8cf2803 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 14 Jul 2014 11:28:20 +1000 Subject: sunrpc/auth: allow lockless (rcu) lookup of credential cache. The new flag RPCAUTH_LOOKUP_RCU to credential lookup avoids locking, does not take a reference on the returned credential, and returns -ECHILD if a simple lookup was not possible. The returned value can only be used within an rcu_read_lock protected region. The main user of this is the new rpc_lookup_cred_nonblock() which returns a pointer to the current credential which is only rcu-safe (no ref-count held), and might return -ECHILD if allocation was required. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 17 +++++++++++++++-- net/sunrpc/auth_generic.c | 6 ++++++ net/sunrpc/auth_null.c | 2 ++ 3 files changed, 23 insertions(+), 2 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 360decd..24fcbd2 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -557,6 +557,12 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; + if (flags & RPCAUTH_LOOKUP_RCU) { + if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) && + !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags)) + cred = entry; + break; + } spin_lock(&cache->lock); if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { spin_unlock(&cache->lock); @@ -571,6 +577,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, if (cred != NULL) goto found; + if (flags & RPCAUTH_LOOKUP_RCU) + return ERR_PTR(-ECHILD); + new = auth->au_ops->crcreate(auth, acred, flags); if (IS_ERR(new)) { cred = new; @@ -621,10 +630,14 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) memset(&acred, 0, sizeof(acred)); acred.uid = cred->fsuid; acred.gid = cred->fsgid; - acred.group_info = get_group_info(((struct cred *)cred)->group_info); + if (flags & RPCAUTH_LOOKUP_RCU) + acred.group_info = rcu_dereference(cred->group_info); + else + acred.group_info = get_group_info(((struct cred *)cred)->group_info); ret = auth->au_ops->lookup_cred(auth, &acred, flags); - put_group_info(acred.group_info); + if (!(flags & RPCAUTH_LOOKUP_RCU)) + put_group_info(acred.group_info); return ret; } EXPORT_SYMBOL_GPL(rpcauth_lookupcred); diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index ed04869..6f6b829 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -38,6 +38,12 @@ struct rpc_cred *rpc_lookup_cred(void) } EXPORT_SYMBOL_GPL(rpc_lookup_cred); +struct rpc_cred *rpc_lookup_cred_nonblock(void) +{ + return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU); +} +EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock); + /* * Public call interface for looking up machine creds. */ diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index f0ebe07..712c123 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -35,6 +35,8 @@ nul_destroy(struct rpc_auth *auth) static struct rpc_cred * nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { + if (flags & RPCAUTH_LOOKUP_RCU) + return &null_cred; return get_rpccred(&null_cred); } -- cgit v1.1 From 122a8cda6a9a3594c4737570f5cd81636bcd20cf Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 4 Aug 2014 16:24:00 +1000 Subject: SUNRPC: remove all refcounting of groupinfo from rpcauth_lookupcred current_cred() can only be changed by 'current', and cred->group_info is never changed. If a new group_info is needed, a new 'cred' is created. Consequently it is always safe to access current_cred()->group_info without taking any further references. So drop the refcounting and the incorrect rcu_dereference(). Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 24fcbd2..383eb91 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -630,14 +630,8 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) memset(&acred, 0, sizeof(acred)); acred.uid = cred->fsuid; acred.gid = cred->fsgid; - if (flags & RPCAUTH_LOOKUP_RCU) - acred.group_info = rcu_dereference(cred->group_info); - else - acred.group_info = get_group_info(((struct cred *)cred)->group_info); - + acred.group_info = cred->group_info; ret = auth->au_ops->lookup_cred(auth, &acred, flags); - if (!(flags & RPCAUTH_LOOKUP_RCU)) - put_group_info(acred.group_info); return ret; } EXPORT_SYMBOL_GPL(rpcauth_lookupcred); -- cgit v1.1