summaryrefslogtreecommitdiffstats
path: root/net/sunrpc/xprtrdma/backchannel.c
blob: ffc4853a068e0a6080bde8a03bd38c62f73c01ed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
/*
 * Copyright (c) 2015 Oracle.  All rights reserved.
 *
 * Support for backward direction RPCs on RPC/RDMA.
 */

#include <linux/module.h>

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
				 struct rpc_rqst *rqst)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);

	spin_lock(&buf->rb_reqslock);
	list_del(&req->rl_all);
	spin_unlock(&buf->rb_reqslock);

	rpcrdma_destroy_req(&r_xprt->rx_ia, req);

	kfree(rqst);
}

static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
				 struct rpc_rqst *rqst)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_regbuf *rb;
	struct rpcrdma_req *req;
	struct xdr_buf *buf;
	size_t size;

	req = rpcrdma_create_req(r_xprt);
	if (!req)
		return -ENOMEM;
	req->rl_backchannel = true;

	size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
	rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
	if (IS_ERR(rb))
		goto out_fail;
	req->rl_rdmabuf = rb;

	size += RPCRDMA_INLINE_READ_THRESHOLD(rqst);
	rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
	if (IS_ERR(rb))
		goto out_fail;
	rb->rg_owner = req;
	req->rl_sendbuf = rb;
	/* so that rpcr_to_rdmar works when receiving a request */
	rqst->rq_buffer = (void *)req->rl_sendbuf->rg_base;

	buf = &rqst->rq_snd_buf;
	buf->head[0].iov_base = rqst->rq_buffer;
	buf->head[0].iov_len = 0;
	buf->tail[0].iov_base = NULL;
	buf->tail[0].iov_len = 0;
	buf->page_len = 0;
	buf->len = 0;
	buf->buflen = size;

	return 0;

out_fail:
	rpcrdma_bc_free_rqst(r_xprt, rqst);
	return -ENOMEM;
}

/* Allocate and add receive buffers to the rpcrdma_buffer's
 * existing list of rep's. These are released when the
 * transport is destroyed.
 */
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
				 unsigned int count)
{
	struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
	struct rpcrdma_rep *rep;
	unsigned long flags;
	int rc = 0;

	while (count--) {
		rep = rpcrdma_create_rep(r_xprt);
		if (IS_ERR(rep)) {
			pr_err("RPC:       %s: reply buffer alloc failed\n",
			       __func__);
			rc = PTR_ERR(rep);
			break;
		}

		spin_lock_irqsave(&buffers->rb_lock, flags);
		list_add(&rep->rr_list, &buffers->rb_recv_bufs);
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
	}

	return rc;
}

/**
 * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
 * @xprt: transport associated with these backchannel resources
 * @reqs: number of concurrent incoming requests to expect
 *
 * Returns 0 on success; otherwise a negative errno
 */
int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
	struct rpc_rqst *rqst;
	unsigned int i;
	int rc;

	/* The backchannel reply path returns each rpc_rqst to the
	 * bc_pa_list _after_ the reply is sent. If the server is
	 * faster than the client, it can send another backward
	 * direction request before the rpc_rqst is returned to the
	 * list. The client rejects the request in this case.
	 *
	 * Twice as many rpc_rqsts are prepared to ensure there is
	 * always an rpc_rqst available as soon as a reply is sent.
	 */
	if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
		goto out_err;

	for (i = 0; i < (reqs << 1); i++) {
		rqst = kzalloc(sizeof(*rqst), GFP_KERNEL);
		if (!rqst) {
			pr_err("RPC:       %s: Failed to create bc rpc_rqst\n",
			       __func__);
			goto out_free;
		}

		rqst->rq_xprt = &r_xprt->rx_xprt;
		INIT_LIST_HEAD(&rqst->rq_list);
		INIT_LIST_HEAD(&rqst->rq_bc_list);

		if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
			goto out_free;

		spin_lock_bh(&xprt->bc_pa_lock);
		list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
		spin_unlock_bh(&xprt->bc_pa_lock);
	}

	rc = rpcrdma_bc_setup_reps(r_xprt, reqs);
	if (rc)
		goto out_free;

	rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs);
	if (rc)
		goto out_free;

	buffer->rb_bc_srv_max_requests = reqs;
	request_module("svcrdma");

	return 0;

out_free:
	xprt_rdma_bc_destroy(xprt, reqs);

out_err:
	pr_err("RPC:       %s: setup backchannel transport failed\n", __func__);
	return -ENOMEM;
}

/**
 * rpcrdma_bc_marshal_reply - Send backwards direction reply
 * @rqst: buffer containing RPC reply data
 *
 * Returns zero on success.
 */
int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
{
	struct rpc_xprt *xprt = rqst->rq_xprt;
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct rpcrdma_msg *headerp;
	size_t rpclen;

	headerp = rdmab_to_msg(req->rl_rdmabuf);
	headerp->rm_xid = rqst->rq_xid;
	headerp->rm_vers = rpcrdma_version;
	headerp->rm_credit =
			cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests);
	headerp->rm_type = rdma_msg;
	headerp->rm_body.rm_chunks[0] = xdr_zero;
	headerp->rm_body.rm_chunks[1] = xdr_zero;
	headerp->rm_body.rm_chunks[2] = xdr_zero;

	rpclen = rqst->rq_svec[0].iov_len;

	pr_info("RPC:       %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
		__func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
	pr_info("RPC:       %s: RPC/RDMA: %*ph\n",
		__func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
	pr_info("RPC:       %s:      RPC: %*ph\n",
		__func__, (int)rpclen, rqst->rq_svec[0].iov_base);

	req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
	req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
	req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);

	req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
	req->rl_send_iov[1].length = rpclen;
	req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);

	req->rl_niovs = 2;
	return 0;
}

/**
 * xprt_rdma_bc_destroy - Release resources for handling backchannel requests
 * @xprt: transport associated with these backchannel resources
 * @reqs: number of incoming requests to destroy; ignored
 */
void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct rpc_rqst *rqst, *tmp;

	spin_lock_bh(&xprt->bc_pa_lock);
	list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
		list_del(&rqst->rq_bc_pa_list);
		spin_unlock_bh(&xprt->bc_pa_lock);

		rpcrdma_bc_free_rqst(r_xprt, rqst);

		spin_lock_bh(&xprt->bc_pa_lock);
	}
	spin_unlock_bh(&xprt->bc_pa_lock);
}

/**
 * xprt_rdma_bc_free_rqst - Release a backchannel rqst
 * @rqst: request to release
 */
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{
	struct rpc_xprt *xprt = rqst->rq_xprt;

	smp_mb__before_atomic();
	WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
	clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
	smp_mb__after_atomic();

	spin_lock_bh(&xprt->bc_pa_lock);
	list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
	spin_unlock_bh(&xprt->bc_pa_lock);
}
OpenPOWER on IntegriCloud