summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authortrasz <trasz@FreeBSD.org>2014-05-07 07:17:11 +0000
committertrasz <trasz@FreeBSD.org>2014-05-07 07:17:11 +0000
commitc4225c1e15340b8af5a6b3e11c37d219f768de29 (patch)
tree6e842c5e97877d8d35fc7071c437ca42b28b4774
parent339da02d0db75f2827d040987b3cf3626c7f6b7c (diff)
downloadFreeBSD-src-c4225c1e15340b8af5a6b3e11c37d219f768de29.zip
FreeBSD-src-c4225c1e15340b8af5a6b3e11c37d219f768de29.tar.gz
MFC r264122:
Rework the iSCSI PDU transmit code to avoid lock contention and coalesce PDUs before sending. Sponsored by: The FreeBSD Foundation
-rw-r--r--sys/dev/iscsi/icl.c205
-rw-r--r--sys/dev/iscsi/icl.h1
2 files changed, 169 insertions, 37 deletions
diff --git a/sys/dev/iscsi/icl.c b/sys/dev/iscsi/icl.c
index d40f1ff..b53edaa 100644
--- a/sys/dev/iscsi/icl.c
+++ b/sys/dev/iscsi/icl.c
@@ -63,6 +63,10 @@ static int debug = 1;
TUNABLE_INT("kern.icl.debug", &debug);
SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN,
&debug, 1, "Enable debug messages");
+static int coalesce = 1;
+TUNABLE_INT("kern.icl.coalesce", &coalesce);
+SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN,
+ &coalesce, 1, "Try to coalesce PDUs before sending");
static int partial_receive_len = 1 * 1024; /* XXX: More? */
TUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len);
SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
@@ -769,18 +773,14 @@ icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
}
static int
-icl_pdu_send(struct icl_pdu *request)
+icl_pdu_finalize(struct icl_pdu *request)
{
size_t padding, pdu_len;
uint32_t digest, zero = 0;
- int error, ok;
- struct socket *so;
+ int ok;
struct icl_conn *ic;
ic = request->ip_conn;
- so = request->ip_conn->ic_socket;
-
- ICL_CONN_LOCK_ASSERT(ic);
icl_pdu_set_data_segment_length(request, request->ip_data_len);
@@ -813,7 +813,7 @@ icl_pdu_send(struct icl_pdu *request)
ok = m_append(request->ip_data_mbuf, sizeof(digest),
(void *)&digest);
if (ok != 1) {
- ICL_WARN("failed to append header digest");
+ ICL_WARN("failed to append data digest");
return (1);
}
}
@@ -824,64 +824,114 @@ icl_pdu_send(struct icl_pdu *request)
request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
- error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
- NULL, MSG_DONTWAIT, curthread);
- request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
- if (error != 0) {
- ICL_DEBUG("sosend error %d", error);
- return (error);
- }
-
return (0);
}
static void
-icl_conn_send_pdus(struct icl_conn *ic)
+icl_conn_send_pdus(struct icl_conn *ic, void *fts)
{
- struct icl_pdu *request;
+ STAILQ_HEAD(, icl_pdu) *queue = fts; /* XXX */
+ struct icl_pdu *request, *request2;
struct socket *so;
- size_t available, size;
- int error;
+ size_t available, size, size2;
+ int coalesced, error;
- ICL_CONN_LOCK_ASSERT(ic);
+ ICL_CONN_LOCK_ASSERT_NOT(ic);
so = ic->ic_socket;
SOCKBUF_LOCK(&so->so_snd);
+ /*
+ * Check how much space do we have for transmit. We can't just
+ * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
+ * as it always frees the mbuf chain passed to it, even in case
+ * of error.
+ */
available = sbspace(&so->so_snd);
+
+ /*
+ * Notify the socket layer that it doesn't need to call
+ * send socket upcall for the time being.
+ */
+ so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
SOCKBUF_UNLOCK(&so->so_snd);
- while (!STAILQ_EMPTY(&ic->ic_to_send)) {
+ while (!STAILQ_EMPTY(queue)) {
if (ic->ic_disconnecting)
return;
-
- request = STAILQ_FIRST(&ic->ic_to_send);
+ request = STAILQ_FIRST(queue);
size = icl_pdu_size(request);
if (available < size) {
+#if 1
+ ICL_DEBUG("no space to send; "
+ "have %zd, need %zd",
+ available, size);
+#endif
+
/*
* Set the low watermark on the socket,
- * to avoid waking up until there is enough
- * space.
+ * to avoid unneccessary wakeups until there
+ * is enough space for the PDU to fit.
*/
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_lowat = size;
SOCKBUF_UNLOCK(&so->so_snd);
-#if 1
- ICL_DEBUG("no space to send; "
- "have %zd, need %zd",
- available, size);
-#endif
return;
}
- available -= size;
- STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
- error = icl_pdu_send(request);
+ STAILQ_REMOVE_HEAD(queue, ip_next);
+ error = icl_pdu_finalize(request);
if (error != 0) {
- ICL_DEBUG("failed to send PDU; "
+ ICL_DEBUG("failed to finalize PDU; "
"dropping connection");
icl_conn_fail(ic);
+ icl_pdu_free(request);
return;
- }
+ }
+ if (coalesce) {
+ coalesced = 1;
+ for (;;) {
+ request2 = STAILQ_FIRST(queue);
+ if (request2 == NULL)
+ break;
+ size2 = icl_pdu_size(request2);
+ if (available < size + size2)
+ break;
+ STAILQ_REMOVE_HEAD(queue, ip_next);
+ error = icl_pdu_finalize(request2);
+ if (error != 0) {
+ ICL_DEBUG("failed to finalize PDU; "
+ "dropping connection");
+ icl_conn_fail(ic);
+ icl_pdu_free(request);
+ icl_pdu_free(request2);
+ return;
+ }
+ m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
+ request2->ip_bhs_mbuf = NULL;
+ request->ip_bhs_mbuf->m_pkthdr.len += size2;
+ size += size2;
+ STAILQ_REMOVE_AFTER(queue, request, ip_next);
+ icl_pdu_free(request2);
+ coalesced++;
+ }
+#if 0
+ if (coalesced > 1) {
+ ICL_DEBUG("coalesced %d PDUs into %zd bytes",
+ coalesced, size);
+ }
+#endif
+ }
+ available -= size;
+ error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
+ NULL, MSG_DONTWAIT, curthread);
+ request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
+ if (error != 0) {
+ ICL_DEBUG("failed to send PDU, error %d; "
+ "dropping connection", error);
+ icl_conn_fail(ic);
+ icl_pdu_free(request);
+ return;
+ }
icl_pdu_free(request);
}
}
@@ -890,9 +940,12 @@ static void
icl_send_thread(void *arg)
{
struct icl_conn *ic;
+ STAILQ_HEAD(, icl_pdu) queue;
ic = arg;
+ STAILQ_INIT(&queue);
+
ICL_CONN_LOCK(ic);
ic->ic_send_running = true;
@@ -901,10 +954,54 @@ icl_send_thread(void *arg)
//ICL_DEBUG("terminating");
break;
}
- icl_conn_send_pdus(ic);
+
+ for (;;) {
+ /*
+ * If the local queue is empty, populate it from
+ * the main one. This way the icl_conn_send_pdus()
+ * can go through all the queued PDUs without holding
+ * any locks.
+ */
+ if (STAILQ_EMPTY(&queue))
+ STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu);
+
+ ic->ic_check_send_space = false;
+ ICL_CONN_UNLOCK(ic);
+ icl_conn_send_pdus(ic, &queue);
+ ICL_CONN_LOCK(ic);
+
+ /*
+ * The icl_soupcall_send() was called since the last
+ * call to sbspace(); go around;
+ */
+ if (ic->ic_check_send_space)
+ continue;
+
+ /*
+ * Local queue is empty, but we still have PDUs
+ * in the main one; go around.
+ */
+ if (STAILQ_EMPTY(&queue) &&
+ !STAILQ_EMPTY(&ic->ic_to_send))
+ continue;
+
+ /*
+ * There might be some stuff in the local queue,
+ * which didn't get sent due to not having enough send
+ * space. Wait for socket upcall.
+ */
+ break;
+ }
+
cv_wait(&ic->ic_send_cv, ic->ic_lock);
}
+ /*
+ * We're exiting; move PDUs back to the main queue, so they can
+ * get freed properly. At this point ordering doesn't matter.
+ */
+ STAILQ_CONCAT(&ic->ic_to_send, &queue);
+
ic->ic_send_running = false;
ICL_CONN_UNLOCK(ic);
kthread_exit();
@@ -916,12 +1013,19 @@ icl_soupcall_send(struct socket *so, void *arg, int waitflag)
struct icl_conn *ic;
ic = arg;
+
+ ICL_CONN_LOCK(ic);
+ ic->ic_check_send_space = true;
+ ICL_CONN_UNLOCK(ic);
+
cv_signal(&ic->ic_send_cv);
+
return (SU_OK);
}
int
-icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, int flags)
+icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
+ int flags)
{
struct mbuf *mb, *newmb;
size_t copylen, off = 0;
@@ -974,6 +1078,17 @@ icl_pdu_queue(struct icl_pdu *ip)
icl_pdu_free(ip);
return;
}
+
+ if (!STAILQ_EMPTY(&ic->ic_to_send)) {
+ STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
+ /*
+ * If the queue is not empty, someone else had already
+ * signaled the send thread; no need to do that again,
+ * just return.
+ */
+ return;
+ }
+
STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next);
cv_signal(&ic->ic_send_cv);
}
@@ -1185,6 +1300,20 @@ icl_conn_close(struct icl_conn *ic)
return;
}
+ /*
+ * Deregister socket upcalls.
+ */
+ ICL_CONN_UNLOCK(ic);
+ SOCKBUF_LOCK(&ic->ic_socket->so_snd);
+ if (ic->ic_socket->so_snd.sb_upcall != NULL)
+ soupcall_clear(ic->ic_socket, SO_SND);
+ SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
+ SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
+ if (ic->ic_socket->so_rcv.sb_upcall != NULL)
+ soupcall_clear(ic->ic_socket, SO_RCV);
+ SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
+ ICL_CONN_LOCK(ic);
+
ic->ic_disconnecting = true;
/*
@@ -1202,7 +1331,9 @@ icl_conn_close(struct icl_conn *ic)
}
//ICL_DEBUG("send/receive threads terminated");
+ ICL_CONN_UNLOCK(ic);
soclose(ic->ic_socket);
+ ICL_CONN_LOCK(ic);
ic->ic_socket = NULL;
if (ic->ic_receive_pdu != NULL) {
diff --git a/sys/dev/iscsi/icl.h b/sys/dev/iscsi/icl.h
index fd4abae..d5f5aa2 100644
--- a/sys/dev/iscsi/icl.h
+++ b/sys/dev/iscsi/icl.h
@@ -80,6 +80,7 @@ struct icl_conn {
volatile u_int ic_outstanding_pdus;
#endif
STAILQ_HEAD(, icl_pdu) ic_to_send;
+ bool ic_check_send_space;
size_t ic_receive_len;
int ic_receive_state;
struct icl_pdu *ic_receive_pdu;
OpenPOWER on IntegriCloud