summaryrefslogtreecommitdiffstats
path: root/sys/contrib
diff options
context:
space:
mode:
author np <np@FreeBSD.org> 2013-10-21 06:31:56 +0000
committer np <np@FreeBSD.org> 2013-10-21 06:31:56 +0000
commit 71c5e06cccd7a1b993d32b76a3f872129a5c7c8f (patch)
tree 271efc59905eaa4992ce14482d6cd52bb8b71443 /sys/contrib
parent 82b4688a6700a96d56f6a5c69264de9ce70dc6bb (diff)
download FreeBSD-src-71c5e06cccd7a1b993d32b76a3f872129a5c7c8f.zip
FreeBSD-src-71c5e06cccd7a1b993d32b76a3f872129a5c7c8f.tar.gz
MFC r256470:
Update krping to the latest upstream code. Move all the FreeBSD specific parts to krping_dev.c, which leaves the other files as close to their upstream versions as possible. Approved by: re (glebius)
Diffstat (limited to 'sys/contrib')
-rw-r--r-- sys/contrib/rdma/krping/getopt.c 17
-rw-r--r-- sys/contrib/rdma/krping/krping.c 1589
-rw-r--r-- sys/contrib/rdma/krping/krping.h 136
-rw-r--r-- sys/contrib/rdma/krping/krping_dev.c 139
4 files changed, 1173 insertions, 708 deletions
diff --git a/sys/contrib/rdma/krping/getopt.c b/sys/contrib/rdma/krping/getopt.c
index 701910e..24e62e1 100644
--- a/sys/contrib/rdma/krping/getopt.c
+++ b/sys/contrib/rdma/krping/getopt.c
@@ -5,9 +5,10 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/ctype.h>
-#include <sys/param.h>
-#include <sys/libkern.h>
+#include <sys/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
#include "getopt.h"
/**
@@ -49,29 +50,29 @@ int krping_getopt(const char *caller, char **options,
if (opts->has_arg & OPT_NOPARAM) {
return opts->val;
}
- printf("%s: the %s option requires "
+ printk(KERN_INFO "%s: the %s option requires "
"an argument\n", caller, token);
return -EINVAL;
}
if (opts->has_arg & OPT_INT) {
char* v;
- *value = strtoul(val, &v, 0);
+ *value = simple_strtoul(val, &v, 0);
if (!*v) {
return opts->val;
}
- printf("%s: invalid numeric value "
+ printk(KERN_INFO "%s: invalid numeric value "
"in %s=%s\n", caller, token, val);
return -EDOM;
}
if (opts->has_arg & OPT_STRING) {
return opts->val;
}
- printf("%s: unexpected argument %s to the "
+ printk(KERN_INFO "%s: unexpected argument %s to the "
"%s option\n", caller, val, token);
return -EINVAL;
}
}
- printf("%s: Unrecognized option %s\n", caller, token);
+ printk(KERN_INFO "%s: Unrecognized option %s\n", caller, token);
return -EOPNOTSUPP;
}
diff --git a/sys/contrib/rdma/krping/krping.c b/sys/contrib/rdma/krping/krping.c
index c0acf0c..1aed101 100644
--- a/sys/contrib/rdma/krping/krping.c
+++ b/sys/contrib/rdma/krping/krping.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2006-2009 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -34,39 +34,52 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/ctype.h>
-
-#include <sys/param.h>
-#include <sys/condvar.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/socket.h>
-#include <sys/endian.h>
-#include <sys/limits.h>
-#include <sys/proc.h>
-#include <sys/signalvar.h>
-
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/rwlock.h>
-#include <sys/queue.h>
-#include <sys/taskqueue.h>
-#include <sys/syslog.h>
-#include <netinet/in.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
-#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/inet.h>
+#include <linux/list.h>
+#include <linux/in.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <asm/system.h>
+
+#include <asm/atomic.h>
+
+#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
-#include "getopt.h"
#include "krping.h"
+#include "getopt.h"
+
+extern int krping_debug;
+/*
+ * Logging helpers.  Both print through krping_printf() using the control
+ * block's cookie; DEBUG_LOG prints only when krping_debug is non-zero.
+ * DEBUG_LOG is wrapped in do { } while (0) so that it expands to exactly
+ * one statement and cannot capture an "else" that follows it at an
+ * unbraced call site.
+ */
+#define DEBUG_LOG(cb, x...) do { if (krping_debug) krping_printf((cb)->cookie, x); } while (0)
+#define PRINTF(cb, x...) krping_printf((cb)->cookie, x)
+
+MODULE_AUTHOR("Steve Wise");
+MODULE_DESCRIPTION("RDMA ping client/server");
+MODULE_LICENSE("Dual BSD/GPL");
+
+/*
+ * Return the current CPU cycle count by executing the x86 "rdtsc"
+ * instruction.  The "=a" and "=d" constraints collect the low and high
+ * 32-bit halves from EAX and EDX.  The inline assembly is x86-specific.
+ */
+static __inline uint64_t
+get_cycles(void)
+{
+ uint32_t low, high;
+ __asm __volatile("rdtsc" : "=a" (low), "=d" (high));
+ /* Widen "high" before shifting so the upper half is not truncated. */
+ return (low | ((u_int64_t)high << 32));
+}
-#define PFX "krping: "
+typedef uint64_t cycles_t;
-static int debug = 0;
-#define DEBUG_LOG if (debug) printf
+/*
+ * Memory registration mode used for the ping buffers (krping_cb.mem).
+ */
+enum mem_type {
+ DMA = 1, /* single region from ib_get_dma_mr() (cb->dma_mr) */
+ FASTREG = 2, /* fast-register MR, rekeyed through work requests */
+ MW = 3, /* memory window bound over a physical MR */
+ MR = 4 /* per-buffer regions from ib_reg_phys_mr() */
+};
static const struct krping_option krping_opts[] = {
{"count", OPT_INT, 'C'},
@@ -77,23 +90,29 @@ static const struct krping_option krping_opts[] = {
{"validate", OPT_NOPARAM, 'V'},
{"server", OPT_NOPARAM, 's'},
{"client", OPT_NOPARAM, 'c'},
- {"dmamr", OPT_NOPARAM, 'D'},
- {"debug", OPT_NOPARAM, 'd'},
- {"wlat", OPT_NOPARAM, 'l'},
- {"rlat", OPT_NOPARAM, 'L'},
- {"bw", OPT_NOPARAM, 'B'},
- {"tx-depth", OPT_INT, 't'},
- {"poll", OPT_NOPARAM, 'P'},
- {"memlimit", OPT_INT, 'm'},
+ {"mem_mode", OPT_STRING, 'm'},
+ {"server_inv", OPT_NOPARAM, 'I'},
+ {"wlat", OPT_NOPARAM, 'l'},
+ {"rlat", OPT_NOPARAM, 'L'},
+ {"bw", OPT_NOPARAM, 'B'},
+ {"duplex", OPT_NOPARAM, 'd'},
+ {"txdepth", OPT_INT, 'T'},
+ {"poll", OPT_NOPARAM, 'P'},
+ {"local_dma_lkey", OPT_NOPARAM, 'Z'},
+ {"read_inv", OPT_NOPARAM, 'R'},
+ {"fr", OPT_NOPARAM, 'f'},
{NULL, 0, 0}
};
-struct mtx krping_mutex;
+/*
+ * 64-bit counterparts of htonl()/ntohl(): convert between host byte order
+ * and network (big-endian) byte order.  ntohll() uses be64_to_cpu() so the
+ * conversion direction matches its name; the resulting value is the same
+ * as before because a 64-bit byte swap is its own inverse.
+ */
+#define htonll(x) cpu_to_be64((x))
+#define ntohll(x) be64_to_cpu((x))
+
+static struct mutex krping_mutex;
/*
* List of running krping threads.
*/
-struct krping_cb_list krping_cbs;
+static LIST_HEAD(krping_cbs);
/*
* krping "ping/pong" loop:
@@ -109,24 +128,118 @@ struct krping_cb_list krping_cbs;
*/
/*
+ * These states are used to signal events between the completion handler
+ * and the main client or server thread.
+ *
+ * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV,
+ * and RDMA_WRITE_COMPLETE for each ping.
+ */
+enum test_state {
+ /*
+  * The numeric order is significant: waiters sleep until
+  * "cb->state >= <expected state>" and then check for an exact match,
+  * so ERROR has to remain the largest value to wake every waiter.
+  */
+ IDLE = 1,
+ CONNECT_REQUEST, /* CM handler: RDMA_CM_EVENT_CONNECT_REQUEST */
+ ADDR_RESOLVED, /* CM handler: RDMA_CM_EVENT_ADDR_RESOLVED */
+ ROUTE_RESOLVED, /* CM handler: RDMA_CM_EVENT_ROUTE_RESOLVED */
+ CONNECTED, /* CM handler sets this on ESTABLISHED for the client */
+ RDMA_READ_ADV,
+ RDMA_READ_COMPLETE, /* CQ handler: IB_WC_RDMA_READ completion */
+ RDMA_WRITE_ADV,
+ RDMA_WRITE_COMPLETE, /* CQ handler: IB_WC_RDMA_WRITE completion */
+ ERROR /* failure or disconnect; keep last */
+};
+
+/*
+ * Buffer advertisement carried in the send/recv messages.
+ * krping_format_send() stores every field in network byte order and the
+ * receive path converts them back with ntohll()/ntohl().
+ */
+struct krping_rdma_info {
+ uint64_t buf; /* address of the advertised buffer */
+ uint32_t rkey; /* rkey the peer uses to access the buffer */
+ uint32_t size; /* length of the advertised buffer (cb->size) */
+};
+
+/*
* Default max buffer size for IO...
*/
#define RPING_BUFSIZE 128*1024
-#define RPING_SQ_DEPTH 32
+#define RPING_SQ_DEPTH 64
-static void krping_wait(struct krping_cb *cb, int state)
-{
- int rc;
- mtx_lock(&cb->lock);
- while (cb->state < state) {
- rc = msleep(cb, &cb->lock, PCATCH, "krping", 0);
- if (rc && rc != ERESTART) {
- cb->state = ERROR;
- break;
- }
- }
- mtx_unlock(&cb->lock);
-}
+/*
+ * Control block struct.
+ */
+struct krping_cb {
+ void *cookie; /* handed to krping_printf() by PRINTF/DEBUG_LOG */
+ int server; /* 0 iff client */
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ struct ib_qp *qp;
+
+ enum mem_type mem; /* memory registration mode */
+ struct ib_mr *dma_mr; /* only set up when mem == DMA */
+
+ /* FASTREG mode state */
+ struct ib_fast_reg_page_list *page_list;
+ int page_list_len;
+ struct ib_send_wr fastreg_wr;
+ struct ib_send_wr invalidate_wr; /* chained in front of fastreg_wr */
+ struct ib_mr *fastreg_mr;
+ int server_invalidate;
+ int read_inv; /* use IB_WR_RDMA_READ_WITH_INV for reads */
+ u8 key; /* fastreg key, bumped on every rebind */
+
+ /* MW mode state */
+ struct ib_mw *mw;
+ struct ib_mw_bind bind_attr;
+
+ struct ib_recv_wr rq_wr; /* recv work request record */
+ struct ib_sge recv_sgl; /* recv single SGE */
+ struct krping_rdma_info recv_buf;/* recv buffer, embedded in the cb */
+ u64 recv_dma_addr;
+ DECLARE_PCI_UNMAP_ADDR(recv_mapping)
+ struct ib_mr *recv_mr;
+
+ struct ib_send_wr sq_wr; /* send work request record */
+ struct ib_sge send_sgl;
+ struct krping_rdma_info send_buf;/* single send buf */
+ u64 send_dma_addr;
+ DECLARE_PCI_UNMAP_ADDR(send_mapping)
+ struct ib_mr *send_mr;
+
+ struct ib_send_wr rdma_sq_wr; /* rdma work request record */
+ struct ib_sge rdma_sgl; /* rdma single SGE */
+ char *rdma_buf; /* used as rdma sink */
+ u64 rdma_dma_addr;
+ DECLARE_PCI_UNMAP_ADDR(rdma_mapping)
+ struct ib_mr *rdma_mr;
+
+ uint32_t remote_rkey; /* remote guys RKEY */
+ uint64_t remote_addr; /* remote guys TO */
+ uint32_t remote_len; /* remote guys LEN */
+
+ char *start_buf; /* rdma read src */
+ u64 start_dma_addr;
+ DECLARE_PCI_UNMAP_ADDR(start_mapping)
+ struct ib_mr *start_mr;
+
+ enum test_state state; /* used for cond/signalling */
+ wait_queue_head_t sem; /* woken by the CM and CQ handlers */
+ struct krping_stats stats;
+
+ uint16_t port; /* dst port in NBO */
+ struct in_addr addr; /* dst addr in NBO */
+ char *addr_str; /* dst addr string */
+ int verbose; /* verbose logging */
+ int count; /* ping count */
+ int size; /* ping data size */
+ int validate; /* validate ping data */
+ int wlat; /* run wlat test */
+ int rlat; /* run rlat test */
+ int bw; /* run bw test */
+ int duplex; /* run bw full duplex test */
+ int poll; /* poll or block for rlat test */
+ int txdepth; /* SQ depth */
+ int local_dma_lkey; /* use 0 for lkey */
+ int frtest; /* fastreg test */
+
+ /* CM stuff */
+ struct rdma_cm_id *cm_id; /* connection on client side,*/
+ /* listener on server side. */
+ struct rdma_cm_id *child_cm_id; /* connection on server side */
+ struct list_head list;
+};
static int krping_cma_event_handler(struct rdma_cm_id *cma_id,
struct rdma_cm_event *event)
@@ -134,39 +247,37 @@ static int krping_cma_event_handler(struct rdma_cm_id *cma_id,
int ret;
struct krping_cb *cb = cma_id->context;
- DEBUG_LOG(PFX "cma_event type %d cma_id %p (%s)\n", event->event, cma_id,
- (cma_id == cb->cm_id) ? "parent" : "child");
+ DEBUG_LOG(cb, "cma_event type %d cma_id %p (%s)\n", event->event,
+ cma_id, (cma_id == cb->cm_id) ? "parent" : "child");
- mtx_lock(&cb->lock);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
cb->state = ADDR_RESOLVED;
ret = rdma_resolve_route(cma_id, 2000);
if (ret) {
- log(LOG_ERR, "rdma_resolve_route error %d\n",
- ret);
- wakeup(cb);
+ PRINTF(cb, "rdma_resolve_route error %d\n", ret);
+ wake_up_interruptible(&cb->sem);
}
break;
case RDMA_CM_EVENT_ROUTE_RESOLVED:
cb->state = ROUTE_RESOLVED;
- wakeup(cb);
+ wake_up_interruptible(&cb->sem);
break;
case RDMA_CM_EVENT_CONNECT_REQUEST:
cb->state = CONNECT_REQUEST;
cb->child_cm_id = cma_id;
- DEBUG_LOG(PFX "child cma %p\n", cb->child_cm_id);
- wakeup(cb);
+ DEBUG_LOG(cb, "child cma %p\n", cb->child_cm_id);
+ wake_up_interruptible(&cb->sem);
break;
case RDMA_CM_EVENT_ESTABLISHED:
- DEBUG_LOG(PFX "ESTABLISHED\n");
+ DEBUG_LOG(cb, "ESTABLISHED\n");
if (!cb->server) {
cb->state = CONNECTED;
- wakeup(cb);
}
+ wake_up_interruptible(&cb->sem);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
@@ -174,40 +285,34 @@ static int krping_cma_event_handler(struct rdma_cm_id *cma_id,
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_REJECTED:
- log(LOG_ERR, "cma event %d, error %d\n", event->event,
+ PRINTF(cb, "cma event %d, error %d\n", event->event,
event->status);
cb->state = ERROR;
- wakeup(cb);
+ wake_up_interruptible(&cb->sem);
break;
case RDMA_CM_EVENT_DISCONNECTED:
- DEBUG_LOG(PFX "DISCONNECT EVENT...\n");
+ PRINTF(cb, "DISCONNECT EVENT...\n");
cb->state = ERROR;
- wakeup(cb);
+ wake_up_interruptible(&cb->sem);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
- DEBUG_LOG(PFX "cma detected device removal!!!!\n");
- cb->state = ERROR;
- wakeup(cb);
- mtx_unlock(&cb->lock);
- krping_wait(cb, CLEANUP);
- tsleep(cb, 0, "krping", 5000);
- return 0;
+ PRINTF(cb, "cma detected device removal!!!!\n");
+ break;
default:
- log(LOG_ERR, "oof bad type!\n");
- wakeup(cb);
+ PRINTF(cb, "oof bad type!\n");
+ wake_up_interruptible(&cb->sem);
break;
}
- mtx_unlock(&cb->lock);
return 0;
}
static int server_recv(struct krping_cb *cb, struct ib_wc *wc)
{
if (wc->byte_len != sizeof(cb->recv_buf)) {
- log(LOG_ERR, "Received bogus data, size %d\n",
+ PRINTF(cb, "Received bogus data, size %d\n",
wc->byte_len);
return -1;
}
@@ -215,7 +320,7 @@ static int server_recv(struct krping_cb *cb, struct ib_wc *wc)
cb->remote_rkey = ntohl(cb->recv_buf.rkey);
cb->remote_addr = ntohll(cb->recv_buf.buf);
cb->remote_len = ntohl(cb->recv_buf.size);
- DEBUG_LOG(PFX "Received rkey %x addr %llx len %d from peer\n",
+ DEBUG_LOG(cb, "Received rkey %x addr %llx len %d from peer\n",
cb->remote_rkey, (unsigned long long)cb->remote_addr,
cb->remote_len);
@@ -230,7 +335,7 @@ static int server_recv(struct krping_cb *cb, struct ib_wc *wc)
static int client_recv(struct krping_cb *cb, struct ib_wc *wc)
{
if (wc->byte_len != sizeof(cb->recv_buf)) {
- log(LOG_ERR, "Received bogus data, size %d\n",
+ PRINTF(cb, "Received bogus data, size %d\n",
wc->byte_len);
return -1;
}
@@ -250,11 +355,13 @@ static void krping_cq_event_handler(struct ib_cq *cq, void *ctx)
struct ib_recv_wr *bad_wr;
int ret;
- mtx_lock(&cb->lock);
- KASSERT(cb->cq == cq, ("bad condition"));
+ BUG_ON(cb->cq != cq);
if (cb->state == ERROR) {
- log(LOG_ERR, "cq completion in ERROR state\n");
- mtx_unlock(&cb->lock);
+ PRINTF(cb, "cq completion in ERROR state\n");
+ return;
+ }
+ if (cb->frtest) {
+ PRINTF(cb, "cq completion event in frtest!\n");
return;
}
if (!cb->wlat && !cb->rlat && !cb->bw)
@@ -262,76 +369,77 @@ static void krping_cq_event_handler(struct ib_cq *cq, void *ctx)
while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
if (wc.status) {
if (wc.status == IB_WC_WR_FLUSH_ERR) {
- DEBUG_LOG("cq flushed\n");
+ DEBUG_LOG(cb, "cq flushed\n");
continue;
} else {
- log(LOG_CRIT, "cq completion failed status %d\n",
- wc.status);
+ PRINTF(cb, "cq completion failed with "
+ "wr_id %Lx status %d opcode %d vender_err %x\n",
+ wc.wr_id, wc.status, wc.opcode, wc.vendor_err);
goto error;
}
}
switch (wc.opcode) {
case IB_WC_SEND:
- DEBUG_LOG(PFX "send completion\n");
+ DEBUG_LOG(cb, "send completion\n");
cb->stats.send_bytes += cb->send_sgl.length;
cb->stats.send_msgs++;
break;
case IB_WC_RDMA_WRITE:
- DEBUG_LOG(PFX "rdma write completion\n");
+ DEBUG_LOG(cb, "rdma write completion\n");
cb->stats.write_bytes += cb->rdma_sq_wr.sg_list->length;
cb->stats.write_msgs++;
cb->state = RDMA_WRITE_COMPLETE;
- wakeup(cb);
+ wake_up_interruptible(&cb->sem);
break;
case IB_WC_RDMA_READ:
- DEBUG_LOG(PFX "rdma read completion\n");
+ DEBUG_LOG(cb, "rdma read completion\n");
cb->stats.read_bytes += cb->rdma_sq_wr.sg_list->length;
cb->stats.read_msgs++;
cb->state = RDMA_READ_COMPLETE;
- wakeup(cb);
+ wake_up_interruptible(&cb->sem);
break;
case IB_WC_RECV:
- DEBUG_LOG(PFX "recv completion\n");
+ DEBUG_LOG(cb, "recv completion\n");
cb->stats.recv_bytes += sizeof(cb->recv_buf);
cb->stats.recv_msgs++;
if (cb->wlat || cb->rlat || cb->bw)
ret = server_recv(cb, &wc);
else
ret = cb->server ? server_recv(cb, &wc) :
- client_recv(cb, &wc);
+ client_recv(cb, &wc);
if (ret) {
- log(LOG_ERR, "recv wc error: %d\n", ret);
+ PRINTF(cb, "recv wc error: %d\n", ret);
goto error;
}
ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post recv error: %d\n",
+ PRINTF(cb, "post recv error: %d\n",
ret);
goto error;
}
- wakeup(cb);
+ wake_up_interruptible(&cb->sem);
break;
default:
- log(LOG_ERR, "unknown!!!!! completion\n");
+ PRINTF(cb,
+ "%s:%d Unexpected opcode %d, Shutting down\n",
+ __func__, __LINE__, wc.opcode);
goto error;
}
}
if (ret) {
- log(LOG_ERR, "poll error %d\n", ret);
+ PRINTF(cb, "poll error %d\n", ret);
goto error;
}
- mtx_unlock(&cb->lock);
return;
error:
cb->state = ERROR;
- wakeup(cb);
- mtx_unlock(&cb->lock);
+ wake_up_interruptible(&cb->sem);
}
static int krping_accept(struct krping_cb *cb)
@@ -339,7 +447,7 @@ static int krping_accept(struct krping_cb *cb)
struct rdma_conn_param conn_param;
int ret;
- DEBUG_LOG(PFX "accepting client connection request\n");
+ DEBUG_LOG(cb, "accepting client connection request\n");
memset(&conn_param, 0, sizeof conn_param);
conn_param.responder_resources = 1;
@@ -347,14 +455,15 @@ static int krping_accept(struct krping_cb *cb)
ret = rdma_accept(cb->child_cm_id, &conn_param);
if (ret) {
- log(LOG_ERR, "rdma_accept error: %d\n", ret);
+ PRINTF(cb, "rdma_accept error: %d\n", ret);
return ret;
}
if (!cb->wlat && !cb->rlat && !cb->bw) {
- krping_wait(cb, CONNECTED);
+ wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
if (cb->state == ERROR) {
- log(LOG_ERR, "wait for CONNECTED state %d\n", cb->state);
+ PRINTF(cb, "wait for CONNECTED state %d\n",
+ cb->state);
return -1;
}
}
@@ -363,19 +472,22 @@ static int krping_accept(struct krping_cb *cb)
static void krping_setup_wr(struct krping_cb *cb)
{
- /* XXX X86 only here... not mapping for dma! */
- cb->recv_sgl.addr = vtophys(&cb->recv_buf);
+ cb->recv_sgl.addr = cb->recv_dma_addr;
cb->recv_sgl.length = sizeof cb->recv_buf;
- if (cb->use_dmamr)
+ if (cb->local_dma_lkey)
+ cb->recv_sgl.lkey = cb->qp->device->local_dma_lkey;
+ else if (cb->mem == DMA)
cb->recv_sgl.lkey = cb->dma_mr->lkey;
else
cb->recv_sgl.lkey = cb->recv_mr->lkey;
cb->rq_wr.sg_list = &cb->recv_sgl;
cb->rq_wr.num_sge = 1;
- cb->send_sgl.addr = vtophys(&cb->send_buf);
+ cb->send_sgl.addr = cb->send_dma_addr;
cb->send_sgl.length = sizeof cb->send_buf;
- if (cb->use_dmamr)
+ if (cb->local_dma_lkey)
+ cb->send_sgl.lkey = cb->qp->device->local_dma_lkey;
+ else if (cb->mem == DMA)
cb->send_sgl.lkey = cb->dma_mr->lkey;
else
cb->send_sgl.lkey = cb->send_mr->lkey;
@@ -385,18 +497,39 @@ static void krping_setup_wr(struct krping_cb *cb)
cb->sq_wr.sg_list = &cb->send_sgl;
cb->sq_wr.num_sge = 1;
- cb->rdma_addr = vtophys(cb->rdma_buf);
- cb->rdma_sgl.addr = cb->rdma_addr;
- if (cb->use_dmamr)
- cb->rdma_sgl.lkey = cb->dma_mr->lkey;
- else
- cb->rdma_sgl.lkey = cb->rdma_mr->lkey;
- cb->rdma_sq_wr.send_flags = IB_SEND_SIGNALED;
- cb->rdma_sq_wr.sg_list = &cb->rdma_sgl;
- cb->rdma_sq_wr.num_sge = 1;
-
- if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
- cb->start_addr = vtophys(cb->start_buf);
+ if (cb->server || cb->wlat || cb->rlat || cb->bw) {
+ cb->rdma_sgl.addr = cb->rdma_dma_addr;
+ if (cb->mem == MR)
+ cb->rdma_sgl.lkey = cb->rdma_mr->lkey;
+ cb->rdma_sq_wr.send_flags = IB_SEND_SIGNALED;
+ cb->rdma_sq_wr.sg_list = &cb->rdma_sgl;
+ cb->rdma_sq_wr.num_sge = 1;
+ }
+
+ switch(cb->mem) {
+ case FASTREG:
+
+ /*
+ * A chain of 2 WRs, INVALIDATE_MR + FAST_REG_MR.
+ * both unsignaled. The client uses them to reregister
+ * the rdma buffers with a new key each iteration.
+ */
+ cb->fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ cb->fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ cb->fastreg_wr.wr.fast_reg.length = cb->size;
+ cb->fastreg_wr.wr.fast_reg.page_list = cb->page_list;
+ cb->fastreg_wr.wr.fast_reg.page_list_len = cb->page_list_len;
+
+ cb->invalidate_wr.next = &cb->fastreg_wr;
+ cb->invalidate_wr.opcode = IB_WR_LOCAL_INV;
+ break;
+ case MW:
+ cb->bind_attr.wr_id = 0xabbaabba;
+ cb->bind_attr.send_flags = 0; /* unsignaled */
+ cb->bind_attr.length = cb->size;
+ break;
+ default:
+ break;
}
}
@@ -406,134 +539,207 @@ static int krping_setup_buffers(struct krping_cb *cb)
struct ib_phys_buf buf;
u64 iovbase;
- DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb);
+ DEBUG_LOG(cb, "krping_setup_buffers called on cb %p\n", cb);
- if (cb->use_dmamr) {
+ cb->recv_dma_addr = dma_map_single(cb->pd->device->dma_device,
+ &cb->recv_buf,
+ sizeof(cb->recv_buf), DMA_BIDIRECTIONAL);
+ pci_unmap_addr_set(cb, recv_mapping, cb->recv_dma_addr);
+ cb->send_dma_addr = dma_map_single(cb->pd->device->dma_device,
+ &cb->send_buf, sizeof(cb->send_buf),
+ DMA_BIDIRECTIONAL);
+ pci_unmap_addr_set(cb, send_mapping, cb->send_dma_addr);
+
+ if (cb->mem == DMA) {
cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE|
IB_ACCESS_REMOTE_READ|
IB_ACCESS_REMOTE_WRITE);
if (IS_ERR(cb->dma_mr)) {
- log(LOG_ERR, "reg_dmamr failed\n");
- return PTR_ERR(cb->dma_mr);
+ DEBUG_LOG(cb, "reg_dmamr failed\n");
+ ret = PTR_ERR(cb->dma_mr);
+ goto bail;
}
} else {
+ if (!cb->local_dma_lkey) {
+ buf.addr = cb->recv_dma_addr;
+ buf.size = sizeof cb->recv_buf;
+ DEBUG_LOG(cb, "recv buf dma_addr %llx size %d\n", buf.addr,
+ (int)buf.size);
+ iovbase = cb->recv_dma_addr;
+ cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
+ IB_ACCESS_LOCAL_WRITE,
+ &iovbase);
+
+ if (IS_ERR(cb->recv_mr)) {
+ DEBUG_LOG(cb, "recv_buf reg_mr failed\n");
+ ret = PTR_ERR(cb->recv_mr);
+ goto bail;
+ }
- buf.addr = vtophys(&cb->recv_buf);
- buf.size = sizeof cb->recv_buf;
- iovbase = vtophys(&cb->recv_buf);
- cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
- IB_ACCESS_LOCAL_WRITE,
- &iovbase);
-
- if (IS_ERR(cb->recv_mr)) {
- log(LOG_ERR, "recv_buf reg_mr failed\n");
- return PTR_ERR(cb->recv_mr);
- }
-
- buf.addr = vtophys(&cb->send_buf);
- buf.size = sizeof cb->send_buf;
- iovbase = vtophys(&cb->send_buf);
- cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
- 0, &iovbase);
-
- if (IS_ERR(cb->send_mr)) {
- log(LOG_ERR, "send_buf reg_mr failed\n");
- ib_dereg_mr(cb->recv_mr);
- return PTR_ERR(cb->send_mr);
+ buf.addr = cb->send_dma_addr;
+ buf.size = sizeof cb->send_buf;
+ DEBUG_LOG(cb, "send buf dma_addr %llx size %d\n", buf.addr,
+ (int)buf.size);
+ iovbase = cb->send_dma_addr;
+ cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
+ 0, &iovbase);
+
+ if (IS_ERR(cb->send_mr)) {
+ DEBUG_LOG(cb, "send_buf reg_mr failed\n");
+ ret = PTR_ERR(cb->send_mr);
+ goto bail;
+ }
}
}
- /* RNIC adapters have a limit upto which it can register physical memory
- * If DMA-MR memory mode is set then normally driver registers maximum
- * supported memory. After that if contigmalloc allocates memory beyond the
- * specified RNIC limit then Krping may not work.
- */
- if (cb->use_dmamr && cb->memlimit)
- cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, cb->memlimit,
- PAGE_SIZE, 0);
- else
- cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL,
- PAGE_SIZE, 0);
-
+ cb->rdma_buf = kmalloc(cb->size, GFP_KERNEL);
if (!cb->rdma_buf) {
- log(LOG_ERR, "rdma_buf malloc failed\n");
- ret = ENOMEM;
- goto err1;
- }
- if (!cb->use_dmamr) {
-
- buf.addr = vtophys(cb->rdma_buf);
- buf.size = cb->size;
- iovbase = vtophys(cb->rdma_buf);
- cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
+ DEBUG_LOG(cb, "rdma_buf malloc failed\n");
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ cb->rdma_dma_addr = dma_map_single(cb->pd->device->dma_device,
+ cb->rdma_buf, cb->size,
+ DMA_BIDIRECTIONAL);
+ pci_unmap_addr_set(cb, rdma_mapping, cb->rdma_dma_addr);
+ if (cb->mem != DMA) {
+ switch (cb->mem) {
+ case FASTREG:
+ cb->page_list_len = (((cb->size - 1) & PAGE_MASK) +
+ PAGE_SIZE) >> PAGE_SHIFT;
+ cb->page_list = ib_alloc_fast_reg_page_list(
+ cb->pd->device,
+ cb->page_list_len);
+ if (IS_ERR(cb->page_list)) {
+ DEBUG_LOG(cb, "recv_buf reg_mr failed\n");
+ ret = PTR_ERR(cb->page_list);
+ goto bail;
+ }
+ cb->fastreg_mr = ib_alloc_fast_reg_mr(cb->pd,
+ cb->page_list->max_page_list_len);
+ if (IS_ERR(cb->fastreg_mr)) {
+ DEBUG_LOG(cb, "recv_buf reg_mr failed\n");
+ ret = PTR_ERR(cb->fastreg_mr);
+ goto bail;
+ }
+ DEBUG_LOG(cb, "fastreg rkey 0x%x page_list %p"
+ " page_list_len %u\n", cb->fastreg_mr->rkey,
+ cb->page_list, cb->page_list_len);
+ break;
+ case MW:
+ cb->mw = ib_alloc_mw(cb->pd);
+ if (IS_ERR(cb->mw)) {
+ DEBUG_LOG(cb, "recv_buf alloc_mw failed\n");
+ ret = PTR_ERR(cb->mw);
+ goto bail;
+ }
+ DEBUG_LOG(cb, "mw rkey 0x%x\n", cb->mw->rkey);
+ /*FALLTHROUGH*/
+ case MR:
+ buf.addr = cb->rdma_dma_addr;
+ buf.size = cb->size;
+ iovbase = cb->rdma_dma_addr;
+ cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
IB_ACCESS_REMOTE_READ|
IB_ACCESS_REMOTE_WRITE,
&iovbase);
-
- if (IS_ERR(cb->rdma_mr)) {
- log(LOG_ERR, "rdma_buf reg_mr failed\n");
- ret = PTR_ERR(cb->rdma_mr);
- goto err2;
+ if (IS_ERR(cb->rdma_mr)) {
+ DEBUG_LOG(cb, "rdma_buf reg_mr failed\n");
+ ret = PTR_ERR(cb->rdma_mr);
+ goto bail;
+ }
+ DEBUG_LOG(cb, "rdma buf dma_addr %llx size %d mr rkey 0x%x\n",
+ buf.addr, (int)buf.size, cb->rdma_mr->rkey);
+ break;
+ default:
+ ret = -EINVAL;
+ goto bail;
+ break;
}
}
if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
- if (cb->use_dmamr && cb->memlimit)
- cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
- 0, cb->memlimit, PAGE_SIZE, 0);
- else
- cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK,
- 0, -1UL, PAGE_SIZE, 0);
+
+ cb->start_buf = kmalloc(cb->size, GFP_KERNEL);
if (!cb->start_buf) {
- log(LOG_ERR, "start_buf malloc failed\n");
- ret = ENOMEM;
- goto err2;
+ DEBUG_LOG(cb, "start_buf malloc failed\n");
+ ret = -ENOMEM;
+ goto bail;
}
- if (!cb->use_dmamr) {
+
+ cb->start_dma_addr = dma_map_single(cb->pd->device->dma_device,
+ cb->start_buf, cb->size,
+ DMA_BIDIRECTIONAL);
+ pci_unmap_addr_set(cb, start_mapping, cb->start_dma_addr);
+
+ if (cb->mem == MR || cb->mem == MW) {
unsigned flags = IB_ACCESS_REMOTE_READ;
- if (cb->wlat || cb->rlat || cb->bw)
+ if (cb->wlat || cb->rlat || cb->bw)
flags |= IB_ACCESS_REMOTE_WRITE;
- buf.addr = vtophys(cb->start_buf);
+
+ buf.addr = cb->start_dma_addr;
buf.size = cb->size;
- iovbase = vtophys(cb->start_buf);
+ DEBUG_LOG(cb, "start buf dma_addr %llx size %d\n",
+ buf.addr, (int)buf.size);
+ iovbase = cb->start_dma_addr;
cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1,
flags,
&iovbase);
if (IS_ERR(cb->start_mr)) {
- log(LOG_ERR, "start_buf reg_mr failed\n");
+ DEBUG_LOG(cb, "start_buf reg_mr failed\n");
ret = PTR_ERR(cb->start_mr);
- goto err3;
+ goto bail;
}
}
}
krping_setup_wr(cb);
- DEBUG_LOG(PFX "allocated & registered buffers...\n");
+ DEBUG_LOG(cb, "allocated & registered buffers...\n");
return 0;
-err3:
- contigfree(cb->start_buf, cb->size, M_DEVBUF);
-
- if (!cb->use_dmamr)
+bail:
+ if (cb->fastreg_mr && !IS_ERR(cb->fastreg_mr))
+ ib_dereg_mr(cb->fastreg_mr);
+ if (cb->mw && !IS_ERR(cb->mw))
+ ib_dealloc_mw(cb->mw);
+ if (cb->rdma_mr && !IS_ERR(cb->rdma_mr))
ib_dereg_mr(cb->rdma_mr);
-err2:
- contigfree(cb->rdma_buf, cb->size, M_DEVBUF);
-err1:
- if (cb->use_dmamr)
+ if (cb->page_list && !IS_ERR(cb->page_list))
+ ib_free_fast_reg_page_list(cb->page_list);
+ if (cb->dma_mr && !IS_ERR(cb->dma_mr))
ib_dereg_mr(cb->dma_mr);
- else {
+ if (cb->recv_mr && !IS_ERR(cb->recv_mr))
ib_dereg_mr(cb->recv_mr);
+ if (cb->send_mr && !IS_ERR(cb->send_mr))
ib_dereg_mr(cb->send_mr);
- }
+ if (cb->rdma_buf)
+ kfree(cb->rdma_buf);
+ if (cb->start_buf)
+ kfree(cb->start_buf);
return ret;
}
static void krping_free_buffers(struct krping_cb *cb)
{
- DEBUG_LOG(PFX "krping_free_buffers called on cb %p\n", cb);
+ DEBUG_LOG(cb, "krping_free_buffers called on cb %p\n", cb);
-#if 0
+ if (cb->dma_mr)
+ ib_dereg_mr(cb->dma_mr);
+ if (cb->send_mr)
+ ib_dereg_mr(cb->send_mr);
+ if (cb->recv_mr)
+ ib_dereg_mr(cb->recv_mr);
+ if (cb->rdma_mr)
+ ib_dereg_mr(cb->rdma_mr);
+ if (cb->start_mr)
+ ib_dereg_mr(cb->start_mr);
+ if (cb->fastreg_mr)
+ ib_dereg_mr(cb->fastreg_mr);
+ if (cb->mw)
+ ib_dealloc_mw(cb->mw);
+
dma_unmap_single(cb->pd->device->dma_device,
pci_unmap_addr(cb, recv_mapping),
sizeof(cb->recv_buf), DMA_BIDIRECTIONAL);
@@ -543,24 +749,12 @@ static void krping_free_buffers(struct krping_cb *cb)
dma_unmap_single(cb->pd->device->dma_device,
pci_unmap_addr(cb, rdma_mapping),
cb->size, DMA_BIDIRECTIONAL);
-#endif
- contigfree(cb->rdma_buf, cb->size, M_DEVBUF);
- if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
-#if 0
+ kfree(cb->rdma_buf);
+ if (cb->start_buf) {
dma_unmap_single(cb->pd->device->dma_device,
pci_unmap_addr(cb, start_mapping),
cb->size, DMA_BIDIRECTIONAL);
-#endif
- contigfree(cb->start_buf, cb->size, M_DEVBUF);
- }
- if (cb->use_dmamr)
- ib_dereg_mr(cb->dma_mr);
- else {
- ib_dereg_mr(cb->send_mr);
- ib_dereg_mr(cb->recv_mr);
- ib_dereg_mr(cb->rdma_mr);
- if (!cb->server)
- ib_dereg_mr(cb->start_mr);
+ kfree(cb->start_buf);
}
}
@@ -577,6 +771,7 @@ static int krping_create_qp(struct krping_cb *cb)
init_attr.qp_type = IB_QPT_RC;
init_attr.send_cq = cb->cq;
init_attr.recv_cq = cb->cq;
+ init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
if (cb->server) {
ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr);
@@ -603,36 +798,36 @@ static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id)
int ret;
cb->pd = ib_alloc_pd(cm_id->device);
if (IS_ERR(cb->pd)) {
- log(LOG_ERR, "ib_alloc_pd failed\n");
+ PRINTF(cb, "ib_alloc_pd failed\n");
return PTR_ERR(cb->pd);
}
- DEBUG_LOG(PFX "created pd %p\n", cb->pd);
+ DEBUG_LOG(cb, "created pd %p\n", cb->pd);
- strlcpy(cb->name, cb->pd->device->name, sizeof(cb->name));
+ strlcpy(cb->stats.name, cb->pd->device->name, sizeof(cb->stats.name));
cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL,
cb, cb->txdepth * 2, 0);
if (IS_ERR(cb->cq)) {
- log(LOG_ERR, "ib_create_cq failed\n");
+ PRINTF(cb, "ib_create_cq failed\n");
ret = PTR_ERR(cb->cq);
goto err1;
}
- DEBUG_LOG(PFX "created cq %p\n", cb->cq);
+ DEBUG_LOG(cb, "created cq %p\n", cb->cq);
- if (!cb->wlat && !cb->rlat && !cb->bw) {
+ if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) {
ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
if (ret) {
- log(LOG_ERR, "ib_create_cq failed\n");
+ PRINTF(cb, "ib_create_cq failed\n");
goto err2;
}
}
ret = krping_create_qp(cb);
if (ret) {
- log(LOG_ERR, "krping_create_qp failed: %d\n", ret);
+ PRINTF(cb, "krping_create_qp failed: %d\n", ret);
goto err2;
}
- DEBUG_LOG(PFX "created qp %p\n", cb->qp);
+ DEBUG_LOG(cb, "created qp %p\n", cb->qp);
return 0;
err2:
ib_destroy_cq(cb->cq);
@@ -641,115 +836,257 @@ err1:
return ret;
}
-static void krping_format_send(struct krping_cb *cb, u64 buf,
- struct ib_mr *mr)
+/*
+ * Return the (possibly rebound) rkey for the rdma buffer.
+ * FASTREG mode: invalidate and rebind via fastreg wr.
+ * MW mode: rebind the MW.
+ * other modes: just return the mr rkey.
+ *
+ * cb:       control block that owns the memory regions and the QP.
+ * buf:      address of the buffer to expose; it is compared against
+ *           cb->start_dma_addr to choose between the "start" (remote
+ *           read) and "rdma" (remote write) access flags and regions.
+ * post_inv: FASTREG only; when non-zero the chained invalidate_wr is
+ *           posted, otherwise only fastreg_wr is posted.
+ *
+ * Returns the rkey to use.  On a failed MW bind or an unknown mode the
+ * result is 0xffffffff; every failure also sets cb->state to ERROR.
+ */
+static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv)
{
- struct krping_rdma_info *info = &cb->send_buf;
+ u32 rkey = 0xffffffff;
+ u64 p;
+ struct ib_send_wr *bad_wr;
+ int i;
+ int ret;
- info->buf = htonll(buf);
- info->rkey = htonl(mr->rkey);
- info->size = htonl(cb->size);
+ switch (cb->mem) {
+ case FASTREG:
+ /* Remember the current key so the invalidate WR can retire it. */
+ cb->invalidate_wr.ex.invalidate_rkey = cb->fastreg_mr->rkey;
+
+ /*
+ * Update the fastreg key.
+ */
+ ib_update_fast_reg_key(cb->fastreg_mr, ++cb->key);
+ cb->fastreg_wr.wr.fast_reg.rkey = cb->fastreg_mr->rkey;
+
+ /*
+ * Update the fastreg WR with new buf info.
+ */
+ if (buf == (u64)cb->start_dma_addr)
+ cb->fastreg_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_READ;
+ else
+ cb->fastreg_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+ cb->fastreg_wr.wr.fast_reg.iova_start = buf;
+ /* Fill the page list with consecutive page addresses from buf. */
+ p = (u64)(buf & PAGE_MASK);
+ for (i=0; i < cb->fastreg_wr.wr.fast_reg.page_list_len;
+ i++, p += PAGE_SIZE) {
+ cb->page_list->page_list[i] = p;
+ DEBUG_LOG(cb, "page_list[%d] 0x%llx\n", i, p);
+ }
- DEBUG_LOG(PFX "RDMA addr %llx rkey %x len %d\n",
- (unsigned long long)buf, mr->rkey, cb->size);
+ DEBUG_LOG(cb, "post_inv = %d, fastreg new rkey 0x%x shift %u len %u"
+ " iova_start %llx page_list_len %u\n",
+ post_inv,
+ cb->fastreg_wr.wr.fast_reg.rkey,
+ cb->fastreg_wr.wr.fast_reg.page_shift,
+ cb->fastreg_wr.wr.fast_reg.length,
+ cb->fastreg_wr.wr.fast_reg.iova_start,
+ cb->fastreg_wr.wr.fast_reg.page_list_len);
+
+ /*
+ * krping_setup_wr() links invalidate_wr.next to fastreg_wr, so
+ * posting invalidate_wr submits both work requests.
+ */
+ if (post_inv)
+ ret = ib_post_send(cb->qp, &cb->invalidate_wr, &bad_wr);
+ else
+ ret = ib_post_send(cb->qp, &cb->fastreg_wr, &bad_wr);
+ if (ret) {
+ PRINTF(cb, "post send error %d\n", ret);
+ cb->state = ERROR;
+ }
+ /* The new key is returned even when the post failed. */
+ rkey = cb->fastreg_mr->rkey;
+ break;
+ case MW:
+ /*
+ * Update the MW with new buf info.
+ */
+ if (buf == (u64)cb->start_dma_addr) {
+ cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ;
+ cb->bind_attr.mr = cb->start_mr;
+ } else {
+ cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
+ cb->bind_attr.mr = cb->rdma_mr;
+ }
+ cb->bind_attr.addr = buf;
+ DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n",
+ cb->mw->rkey, buf, cb->bind_attr.mr->rkey);
+ ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr);
+ if (ret) {
+ PRINTF(cb, "bind mw error %d\n", ret);
+ cb->state = ERROR;
+ } else
+ rkey = cb->mw->rkey;
+ break;
+ case MR:
+ if (buf == (u64)cb->start_dma_addr)
+ rkey = cb->start_mr->rkey;
+ else
+ rkey = cb->rdma_mr->rkey;
+ break;
+ case DMA:
+ rkey = cb->dma_mr->rkey;
+ break;
+ default:
+ PRINTF(cb, "%s:%d case ERROR\n", __func__, __LINE__);
+ cb->state = ERROR;
+ break;
+ }
+ return rkey;
+}
+
+/*
+ * Fill cb->send_buf with the advertisement for the buffer at "buf": its
+ * address, the rkey obtained from krping_rdma_rkey(), and cb->size, all
+ * converted to network byte order.  Nothing is written when running as
+ * the server in the plain ping test (no wlat/rlat/bw).
+ */
+static void krping_format_send(struct krping_cb *cb, u64 buf)
+{
+ struct krping_rdma_info *info = &cb->send_buf;
+ u32 rkey;
+
+ /*
+ * Client side will do fastreg or mw bind before
+ * advertising the rdma buffer. Server side
+ * sends have no data.
+ */
+ if (!cb->server || cb->wlat || cb->rlat || cb->bw) {
+ /* Post the local invalidate only when the server will not do it. */
+ rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate);
+ info->buf = htonll(buf);
+ info->rkey = htonl(rkey);
+ info->size = htonl(cb->size);
+ DEBUG_LOG(cb, "RDMA addr %llx rkey %x len %d\n",
+ (unsigned long long)buf, rkey, cb->size);
+ }
}
static void krping_test_server(struct krping_cb *cb)
{
- struct ib_send_wr *bad_wr;
+ struct ib_send_wr *bad_wr, inv;
int ret;
while (1) {
/* Wait for client's Start STAG/TO/Len */
- krping_wait(cb, RDMA_READ_ADV);
+ wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_ADV);
if (cb->state != RDMA_READ_ADV) {
- DEBUG_LOG(PFX "wait for RDMA_READ_ADV state %d\n",
+ PRINTF(cb, "wait for RDMA_READ_ADV state %d\n",
cb->state);
break;
}
- DEBUG_LOG(PFX "server received sink adv\n");
+ DEBUG_LOG(cb, "server received sink adv\n");
- /* Issue RDMA Read. */
- cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ;
cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
cb->rdma_sq_wr.sg_list->length = cb->remote_len;
+ cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 1);
+ cb->rdma_sq_wr.next = NULL;
+
+ /* Issue RDMA Read. */
+ if (cb->read_inv)
+ cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ else {
+
+ cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ;
+ if (cb->mem == FASTREG) {
+ /*
+ * Immediately follow the read with a
+ * fenced LOCAL_INV.
+ */
+ cb->rdma_sq_wr.next = &inv;
+ memset(&inv, 0, sizeof inv);
+ inv.opcode = IB_WR_LOCAL_INV;
+ inv.ex.invalidate_rkey = cb->fastreg_mr->rkey;
+ inv.send_flags = IB_SEND_FENCE;
+ }
+ }
ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
break;
}
- DEBUG_LOG(PFX "server posted rdma read req \n");
+ cb->rdma_sq_wr.next = NULL;
+
+ DEBUG_LOG(cb, "server posted rdma read req \n");
/* Wait for read completion */
- krping_wait(cb, RDMA_READ_COMPLETE);
+ wait_event_interruptible(cb->sem,
+ cb->state >= RDMA_READ_COMPLETE);
if (cb->state != RDMA_READ_COMPLETE) {
- log(LOG_ERR,
+ PRINTF(cb,
"wait for RDMA_READ_COMPLETE state %d\n",
cb->state);
break;
}
- DEBUG_LOG(PFX "server received read complete\n");
+ DEBUG_LOG(cb, "server received read complete\n");
/* Display data in recv buf */
if (cb->verbose)
- DEBUG_LOG("server ping data: %s\n", cb->rdma_buf);
+ PRINTF(cb, "server ping data: %s\n",
+ cb->rdma_buf);
/* Tell client to continue */
+ if (cb->server && cb->server_invalidate) {
+ cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey;
+ cb->sq_wr.opcode = IB_WR_SEND_WITH_INV;
+ DEBUG_LOG(cb, "send-w-inv rkey 0x%x\n", cb->remote_rkey);
+ }
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
break;
}
- DEBUG_LOG(PFX "server posted go ahead\n");
+ DEBUG_LOG(cb, "server posted go ahead\n");
/* Wait for client's RDMA STAG/TO/Len */
- krping_wait(cb, RDMA_WRITE_ADV);
+ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
if (cb->state != RDMA_WRITE_ADV) {
- log(LOG_ERR,
+ PRINTF(cb,
"wait for RDMA_WRITE_ADV state %d\n",
cb->state);
break;
}
- DEBUG_LOG(PFX "server received sink adv\n");
+ DEBUG_LOG(cb, "server received sink adv\n");
/* RDMA Write echo data */
cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1;
- DEBUG_LOG(PFX "rdma write from lkey %x laddr %llx len %d\n",
+ if (cb->local_dma_lkey)
+ cb->rdma_sgl.lkey = cb->qp->device->local_dma_lkey;
+ else
+ cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 0);
+
+ DEBUG_LOG(cb, "rdma write from lkey %x laddr %llx len %d\n",
cb->rdma_sq_wr.sg_list->lkey,
(unsigned long long)cb->rdma_sq_wr.sg_list->addr,
cb->rdma_sq_wr.sg_list->length);
ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
break;
}
/* Wait for completion */
- krping_wait(cb, RDMA_WRITE_COMPLETE);
+ ret = wait_event_interruptible(cb->sem, cb->state >=
+ RDMA_WRITE_COMPLETE);
if (cb->state != RDMA_WRITE_COMPLETE) {
- log(LOG_ERR,
+ PRINTF(cb,
"wait for RDMA_WRITE_COMPLETE state %d\n",
cb->state);
break;
}
- DEBUG_LOG(PFX "server rdma write complete \n");
+ DEBUG_LOG(cb, "server rdma write complete \n");
cb->state = CONNECTED;
/* Tell client to begin again */
+ if (cb->server && cb->server_invalidate) {
+ cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey;
+ cb->sq_wr.opcode = IB_WR_SEND_WITH_INV;
+ DEBUG_LOG(cb, "send-w-inv rkey 0x%x\n", cb->remote_rkey);
+ }
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
break;
}
- DEBUG_LOG(PFX "server posted go ahead\n");
+ DEBUG_LOG(cb, "server posted go ahead\n");
}
}
@@ -770,16 +1107,16 @@ static void rlat_test(struct krping_cb *cb)
cb->rdma_sq_wr.sg_list->length = cb->size;
microtime(&start_tv);
- if (!cb->poll) {
- cb->state = RDMA_READ_ADV;
- ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
- }
+ if (!cb->poll) {
+ cb->state = RDMA_READ_ADV;
+ ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
+ }
while (scnt < iters) {
- cb->state = RDMA_READ_ADV;
+ cb->state = RDMA_READ_ADV;
ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR,
+ PRINTF(cb,
"Couldn't post send: ret=%d scnt %d\n",
ret, scnt);
return;
@@ -787,30 +1124,33 @@ static void rlat_test(struct krping_cb *cb)
do {
if (!cb->poll) {
- krping_wait(cb, RDMA_READ_COMPLETE);
+ wait_event_interruptible(cb->sem,
+ cb->state != RDMA_READ_ADV);
if (cb->state == RDMA_READ_COMPLETE) {
ne = 1;
- ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
+ ib_req_notify_cq(cb->cq,
+ IB_CQ_NEXT_COMP);
} else {
ne = -1;
}
} else
ne = ib_poll_cq(cb->cq, 1, &wc);
if (cb->state == ERROR) {
- log(LOG_ERR,
- "state == ERROR...bailing scnt %d\n", scnt);
+ PRINTF(cb,
+ "state == ERROR...bailing scnt %d\n",
+ scnt);
return;
}
} while (ne == 0);
if (ne < 0) {
- log(LOG_ERR, "poll CQ failed %d\n", ne);
+ PRINTF(cb, "poll CQ failed %d\n", ne);
return;
}
- if (cb->poll && wc.status != IB_WC_SUCCESS) {
- log(LOG_ERR, "Completion wth error at %s:\n",
+ if (cb->poll && wc.status != IB_WC_SUCCESS) {
+ PRINTF(cb, "Completion wth error at %s:\n",
cb->server ? "server" : "client");
- log(LOG_ERR, "Failed status %d: wr_id %d\n",
+ PRINTF(cb, "Failed status %d: wr_id %d\n",
wc.status, (int) wc.wr_id);
return;
}
@@ -823,75 +1163,18 @@ static void rlat_test(struct krping_cb *cb)
stop_tv.tv_sec -= 1;
}
- log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d\n",
+ PRINTF(cb, "delta sec %lu delta usec %lu iter %d size %d\n",
stop_tv.tv_sec - start_tv.tv_sec,
stop_tv.tv_usec - start_tv.tv_usec,
scnt, cb->size);
}
-static int alloc_cycle_mem(int cycle_iters,
- cycles_t **post_cycles_start,
- cycles_t **post_cycles_stop,
- cycles_t **poll_cycles_start,
- cycles_t **poll_cycles_stop,
- cycles_t **last_poll_cycles_start)
-{
- *post_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK);
- if (!*post_cycles_start) {
- goto fail1;
- }
- *post_cycles_stop = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK);
- if (!*post_cycles_stop) {
- goto fail2;
- }
- *poll_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK);
- if (!*poll_cycles_start) {
- goto fail3;
- }
- *poll_cycles_stop = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK);
- if (!*poll_cycles_stop) {
- goto fail4;
- }
- *last_poll_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK);
- if (!*last_poll_cycles_start) {
- goto fail5;
- }
- return 0;
-fail5:
- free(*poll_cycles_stop, M_DEVBUF);
-fail4:
- free(*poll_cycles_start, M_DEVBUF);
-fail3:
- free(*post_cycles_stop, M_DEVBUF);
-fail2:
- free(*post_cycles_start, M_DEVBUF);
-fail1:
- log(LOG_ERR, "%s malloc failed\n", __FUNCTION__);
- return ENOMEM;
-}
-
-static void free_cycle_mem(cycles_t *post_cycles_start,
- cycles_t *post_cycles_stop,
- cycles_t *poll_cycles_start,
- cycles_t *poll_cycles_stop,
- cycles_t *last_poll_cycles_start)
-{
- free(last_poll_cycles_start, M_DEVBUF);
- free(poll_cycles_stop, M_DEVBUF);
- free(poll_cycles_start, M_DEVBUF);
- free(post_cycles_stop, M_DEVBUF);
- free(post_cycles_start, M_DEVBUF);
-}
-
static void wlat_test(struct krping_cb *cb)
{
int ccnt, scnt, rcnt;
int iters=cb->count;
volatile char *poll_buf = (char *) cb->start_buf;
char *buf = (char *)cb->rdma_buf;
- ccnt = 0;
- scnt = 0;
- rcnt = 0;
struct timeval start_tv, stop_tv;
cycles_t *post_cycles_start, *post_cycles_stop;
cycles_t *poll_cycles_start, *poll_cycles_stop;
@@ -899,16 +1182,37 @@ static void wlat_test(struct krping_cb *cb)
cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0;
int i;
int cycle_iters = 1000;
- int err;
- err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop,
- &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start);
-
- if (err) {
- log(LOG_ERR, "%s malloc failed\n", __FUNCTION__);
+ ccnt = 0;
+ scnt = 0;
+ rcnt = 0;
+
+ post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
+ if (!post_cycles_start) {
+ PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__);
+ return;
+ }
+ post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
+ if (!post_cycles_stop) {
+ PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__);
+ return;
+ }
+ poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
+ if (!poll_cycles_start) {
+ PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__);
+ return;
+ }
+ poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
+ if (!poll_cycles_stop) {
+ PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__);
+ return;
+ }
+ last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t),
+ GFP_KERNEL);
+ if (!last_poll_cycles_start) {
+ PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__);
return;
}
-
cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
@@ -924,7 +1228,8 @@ static void wlat_test(struct krping_cb *cb)
++rcnt;
while (*poll_buf != (char)rcnt) {
if (cb->state == ERROR) {
- log(LOG_ERR, "state = ERROR, bailing\n");
+ PRINTF(cb,
+ "state = ERROR, bailing\n");
return;
}
}
@@ -937,7 +1242,8 @@ static void wlat_test(struct krping_cb *cb)
if (scnt < cycle_iters)
post_cycles_start[scnt] = get_cycles();
if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
- log(LOG_ERR, "Couldn't post send: scnt=%d\n",
+ PRINTF(cb,
+ "Couldn't post send: scnt=%d\n",
scnt);
return;
}
@@ -954,7 +1260,8 @@ static void wlat_test(struct krping_cb *cb)
poll_cycles_start[ccnt] = get_cycles();
do {
if (ccnt < cycle_iters)
- last_poll_cycles_start[ccnt] = get_cycles();
+ last_poll_cycles_start[ccnt] =
+ get_cycles();
ne = ib_poll_cq(cb->cq, 1, &wc);
} while (ne == 0);
if (ccnt < cycle_iters)
@@ -962,15 +1269,18 @@ static void wlat_test(struct krping_cb *cb)
++ccnt;
if (ne < 0) {
- log(LOG_ERR, "poll CQ failed %d\n", ne);
+ PRINTF(cb, "poll CQ failed %d\n", ne);
return;
}
if (wc.status != IB_WC_SUCCESS) {
- log(LOG_ERR, "Completion wth error at %s:\n",
+ PRINTF(cb,
+ "Completion wth error at %s:\n",
cb->server ? "server" : "client");
- log(LOG_ERR, "Failed status %d: wr_id %d\n",
+ PRINTF(cb,
+ "Failed status %d: wr_id %d\n",
wc.status, (int) wc.wr_id);
- log(LOG_ERR, "scnt=%d, rcnt=%d, ccnt=%d\n",
+ PRINTF(cb,
+ "scnt=%d, rcnt=%d, ccnt=%d\n",
scnt, rcnt, ccnt);
return;
}
@@ -986,27 +1296,27 @@ static void wlat_test(struct krping_cb *cb)
for (i=0; i < cycle_iters; i++) {
sum_post += post_cycles_stop[i] - post_cycles_start[i];
sum_poll += poll_cycles_stop[i] - poll_cycles_start[i];
- sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i];
+ sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i];
}
-
- log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n",
+ PRINTF(cb,
+ "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d"
+ " sum_post %llu sum_poll %llu sum_last_poll %llu\n",
stop_tv.tv_sec - start_tv.tv_sec,
stop_tv.tv_usec - start_tv.tv_usec,
scnt, cb->size, cycle_iters,
(unsigned long long)sum_post, (unsigned long long)sum_poll,
(unsigned long long)sum_last_poll);
-
- free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start,
- poll_cycles_stop, last_poll_cycles_start);
+ kfree(post_cycles_start);
+ kfree(post_cycles_stop);
+ kfree(poll_cycles_start);
+ kfree(poll_cycles_stop);
+ kfree(last_poll_cycles_start);
}
static void bw_test(struct krping_cb *cb)
{
int ccnt, scnt, rcnt;
int iters=cb->count;
- ccnt = 0;
- scnt = 0;
- rcnt = 0;
struct timeval start_tv, stop_tv;
cycles_t *post_cycles_start, *post_cycles_stop;
cycles_t *poll_cycles_start, *poll_cycles_stop;
@@ -1014,16 +1324,37 @@ static void bw_test(struct krping_cb *cb)
cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0;
int i;
int cycle_iters = 1000;
- int err;
- err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop,
- &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start);
-
- if (err) {
- log(LOG_ERR, "%s kmalloc failed\n", __FUNCTION__);
+ ccnt = 0;
+ scnt = 0;
+ rcnt = 0;
+
+ post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
+ if (!post_cycles_start) {
+ PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__);
+ return;
+ }
+ post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
+ if (!post_cycles_stop) {
+ PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__);
+ return;
+ }
+ poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
+ if (!poll_cycles_start) {
+ PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__);
+ return;
+ }
+ poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
+ if (!poll_cycles_stop) {
+ PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__);
+ return;
+ }
+ last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t),
+ GFP_KERNEL);
+ if (!last_poll_cycles_start) {
+ PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__);
return;
}
-
cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE;
cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey;
cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr;
@@ -1040,7 +1371,8 @@ static void bw_test(struct krping_cb *cb)
if (scnt < cycle_iters)
post_cycles_start[scnt] = get_cycles();
if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
- log(LOG_ERR, "Couldn't post send: scnt=%d\n",
+ PRINTF(cb,
+ "Couldn't post send: scnt=%d\n",
scnt);
return;
}
@@ -1057,7 +1389,8 @@ static void bw_test(struct krping_cb *cb)
poll_cycles_start[ccnt] = get_cycles();
do {
if (ccnt < cycle_iters)
- last_poll_cycles_start[ccnt] = get_cycles();
+ last_poll_cycles_start[ccnt] =
+ get_cycles();
ne = ib_poll_cq(cb->cq, 1, &wc);
} while (ne == 0);
if (ccnt < cycle_iters)
@@ -1065,13 +1398,15 @@ static void bw_test(struct krping_cb *cb)
ccnt += 1;
if (ne < 0) {
- log(LOG_ERR, "poll CQ failed %d\n", ne);
+ PRINTF(cb, "poll CQ failed %d\n", ne);
return;
}
if (wc.status != IB_WC_SUCCESS) {
- log(LOG_ERR, "Completion wth error at %s:\n",
+ PRINTF(cb,
+ "Completion wth error at %s:\n",
cb->server ? "server" : "client");
- log(LOG_ERR, "Failed status %d: wr_id %d\n",
+ PRINTF(cb,
+ "Failed status %d: wr_id %d\n",
wc.status, (int) wc.wr_id);
return;
}
@@ -1087,18 +1422,21 @@ static void bw_test(struct krping_cb *cb)
for (i=0; i < cycle_iters; i++) {
sum_post += post_cycles_stop[i] - post_cycles_start[i];
sum_poll += poll_cycles_stop[i] - poll_cycles_start[i];
- sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i];
+ sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i];
}
-
- log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n",
+ PRINTF(cb,
+ "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d"
+ " sum_post %llu sum_poll %llu sum_last_poll %llu\n",
stop_tv.tv_sec - start_tv.tv_sec,
stop_tv.tv_usec - start_tv.tv_usec,
scnt, cb->size, cycle_iters,
(unsigned long long)sum_post, (unsigned long long)sum_poll,
(unsigned long long)sum_last_poll);
-
- free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start,
- poll_cycles_stop, last_poll_cycles_start);
+ kfree(post_cycles_start);
+ kfree(post_cycles_stop);
+ kfree(poll_cycles_start);
+ kfree(poll_cycles_stop);
+ kfree(last_poll_cycles_start);
}
static void krping_rlat_test_server(struct krping_cb *cb)
@@ -1113,28 +1451,25 @@ static void krping_rlat_test_server(struct krping_cb *cb)
}
/* Send STAG/TO/Len to client */
- if (cb->dma_mr)
- krping_format_send(cb, cb->start_addr, cb->dma_mr);
- else
- krping_format_send(cb, cb->start_addr, cb->start_mr);
+ krping_format_send(cb, cb->start_dma_addr);
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
return;
}
/* Spin waiting for send completion */
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
if (ret < 0) {
- log(LOG_ERR, "poll error %d\n", ret);
+ PRINTF(cb, "poll error %d\n", ret);
return;
}
if (wc.status) {
- log(LOG_ERR, "send completiong error %d\n", wc.status);
+ PRINTF(cb, "send completiong error %d\n", wc.status);
return;
}
- krping_wait(cb, ERROR);
+ wait_event_interruptible(cb->sem, cb->state == ERROR);
}
static void krping_wlat_test_server(struct krping_cb *cb)
@@ -1149,29 +1484,26 @@ static void krping_wlat_test_server(struct krping_cb *cb)
}
/* Send STAG/TO/Len to client */
- if (cb->dma_mr)
- krping_format_send(cb, cb->start_addr, cb->dma_mr);
- else
- krping_format_send(cb, cb->start_addr, cb->start_mr);
+ krping_format_send(cb, cb->start_dma_addr);
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
return;
}
/* Spin waiting for send completion */
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
if (ret < 0) {
- log(LOG_ERR, "poll error %d\n", ret);
+ PRINTF(cb, "poll error %d\n", ret);
return;
}
if (wc.status) {
- log(LOG_ERR, "send completiong error %d\n", wc.status);
+ PRINTF(cb, "send completiong error %d\n", wc.status);
return;
}
wlat_test(cb);
- krping_wait(cb, ERROR);
+ wait_event_interruptible(cb->sem, cb->state == ERROR);
}
static void krping_bw_test_server(struct krping_cb *cb)
@@ -1186,30 +1518,48 @@ static void krping_bw_test_server(struct krping_cb *cb)
}
/* Send STAG/TO/Len to client */
- if (cb->dma_mr)
- krping_format_send(cb, cb->start_addr, cb->dma_mr);
- else
- krping_format_send(cb, cb->start_addr, cb->start_mr);
+ krping_format_send(cb, cb->start_dma_addr);
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
return;
}
/* Spin waiting for send completion */
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
if (ret < 0) {
- log(LOG_ERR, "poll error %d\n", ret);
+ PRINTF(cb, "poll error %d\n", ret);
return;
}
if (wc.status) {
- log(LOG_ERR, "send completiong error %d\n", wc.status);
+ PRINTF(cb, "send completiong error %d\n", wc.status);
return;
}
if (cb->duplex)
bw_test(cb);
- krping_wait(cb, ERROR);
+ wait_event_interruptible(cb->sem, cb->state == ERROR);
+}
+
+/*
+ * Check whether the device behind this connection advertises the memory
+ * management extensions needed for fast-register work requests.
+ *
+ * Returns 1 when IB_DEVICE_MEM_MGT_EXTENSIONS is set in the device
+ * capability flags, 0 when it is not or when ib_query_device() fails.
+ */
+static int fastreg_supported(struct krping_cb *cb)
+{
+	/*
+	 * This is called from both krping_bind_server() and
+	 * krping_bind_client().  The server's connection lives on
+	 * child_cm_id, while the client works on cm_id; its child_cm_id
+	 * is presumably never set (cb is zero-allocated), so it must not
+	 * be dereferenced on the client.
+	 */
+	struct ib_device *dev = cb->server ? cb->child_cm_id->device :
+	    cb->cm_id->device;
+	struct ib_device_attr attr;
+	int ret;
+
+	ret = ib_query_device(dev, &attr);
+	if (ret) {
+		PRINTF(cb, "ib_query_device failed ret %d\n", ret);
+		return 0;
+	}
+	if (!(attr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
+		PRINTF(cb, "Fastreg not supported - device_cap_flags 0x%x\n",
+		    attr.device_cap_flags);
+		return 0;
+	}
+	DEBUG_LOG(cb, "Fastreg supported - device_cap_flags 0x%x\n",
+		attr.device_cap_flags);
+	return 1;
 }
static int krping_bind_server(struct krping_cb *cb)
@@ -1225,25 +1575,28 @@ static int krping_bind_server(struct krping_cb *cb)
ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *) &sin);
if (ret) {
- log(LOG_ERR, "rdma_bind_addr error %d\n", ret);
+ PRINTF(cb, "rdma_bind_addr error %d\n", ret);
return ret;
}
- DEBUG_LOG(PFX "rdma_bind_addr successful\n");
+ DEBUG_LOG(cb, "rdma_bind_addr successful\n");
- DEBUG_LOG(PFX "rdma_listen\n");
+ DEBUG_LOG(cb, "rdma_listen\n");
ret = rdma_listen(cb->cm_id, 3);
if (ret) {
- log(LOG_ERR, "rdma_listen failed: %d\n", ret);
+ PRINTF(cb, "rdma_listen failed: %d\n", ret);
return ret;
}
- krping_wait(cb, CONNECT_REQUEST);
+ wait_event_interruptible(cb->sem, cb->state >= CONNECT_REQUEST);
if (cb->state != CONNECT_REQUEST) {
- log(LOG_ERR, "wait for CONNECT_REQUEST state %d\n",
+ PRINTF(cb, "wait for CONNECT_REQUEST state %d\n",
cb->state);
return -1;
}
+ if (cb->mem == FASTREG && !fastreg_supported(cb))
+ return -EINVAL;
+
return 0;
}
@@ -1258,25 +1611,25 @@ static void krping_run_server(struct krping_cb *cb)
ret = krping_setup_qp(cb, cb->child_cm_id);
if (ret) {
- log(LOG_ERR, "setup_qp failed: %d\n", ret);
- return;
+ PRINTF(cb, "setup_qp failed: %d\n", ret);
+ goto err0;
}
ret = krping_setup_buffers(cb);
if (ret) {
- log(LOG_ERR, "krping_setup_buffers failed: %d\n", ret);
+ PRINTF(cb, "krping_setup_buffers failed: %d\n", ret);
goto err1;
}
ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "ib_post_recv failed: %d\n", ret);
+ PRINTF(cb, "ib_post_recv failed: %d\n", ret);
goto err2;
}
ret = krping_accept(cb);
if (ret) {
- log(LOG_ERR, "connect error %d\n", ret);
+ PRINTF(cb, "connect error %d\n", ret);
goto err2;
}
@@ -1288,13 +1641,13 @@ static void krping_run_server(struct krping_cb *cb)
krping_bw_test_server(cb);
else
krping_test_server(cb);
-
rdma_disconnect(cb->child_cm_id);
- rdma_destroy_id(cb->child_cm_id);
err2:
krping_free_buffers(cb);
err1:
krping_free_qp(cb);
+err0:
+ rdma_destroy_id(cb->child_cm_id);
}
static void krping_test_client(struct krping_cb *cb)
@@ -1320,41 +1673,38 @@ static void krping_test_client(struct krping_cb *cb)
start = 65;
cb->start_buf[cb->size - 1] = 0;
- if (cb->dma_mr)
- krping_format_send(cb, cb->start_addr, cb->dma_mr);
- else
- krping_format_send(cb, cb->start_addr, cb->start_mr);
-
+ krping_format_send(cb, cb->start_dma_addr);
+ if (cb->state == ERROR) {
+ PRINTF(cb, "krping_format_send failed\n");
+ break;
+ }
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
break;
}
/* Wait for server to ACK */
- krping_wait(cb, RDMA_WRITE_ADV);
+ wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV);
if (cb->state != RDMA_WRITE_ADV) {
- log(LOG_ERR,
+ PRINTF(cb,
"wait for RDMA_WRITE_ADV state %d\n",
cb->state);
break;
}
- if (cb->dma_mr)
- krping_format_send(cb, cb->rdma_addr, cb->dma_mr);
- else
- krping_format_send(cb, cb->rdma_addr, cb->rdma_mr);
-
+ krping_format_send(cb, cb->rdma_dma_addr);
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
break;
}
/* Wait for the server to say the RDMA Write is complete. */
- krping_wait(cb, RDMA_WRITE_COMPLETE);
+ wait_event_interruptible(cb->sem,
+ cb->state >= RDMA_WRITE_COMPLETE);
if (cb->state != RDMA_WRITE_COMPLETE) {
- log(LOG_ERR,
+ PRINTF(cb,
"wait for RDMA_WRITE_COMPLETE state %d\n",
cb->state);
break;
@@ -1362,12 +1712,15 @@ static void krping_test_client(struct krping_cb *cb)
if (cb->validate)
if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) {
- log(LOG_ERR, "data mismatch!\n");
+ PRINTF(cb, "data mismatch!\n");
break;
}
if (cb->verbose)
- DEBUG_LOG("ping data: %s\n", cb->rdma_buf);
+ PRINTF(cb, "ping data: %s\n", cb->rdma_buf);
+#ifdef SLOW_KRPING
+ wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+#endif
}
}
@@ -1380,24 +1733,25 @@ static void krping_rlat_test_client(struct krping_cb *cb)
cb->state = RDMA_READ_ADV;
/* Send STAG/TO/Len to client */
- if (cb->dma_mr)
- krping_format_send(cb, cb->start_addr, cb->dma_mr);
- else
- krping_format_send(cb, cb->start_addr, cb->rdma_mr);
+ krping_format_send(cb, cb->start_dma_addr);
+ if (cb->state == ERROR) {
+ PRINTF(cb, "krping_format_send failed\n");
+ return;
+ }
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
return;
}
/* Spin waiting for send completion */
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
if (ret < 0) {
- log(LOG_ERR, "poll error %d\n", ret);
+ PRINTF(cb, "poll error %d\n", ret);
return;
}
if (wc.status) {
- log(LOG_ERR, "send completion error %d\n", wc.status);
+ PRINTF(cb, "send completion error %d\n", wc.status);
return;
}
@@ -1426,20 +1780,20 @@ static void krping_rlat_test_client(struct krping_cb *cb)
microtime(&start);
for (i=0; i < 100000; i++) {
if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) {
- log(LOG_ERR, "Couldn't post send\n");
+ PRINTF(cb, "Couldn't post send\n");
return;
}
do {
ne = ib_poll_cq(cb->cq, 1, &wc);
} while (ne == 0);
if (ne < 0) {
- log(LOG_ERR, "poll CQ failed %d\n", ne);
+ PRINTF(cb, "poll CQ failed %d\n", ne);
return;
}
if (wc.status != IB_WC_SUCCESS) {
- log(LOG_ERR, "Completion wth error at %s:\n",
+ PRINTF(cb, "Completion wth error at %s:\n",
cb->server ? "server" : "client");
- log(LOG_ERR, "Failed status %d: wr_id %d\n",
+ PRINTF(cb, "Failed status %d: wr_id %d\n",
wc.status, (int) wc.wr_id);
return;
}
@@ -1453,7 +1807,7 @@ static void krping_rlat_test_client(struct krping_cb *cb)
sec = stop.tv_sec - start.tv_sec;
usec = stop.tv_usec - start.tv_usec;
elapsed = sec * 1000000 + usec;
- log(LOG_ERR, "0B-write-lat iters 100000 usec %llu\n", elapsed);
+ PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed);
}
#endif
@@ -1469,24 +1823,25 @@ static void krping_wlat_test_client(struct krping_cb *cb)
cb->state = RDMA_READ_ADV;
/* Send STAG/TO/Len to client */
- if (cb->dma_mr)
- krping_format_send(cb, cb->start_addr, cb->dma_mr);
- else
- krping_format_send(cb, cb->start_addr, cb->start_mr);
+ krping_format_send(cb, cb->start_dma_addr);
+ if (cb->state == ERROR) {
+ PRINTF(cb, "krping_format_send failed\n");
+ return;
+ }
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
return;
}
/* Spin waiting for send completion */
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
if (ret < 0) {
- log(LOG_ERR, "poll error %d\n", ret);
+ PRINTF(cb, "poll error %d\n", ret);
return;
}
if (wc.status) {
- log(LOG_ERR, "send completion error %d\n", wc.status);
+ PRINTF(cb, "send completion error %d\n", wc.status);
return;
}
@@ -1507,24 +1862,25 @@ static void krping_bw_test_client(struct krping_cb *cb)
cb->state = RDMA_READ_ADV;
/* Send STAG/TO/Len to client */
- if (cb->dma_mr)
- krping_format_send(cb, cb->start_addr, cb->dma_mr);
- else
- krping_format_send(cb, cb->start_addr, cb->start_mr);
+ krping_format_send(cb, cb->start_dma_addr);
+ if (cb->state == ERROR) {
+ PRINTF(cb, "krping_format_send failed\n");
+ return;
+ }
ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "post send error %d\n", ret);
+ PRINTF(cb, "post send error %d\n", ret);
return;
}
/* Spin waiting for send completion */
while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0));
if (ret < 0) {
- log(LOG_ERR, "poll error %d\n", ret);
+ PRINTF(cb, "poll error %d\n", ret);
return;
}
if (wc.status) {
- log(LOG_ERR, "send completion error %d\n", wc.status);
+ PRINTF(cb, "send completion error %d\n", wc.status);
return;
}
@@ -1536,6 +1892,121 @@ static void krping_bw_test_client(struct krping_cb *cb)
bw_test(cb);
}
+/*
+ * Fast-register stress test (run by the client when cb->frtest is set).
+ *
+ * Allocates one fast-reg page list and one fast-reg MR, then repeatedly
+ * posts a FAST_REG_MR work request chained to a LOCAL_INV for the same
+ * rkey, using a new key byte and a random length (1..cb->size) each time.
+ * Only the LOCAL_INV is signaled, so each polled completion accounts for
+ * one posted chain.  Up to txdepth/2 chains are kept outstanding.
+ *
+ * The loop ends when a post, poll or completion fails, when a signal is
+ * pending, or when cb->state becomes ERROR.  The CQ is then drained and
+ * the MR and page list are released.
+ */
+static void krping_fr_test(struct krping_cb *cb)
+{
+	struct ib_fast_reg_page_list *pl;
+	struct ib_send_wr fr, inv, *bad;
+	struct ib_wc wc;
+	u8 key = 0;
+	struct ib_mr *mr;
+	int i;
+	int ret;
+	int size = cb->size;
+	int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+	time_t start;
+	int count = 0;
+	int scnt = 0;
+
+	pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen);
+	if (IS_ERR(pl)) {
+		PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl));
+		return;
+	}
+	
+	mr = ib_alloc_fast_reg_mr(cb->pd, plen);
+	if (IS_ERR(mr)) {
+		/* Report the MR's error code; pl is a valid pointer here. */
+		PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(mr));
+		goto err1;
+	}
+
+	/* Placeholder page addresses; the test never touches the memory. */
+	for (i=0; i<plen; i++)
+		pl->page_list[i] = 0xcafebabe | i;
+	
+	/* Unsignaled fast-register WR, chained to a signaled invalidate. */
+	memset(&fr, 0, sizeof fr);
+	fr.opcode = IB_WR_FAST_REG_MR;
+	fr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fr.wr.fast_reg.length = size;
+	fr.wr.fast_reg.page_list = pl;
+	fr.wr.fast_reg.page_list_len = plen;
+	fr.wr.fast_reg.iova_start = 0;
+	fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
+	fr.next = &inv;
+	memset(&inv, 0, sizeof inv);
+	inv.opcode = IB_WR_LOCAL_INV;
+	inv.send_flags = IB_SEND_SIGNALED;
+	
+	DEBUG_LOG(cb, "fr_test: stag index 0x%x plen %u size %u depth %u\n", mr->rkey >> 8, plen, cb->size, cb->txdepth);
+	start = time_uptime;
+	while (1) {
+		/*
+		 * NOTE(review): the message says "pausing 1 second", but this
+		 * wait has no timeout; it returns only once state == ERROR or
+		 * the sleep is interrupted.  Confirm this is intended.
+		 */
+		if ((time_uptime - start) >= 9) {
+			DEBUG_LOG(cb, "fr_test: pausing 1 second! count %u latest size %u plen %u\n", count, size, plen);
+			wait_event_interruptible(cb->sem, cb->state == ERROR);
+			if (cb->state == ERROR)
+				break;
+			start = time_uptime;
+		}	
+		/* Top up the send queue to half of its depth. */
+		while (scnt < (cb->txdepth>>1)) {
+			ib_update_fast_reg_key(mr, ++key);
+			fr.wr.fast_reg.rkey = mr->rkey;
+			inv.ex.invalidate_rkey = mr->rkey;
+			size = arc4random() % cb->size;
+			if (size == 0)
+				size = cb->size;
+			plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT;
+			fr.wr.fast_reg.length = size;
+			fr.wr.fast_reg.page_list_len = plen;
+			ret = ib_post_send(cb->qp, &fr, &bad);
+			if (ret) {
+				PRINTF(cb, "ib_post_send failed %d\n", ret);
+				goto err2;	
+			}
+			scnt++;
+		}
+
+		/* Reap whatever has completed so far without blocking. */
+		do {
+			ret = ib_poll_cq(cb->cq, 1, &wc);
+			if (ret < 0) {
+				PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+				goto err2;	
+			}
+			if (ret == 1) {
+				if (wc.status) {
+					PRINTF(cb, "completion error %u\n", wc.status);
+					goto err2;
+				}
+				count++;
+				scnt--;
+			}
+			else if (krping_sigpending()) {
+				PRINTF(cb, "signal!\n");
+				goto err2;
+			}
+		} while (ret == 1);
+	}
+err2:
+#if 0
+	DEBUG_LOG(cb, "sleeping 1 second\n");
+	wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ);
+#endif
+	DEBUG_LOG(cb, "draining the cq...\n");
+	do {
+		ret = ib_poll_cq(cb->cq, 1, &wc);
+		if (ret < 0) {
+			PRINTF(cb, "ib_poll_cq failed %d\n", ret);
+			break;
+		}
+		if (ret == 1) {
+			if (wc.status) {
+				PRINTF(cb, "completion error %u opcode %u\n", wc.status, wc.opcode);
+			}
+		}
+	} while (ret == 1);
+	DEBUG_LOG(cb, "fr_test: done!\n");
+	ib_dereg_mr(mr);
+err1:
+	ib_free_fast_reg_page_list(pl);
+}
+
static int krping_connect_client(struct krping_cb *cb)
{
struct rdma_conn_param conn_param;
@@ -1548,17 +2019,17 @@ static int krping_connect_client(struct krping_cb *cb)
ret = rdma_connect(cb->cm_id, &conn_param);
if (ret) {
- log(LOG_ERR, "rdma_connect error %d\n", ret);
+ PRINTF(cb, "rdma_connect error %d\n", ret);
return ret;
}
- krping_wait(cb, CONNECTED);
+ wait_event_interruptible(cb->sem, cb->state >= CONNECTED);
if (cb->state == ERROR) {
- log(LOG_ERR, "wait for CONNECTED state %d\n", cb->state);
+ PRINTF(cb, "wait for CONNECTED state %d\n", cb->state);
return -1;
}
- DEBUG_LOG(PFX "rdma_connect successful\n");
+ DEBUG_LOG(cb, "rdma_connect successful\n");
return 0;
}
@@ -1576,19 +2047,22 @@ static int krping_bind_client(struct krping_cb *cb)
ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *) &sin,
2000);
if (ret) {
- log(LOG_ERR, "rdma_resolve_addr error %d\n", ret);
+ PRINTF(cb, "rdma_resolve_addr error %d\n", ret);
return ret;
}
- krping_wait(cb, ROUTE_RESOLVED);
+ wait_event_interruptible(cb->sem, cb->state >= ROUTE_RESOLVED);
if (cb->state != ROUTE_RESOLVED) {
- log(LOG_ERR,
+ PRINTF(cb,
"addr/route resolution did not resolve: state %d\n",
cb->state);
- return EINTR;
+ return -EINTR;
}
- DEBUG_LOG(PFX "rdma_resolve_addr - rdma_resolve_route successful\n");
+ if (cb->mem == FASTREG && !fastreg_supported(cb))
+ return -EINVAL;
+
+ DEBUG_LOG(cb, "rdma_resolve_addr - rdma_resolve_route successful\n");
return 0;
}
@@ -1603,25 +2077,25 @@ static void krping_run_client(struct krping_cb *cb)
ret = krping_setup_qp(cb, cb->cm_id);
if (ret) {
- log(LOG_ERR, "setup_qp failed: %d\n", ret);
+ PRINTF(cb, "setup_qp failed: %d\n", ret);
return;
}
ret = krping_setup_buffers(cb);
if (ret) {
- log(LOG_ERR, "krping_setup_buffers failed: %d\n", ret);
+ PRINTF(cb, "krping_setup_buffers failed: %d\n", ret);
goto err1;
}
ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
if (ret) {
- log(LOG_ERR, "ib_post_recv failed: %d\n", ret);
+ PRINTF(cb, "ib_post_recv failed: %d\n", ret);
goto err2;
}
ret = krping_connect_client(cb);
if (ret) {
- log(LOG_ERR, "connect error %d\n", ret);
+ PRINTF(cb, "connect error %d\n", ret);
goto err2;
}
@@ -1631,6 +2105,8 @@ static void krping_run_client(struct krping_cb *cb)
krping_rlat_test_client(cb);
else if (cb->bw)
krping_bw_test_client(cb);
+ else if (cb->frtest)
+ krping_fr_test(cb);
else
krping_test_client(cb);
rdma_disconnect(cb->cm_id);
@@ -1640,119 +2116,137 @@ err1:
krping_free_qp(cb);
}
-int krping_doit(char *cmd)
+int krping_doit(char *cmd, void *cookie)
{
struct krping_cb *cb;
int op;
int ret = 0;
char *optarg;
unsigned long optint;
- debug = 0;
- cb = malloc(sizeof(*cb), M_DEVBUF, M_WAITOK);
+ cb = kzalloc(sizeof(*cb), GFP_KERNEL);
if (!cb)
- return ENOMEM;
- bzero(cb, sizeof *cb);
+ return -ENOMEM;
- mtx_lock(&krping_mutex);
- TAILQ_INSERT_TAIL(&krping_cbs, cb, list);
- mtx_unlock(&krping_mutex);
+ mutex_lock(&krping_mutex);
+ list_add_tail(&cb->list, &krping_cbs);
+ mutex_unlock(&krping_mutex);
+ cb->cookie = cookie;
cb->server = -1;
cb->state = IDLE;
cb->size = 64;
cb->txdepth = RPING_SQ_DEPTH;
- cb->use_dmamr = 1;
- cb->memlimit = 0;
- mtx_init(&cb->lock, "krping mtx", NULL, MTX_DUPOK|MTX_DEF);
+ cb->mem = DMA;
+ init_waitqueue_head(&cb->sem);
while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg,
&optint)) != 0) {
switch (op) {
case 'a':
cb->addr_str = optarg;
- DEBUG_LOG(PFX "ipaddr (%s)\n", optarg);
+ DEBUG_LOG(cb, "ipaddr (%s)\n", optarg);
if (!inet_aton(optarg, &cb->addr)) {
- log(LOG_ERR, "bad addr string %s\n", optarg);
+ PRINTF(cb, "bad addr string %s\n",
+ optarg);
ret = EINVAL;
}
break;
- case 'D':
- cb->use_dmamr = 1;
- DEBUG_LOG(PFX "using dma mr\n");
- break;
case 'p':
cb->port = htons(optint);
- DEBUG_LOG(PFX "port %d\n", (int)optint);
+ DEBUG_LOG(cb, "port %d\n", (int)optint);
break;
case 'P':
cb->poll = 1;
- DEBUG_LOG("server\n");
+ DEBUG_LOG(cb, "server\n");
break;
case 's':
cb->server = 1;
- DEBUG_LOG(PFX "server\n");
+ DEBUG_LOG(cb, "server\n");
break;
case 'c':
cb->server = 0;
- DEBUG_LOG(PFX "client\n");
+ DEBUG_LOG(cb, "client\n");
break;
case 'S':
cb->size = optint;
if ((cb->size < 1) ||
(cb->size > RPING_BUFSIZE)) {
- log(LOG_ERR, "Invalid size %d "
+ PRINTF(cb, "Invalid size %d "
"(valid range is 1 to %d)\n",
cb->size, RPING_BUFSIZE);
ret = EINVAL;
} else
- DEBUG_LOG(PFX "size %d\n", (int)optint);
+ DEBUG_LOG(cb, "size %d\n", (int)optint);
break;
case 'C':
cb->count = optint;
if (cb->count < 0) {
- log(LOG_ERR, "Invalid count %d\n",
+ PRINTF(cb, "Invalid count %d\n",
cb->count);
ret = EINVAL;
} else
- DEBUG_LOG(PFX "count %d\n", (int) cb->count);
+ DEBUG_LOG(cb, "count %d\n", (int) cb->count);
break;
case 'v':
cb->verbose++;
- DEBUG_LOG(PFX "verbose\n");
+ DEBUG_LOG(cb, "verbose\n");
break;
case 'V':
cb->validate++;
- DEBUG_LOG(PFX "validate data\n");
- break;
- case 'L':
- cb->rlat++;
+ DEBUG_LOG(cb, "validate data\n");
break;
case 'l':
cb->wlat++;
break;
+ case 'L':
+ cb->rlat++;
+ break;
case 'B':
cb->bw++;
break;
- case 't':
- cb->txdepth = optint;
- DEBUG_LOG(PFX "txdepth %d\n", cb->txdepth);
- break;
case 'd':
- debug++;
+ cb->duplex++;
break;
case 'm':
- cb->memlimit = optint;
- if (cb->memlimit < 1) {
- log(LOG_ERR, "Invalid memory limit %ju\n",
- cb->memlimit);
- ret = EINVAL;
- } else
- DEBUG_LOG(PFX "memory limit %d\n", (int)optint);
- break;
+ if (!strncmp(optarg, "dma", 3))
+ cb->mem = DMA;
+ else if (!strncmp(optarg, "fastreg", 7))
+ cb->mem = FASTREG;
+ else if (!strncmp(optarg, "mw", 2))
+ cb->mem = MW;
+ else if (!strncmp(optarg, "mr", 2))
+ cb->mem = MR;
+ else {
+ PRINTF(cb, "unknown mem mode %s. "
+ "Must be dma, fastreg, mw, or mr\n",
+ optarg);
+ ret = -EINVAL;
+ break;
+ }
+ break;
+ case 'I':
+ cb->server_invalidate = 1;
+ break;
+ case 'T':
+ cb->txdepth = optint;
+ DEBUG_LOG(cb, "txdepth %d\n", (int) cb->txdepth);
+ break;
+ case 'Z':
+ cb->local_dma_lkey = 1;
+ DEBUG_LOG(cb, "using local dma lkey\n");
+ break;
+ case 'R':
+ cb->read_inv = 1;
+ DEBUG_LOG(cb, "using read-with-inv\n");
+ break;
+ case 'f':
+ cb->frtest = 1;
+ DEBUG_LOG(cb, "fast-reg test!\n");
+ break;
default:
- log(LOG_ERR, "unknown opt %s\n", optarg);
- ret = EINVAL;
+ PRINTF(cb, "unknown opt %s\n", optarg);
+ ret = -EINVAL;
break;
}
}
@@ -1760,46 +2254,77 @@ int krping_doit(char *cmd)
goto out;
if (cb->server == -1) {
- log(LOG_ERR, "must be either client or server\n");
- ret = EINVAL;
+ PRINTF(cb, "must be either client or server\n");
+ ret = -EINVAL;
goto out;
}
- if ((cb->bw + cb->rlat + cb->wlat) > 1) {
- log(LOG_ERR, "Pick only one test: bw, rlat, wlat\n");
- ret = EINVAL;
+
+ if (cb->server && cb->frtest) {
+ PRINTF(cb, "must be client to run frtest\n");
+ ret = -EINVAL;
goto out;
}
+ if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) {
+ PRINTF(cb, "Pick only one test: fr, bw, rlat, wlat\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (cb->server_invalidate && cb->mem != FASTREG) {
+ PRINTF(cb, "server_invalidate only valid with fastreg mem_mode\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (cb->read_inv && cb->mem != FASTREG) {
+ PRINTF(cb, "read_inv only valid with fastreg mem_mode\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (cb->mem != MR && (cb->wlat || cb->rlat || cb->bw)) {
+ PRINTF(cb, "wlat, rlat, and bw tests only support mem_mode MR\n");
+ ret = -EINVAL;
+ goto out;
+ }
cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP);
if (IS_ERR(cb->cm_id)) {
ret = PTR_ERR(cb->cm_id);
- log(LOG_ERR, "rdma_create_id error %d\n", ret);
+ PRINTF(cb, "rdma_create_id error %d\n", ret);
goto out;
}
- DEBUG_LOG(PFX "created cm_id %p\n", cb->cm_id);
+ DEBUG_LOG(cb, "created cm_id %p\n", cb->cm_id);
+
if (cb->server)
krping_run_server(cb);
else
krping_run_client(cb);
- DEBUG_LOG(PFX "destroy cm_id %p\n", cb->cm_id);
-
- mtx_lock(&cb->lock);
- cb->state = CLEANUP;
- wakeup(cb);
- mtx_unlock(&cb->lock);
+ DEBUG_LOG(cb, "destroy cm_id %p\n", cb->cm_id);
rdma_destroy_id(cb->cm_id);
out:
- mtx_lock(&krping_mutex);
- TAILQ_REMOVE(&krping_cbs, cb, list);
- mtx_unlock(&krping_mutex);
- free(cb, M_DEVBUF);
+ mutex_lock(&krping_mutex);
+ list_del(&cb->list);
+ mutex_unlock(&krping_mutex);
+ kfree(cb);
return ret;
}
+/*
+ * Invoke f once for every control block linked on krping_cbs.
+ *
+ * The whole walk runs with krping_mutex held, so f is called with that
+ * mutex locked.  For each control block, f receives a pointer to the
+ * block's stats when cb->pd is set, or NULL when it is not, followed by
+ * the caller-supplied arg.
+ */
+void
+krping_walk_cb_list(void (*f)(struct krping_stats *, void *), void *arg)
+{
+ struct krping_cb *cb;
+
+ mutex_lock(&krping_mutex);
+ list_for_each_entry(cb, &krping_cbs, list)
+ (*f)(cb->pd ? &cb->stats : NULL, arg);
+ mutex_unlock(&krping_mutex);
+}
+
+/*
+ * One-time setup: initialize krping_mutex, which krping_doit and
+ * krping_walk_cb_list take around accesses to krping_cbs.
+ * NOTE(review): krping_cbs itself is no longer initialized here --
+ * presumably it is statically initialized at its definition; confirm.
+ */
void krping_init(void)
{
- mtx_init(&krping_mutex, "krping lock", NULL, MTX_DEF);
- TAILQ_INIT(&krping_cbs);
+
+ mutex_init(&krping_mutex);
}
diff --git a/sys/contrib/rdma/krping/krping.h b/sys/contrib/rdma/krping/krping.h
index 5cced30..04be531 100644
--- a/sys/contrib/rdma/krping/krping.h
+++ b/sys/contrib/rdma/krping/krping.h
@@ -1,133 +1,21 @@
/*
* $FreeBSD$
*/
-#include <rdma/ib_verbs.h>
-#include <netinet/in.h>
-
-/*
- * Krping header stuffs...
- */
struct krping_stats {
- unsigned send_bytes;
- unsigned send_msgs;
- unsigned recv_bytes;
- unsigned recv_msgs;
- unsigned write_bytes;
- unsigned write_msgs;
- unsigned read_bytes;
- unsigned read_msgs;
-};
-
-
-/*
- * These states are used to signal events between the completion handler
- * and the main client or server thread.
- *
- * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV,
- * and RDMA_WRITE_COMPLETE for each ping.
- */
-enum test_state {
- IDLE = 1,
- CONNECT_REQUEST,
- ADDR_RESOLVED,
- ROUTE_RESOLVED,
- CONNECTED,
- RDMA_READ_ADV,
- RDMA_READ_COMPLETE,
- RDMA_WRITE_ADV,
- RDMA_WRITE_COMPLETE,
- ERROR,
- CLEANUP
-};
-
-struct krping_rdma_info {
- uint64_t buf;
- uint32_t rkey;
- uint32_t size;
-};
-
-/*
- * Control block struct.
- */
-struct krping_cb {
- int server; /* 0 iff client */
- struct ib_cq *cq;
- struct ib_pd *pd;
- struct ib_qp *qp;
- struct ib_mr *dma_mr;
- int use_dmamr;
-
- struct ib_recv_wr rq_wr; /* recv work request record */
- struct ib_sge recv_sgl; /* recv single SGE */
- struct krping_rdma_info recv_buf;/* malloc'd buffer */
- struct ib_mr *recv_mr;
-
- struct ib_send_wr sq_wr; /* send work requrest record */
- struct ib_sge send_sgl;
- struct krping_rdma_info send_buf;/* single send buf */
- struct ib_mr *send_mr;
-
- struct ib_send_wr rdma_sq_wr; /* rdma work request record */
- struct ib_sge rdma_sgl; /* rdma single SGE */
- char *rdma_buf; /* used as rdma sink */
- u64 rdma_addr;
- struct ib_mr *rdma_mr;
-
- uint32_t remote_rkey; /* remote guys RKEY */
- uint64_t remote_addr; /* remote guys TO */
- uint32_t remote_len; /* remote guys LEN */
-
- char *start_buf; /* rdma read src */
- u64 start_addr;
- struct ib_mr *start_mr;
-
- enum test_state state; /* used for cond/signalling */
- struct mtx lock;
- struct krping_stats stats;
-
- uint16_t port; /* dst port in NBO */
- struct in_addr addr; /* dst addr in NBO */
- char *addr_str; /* dst addr string */
- int verbose; /* verbose logging */
- int count; /* ping count */
- int size; /* ping data size */
- int validate; /* validate ping data */
- uint64_t memlimit; /* limit of the physical memory that
- can be registered with dma_mr mode */
-
- /* CM stuff */
- struct rdma_cm_id *cm_id; /* connection on client side,*/
- /* listener on service side. */
- struct rdma_cm_id *child_cm_id; /* connection on server side */
- TAILQ_ENTRY(krping_cb) list;
-
- int rlat; /* run read latency test */
- int wlat; /* run write latency test */
- int bw; /* run write bw test */
- int duplex; /* run write bw full duplex test */
- int poll; /* poll vs block in rlat */
- int txdepth;
-
+ unsigned long long send_bytes;
+ unsigned long long send_msgs;
+ unsigned long long recv_bytes;
+ unsigned long long recv_msgs;
+ unsigned long long write_bytes;
+ unsigned long long write_msgs;
+ unsigned long long read_bytes;
+ unsigned long long read_msgs;
char name[16];
};
-static __inline uint64_t
-get_cycles(void)
-{
- u_int32_t low, high;
- __asm __volatile("rdtsc" : "=a" (low), "=d" (high));
- return (low | ((u_int64_t)high << 32));
-}
-
-#define htonll(x) htobe64((x))
-#define ntohll(x) be64toh((x))
-
-typedef uint64_t cycles_t;
-
-extern struct mtx krping_mutex;
-TAILQ_HEAD(krping_cb_list, krping_cb);
-extern struct krping_cb_list krping_cbs;
-
-int krping_doit(char *cmd);
+int krping_doit(char *, void *);
+void krping_walk_cb_list(void (*)(struct krping_stats *, void *), void *);
void krping_init(void);
+void krping_printf(void *, const char *, ...);
+int krping_sigpending(void);
diff --git a/sys/contrib/rdma/krping/krping_dev.c b/sys/contrib/rdma/krping/krping_dev.c
index d6ab00a..2244d72 100644
--- a/sys/contrib/rdma/krping/krping_dev.c
+++ b/sys/contrib/rdma/krping/krping_dev.c
@@ -1,19 +1,20 @@
/*
- * This code lifted from:
+ * This code lifted from:
* Simple `echo' pseudo-device KLD
* Murray Stokely
* Converted to 5.X by Søren (Xride) Straarup
*/
/*
- * /bin/echo "server,port=9999,addr=192.168.69.142,validate" > /dev/krping
- * /bin/echo "client,port=9999,addr=192.168.69.142,validate" > /dev/krping
+ * /bin/echo "server,port=9999,addr=192.168.69.142,validate" > /dev/krping
+ * /bin/echo "client,port=9999,addr=192.168.69.142,validate" > /dev/krping
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
+#include <sys/module.h>
#include <sys/systm.h> /* uprintf */
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
@@ -21,11 +22,19 @@ __FBSDID("$FreeBSD$");
#include <sys/conf.h> /* cdevsw struct */
#include <sys/uio.h> /* uio struct */
#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <machine/stdarg.h>
#include "krping.h"
#define BUFFERSIZE 512
+SYSCTL_NODE(_dev, OID_AUTO, krping, CTLFLAG_RW, 0, "kernel rping module");
+
+int krping_debug = 0;
+SYSCTL_INT(_dev_krping, OID_AUTO, debug, CTLFLAG_RW, &krping_debug, 0 , "");
+
/* Function prototypes */
static d_open_t krping_open;
static d_close_t krping_close;
@@ -47,12 +56,15 @@ typedef struct s_krping {
int len;
} krping_t;
+struct stats_list_entry {
+ STAILQ_ENTRY(stats_list_entry) link;
+ struct krping_stats *stats;
+};
+STAILQ_HEAD(stats_list, stats_list_entry);
+
/* vars */
static struct cdev *krping_dev;
-#undef MODULE_VERSION
-#include <sys/module.h>
-
static int
krping_loader(struct module *m, int what, void *arg)
{
@@ -61,7 +73,7 @@ krping_loader(struct module *m, int what, void *arg)
switch (what) {
case MOD_LOAD: /* kldload */
krping_init();
- krping_dev = make_dev(&krping_cdevsw, 0, UID_ROOT, GID_WHEEL,
+ krping_dev = make_dev(&krping_cdevsw, 0, UID_ROOT, GID_WHEEL,
0600, "krping");
printf("Krping device loaded.\n");
break;
@@ -73,61 +85,82 @@ krping_loader(struct module *m, int what, void *arg)
err = EOPNOTSUPP;
break;
}
- return err;
+
+ return (err);
}
+/* d_open_t handler for the krping device: does nothing and returns 0. */
static int
krping_open(struct cdev *dev, int oflags, int devtype, struct thread *p)
{
- int err = 0;
- return err;
+
+ return (0);
}
+/* d_close_t handler for the krping device: does nothing and returns 0. */
static int
krping_close(struct cdev *dev, int fflag, int devtype, struct thread *p)
{
+
return 0;
}
+/*
+ * Callback handed to krping_walk_cb_list() by krping_read().
+ *
+ * Appends one stats_list_entry to the stats_list passed in arg.  When
+ * stats is non-NULL the entry carries a private copy of *stats; when it
+ * is NULL the entry's stats pointer stays NULL (the entry is allocated
+ * with M_ZERO).  If either allocation fails the control block is simply
+ * left out of the list; no error is reported to the caller.
+ */
+static void
+krping_copy_stats(struct krping_stats *stats, void *arg)
+{
+ struct stats_list_entry *s;
+ struct stats_list *list = arg;
+
+ /* NOTE(review): M_NOWAIT presumably because the walker holds krping_mutex -- confirm. */
+ s = malloc(sizeof(*s), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (s == NULL)
+ return;
+ if (stats != NULL) {
+ s->stats = malloc(sizeof(*stats), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (s->stats == NULL) {
+ free(s, M_DEVBUF);
+ return;
+ }
+ /* Struct copy: the list entry owns its own snapshot of the counters. */
+ *s->stats = *stats;
+ }
+ STAILQ_INSERT_TAIL(list, s, link);
+}
+
+/*
+ * Read handler for the krping device.
+ *
+ * Nothing is transferred through uio; the report is emitted with
+ * uprintf() instead.  One line is printed per list entry gathered from
+ * krping_walk_cb_list(): the full counter set when the entry has stats,
+ * or a "listen" line when it does not.  Prints nothing at all, not even
+ * the column header, when no entries were gathered.  Always returns 0.
+ */
static int
krping_read(struct cdev *dev, struct uio *uio, int ioflag)
{
- struct krping_cb *cb, *cb2;
- int num=1;
- struct krping_cb_list copy_cbs;
+ int num = 1;
+ struct stats_list list;
+ struct stats_list_entry *e;
+
+ /* Snapshot the stats into a private list first; printing happens after the walk returns. */
+ STAILQ_INIT(&list);
+ krping_walk_cb_list(krping_copy_stats, &list);
+
+ if (STAILQ_EMPTY(&list))
+ return (0);
uprintf("krping: %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n",
- "num", "device", "snd bytes", "snd msgs", "rcv bytes",
- "rcv msgs", "wr bytes", "wr msgs", "rd bytes", "rd msgs");
- TAILQ_INIT(&copy_cbs);
-
- mtx_lock(&krping_mutex);
- TAILQ_FOREACH(cb, &krping_cbs, list) {
- cb2 = malloc(sizeof(*cb), M_DEVBUF, M_NOWAIT|M_ZERO);
- if (!cb2)
- break;
- bcopy(cb, cb2, sizeof(*cb));
- TAILQ_INSERT_TAIL(&copy_cbs, cb2, list);
- }
- mtx_unlock(&krping_mutex);
-
- while (!TAILQ_EMPTY(&copy_cbs)) {
- cb = TAILQ_FIRST(&copy_cbs);
- TAILQ_REMOVE(&copy_cbs, cb, list);
- if (cb->pd) {
- uprintf("krping: %4d %10s %10u %10u %10u %10u %10u %10u %10u %10u\n",
- num++, cb->name, cb->stats.send_bytes,
- cb->stats.send_msgs, cb->stats.recv_bytes,
- cb->stats.recv_msgs, cb->stats.write_bytes,
- cb->stats.write_msgs,
- cb->stats.read_bytes,
- cb->stats.read_msgs);
- } else {
- uprintf("krping: %d listen\n", num++);
+ "num", "device", "snd bytes", "snd msgs", "rcv bytes", "rcv msgs",
+ "wr bytes", "wr msgs", "rd bytes", "rd msgs");
+
+ /* Drain the private list, printing and freeing each entry (and its stats copy) in order. */
+ while (!STAILQ_EMPTY(&list)) {
+ e = STAILQ_FIRST(&list);
+ STAILQ_REMOVE_HEAD(&list, link);
+ if (e->stats == NULL)
+ uprintf("krping: %d listen\n", num);
+ else {
+ struct krping_stats *stats = e->stats;
+
+ uprintf("krping: %4d %10s %10llu %10llu %10llu %10llu "
+ "%10llu %10llu %10llu %10llu\n", num, stats->name,
+ stats->send_bytes, stats->send_msgs,
+ stats->recv_bytes, stats->recv_msgs,
+ stats->write_bytes, stats->write_msgs,
+ stats->read_bytes, stats->read_msgs);
+ free(stats, M_DEVBUF);
}
- free(cb, M_DEVBUF);
+ num++;
+ free(e, M_DEVBUF);
}
- return 0;
+
+ return (0);
}
static int
@@ -171,9 +204,27 @@ krping_write(struct cdev *dev, struct uio *uio, int ioflag)
*cp = 0;
krpingmsg->len = (unsigned long)(cp - krpingmsg->msg);
uprintf("krping: write string = |%s|\n", krpingmsg->msg);
- err = krping_doit(krpingmsg->msg);
+ err = krping_doit(krpingmsg->msg, curproc);
free(krpingmsg, M_DEVBUF);
return(err);
}
-DEV_MODULE(krping,krping_loader,NULL);
+/*
+ * printf-style helper: forwards fmt and its variadic arguments to
+ * vtprintf(), passing cookie as the first argument and -1 as the second.
+ *
+ * NOTE(review): krping_write() hands curproc to krping_doit() as the
+ * cookie, so cookie is presumably a struct proc * here -- confirm
+ * against the PRINTF/DEBUG_LOG callers.
+ */
+void
+krping_printf(void *cookie, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vtprintf(cookie, -1, fmt, ap);
+ va_end(ap);
+}
+
+/* Return the result of SIGPENDING() evaluated for the current thread. */
+int
+krping_sigpending(void)
+{
+
+ return (SIGPENDING(curthread));
+}
+
+DEV_MODULE(krping, krping_loader, NULL);
+MODULE_DEPEND(krping, ibcore, 1, 1, 1);
OpenPOWER on IntegriCloud