From 71c5e06cccd7a1b993d32b76a3f872129a5c7c8f Mon Sep 17 00:00:00 2001 From: np Date: Mon, 21 Oct 2013 06:31:56 +0000 Subject: MFC r256470: Update krping to the latest upstream code. Move all the FreeBSD specific parts to krping_dev.c, which leaves the other files as close to their upstream versions as possible. Approved by: re (glebius) --- sys/contrib/rdma/krping/getopt.c | 17 +- sys/contrib/rdma/krping/krping.c | 1589 ++++++++++++++++++++++------------ sys/contrib/rdma/krping/krping.h | 136 +-- sys/contrib/rdma/krping/krping_dev.c | 139 ++- 4 files changed, 1173 insertions(+), 708 deletions(-) (limited to 'sys') diff --git a/sys/contrib/rdma/krping/getopt.c b/sys/contrib/rdma/krping/getopt.c index 701910e..24e62e1 100644 --- a/sys/contrib/rdma/krping/getopt.c +++ b/sys/contrib/rdma/krping/getopt.c @@ -5,9 +5,10 @@ #include __FBSDID("$FreeBSD$"); -#include -#include -#include +#include +#include +#include + #include "getopt.h" /** @@ -49,29 +50,29 @@ int krping_getopt(const char *caller, char **options, if (opts->has_arg & OPT_NOPARAM) { return opts->val; } - printf("%s: the %s option requires " + printk(KERN_INFO "%s: the %s option requires " "an argument\n", caller, token); return -EINVAL; } if (opts->has_arg & OPT_INT) { char* v; - *value = strtoul(val, &v, 0); + *value = simple_strtoul(val, &v, 0); if (!*v) { return opts->val; } - printf("%s: invalid numeric value " + printk(KERN_INFO "%s: invalid numeric value " "in %s=%s\n", caller, token, val); return -EDOM; } if (opts->has_arg & OPT_STRING) { return opts->val; } - printf("%s: unexpected argument %s to the " + printk(KERN_INFO "%s: unexpected argument %s to the " "%s option\n", caller, val, token); return -EINVAL; } } - printf("%s: Unrecognized option %s\n", caller, token); + printk(KERN_INFO "%s: Unrecognized option %s\n", caller, token); return -EOPNOTSUPP; } diff --git a/sys/contrib/rdma/krping/krping.c b/sys/contrib/rdma/krping/krping.c index c0acf0c..1aed101 100644 --- 
a/sys/contrib/rdma/krping/krping.c +++ b/sys/contrib/rdma/krping/krping.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 Ammasso, Inc. All rights reserved. - * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2006-2009 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,39 +34,52 @@ #include __FBSDID("$FreeBSD$"); -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include #include -#include "getopt.h" #include "krping.h" +#include "getopt.h" + +extern int krping_debug; +#define DEBUG_LOG(cb, x...) if (krping_debug) krping_printf((cb)->cookie, x) +#define PRINTF(cb, x...) 
krping_printf((cb)->cookie, x) + +MODULE_AUTHOR("Steve Wise"); +MODULE_DESCRIPTION("RDMA ping client/server"); +MODULE_LICENSE("Dual BSD/GPL"); + +static __inline uint64_t +get_cycles(void) +{ + uint32_t low, high; + __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); + return (low | ((u_int64_t)high << 32)); +} -#define PFX "krping: " +typedef uint64_t cycles_t; -static int debug = 0; -#define DEBUG_LOG if (debug) printf +enum mem_type { + DMA = 1, + FASTREG = 2, + MW = 3, + MR = 4 +}; static const struct krping_option krping_opts[] = { {"count", OPT_INT, 'C'}, @@ -77,23 +90,29 @@ static const struct krping_option krping_opts[] = { {"validate", OPT_NOPARAM, 'V'}, {"server", OPT_NOPARAM, 's'}, {"client", OPT_NOPARAM, 'c'}, - {"dmamr", OPT_NOPARAM, 'D'}, - {"debug", OPT_NOPARAM, 'd'}, - {"wlat", OPT_NOPARAM, 'l'}, - {"rlat", OPT_NOPARAM, 'L'}, - {"bw", OPT_NOPARAM, 'B'}, - {"tx-depth", OPT_INT, 't'}, - {"poll", OPT_NOPARAM, 'P'}, - {"memlimit", OPT_INT, 'm'}, + {"mem_mode", OPT_STRING, 'm'}, + {"server_inv", OPT_NOPARAM, 'I'}, + {"wlat", OPT_NOPARAM, 'l'}, + {"rlat", OPT_NOPARAM, 'L'}, + {"bw", OPT_NOPARAM, 'B'}, + {"duplex", OPT_NOPARAM, 'd'}, + {"txdepth", OPT_INT, 'T'}, + {"poll", OPT_NOPARAM, 'P'}, + {"local_dma_lkey", OPT_NOPARAM, 'Z'}, + {"read_inv", OPT_NOPARAM, 'R'}, + {"fr", OPT_NOPARAM, 'f'}, {NULL, 0, 0} }; -struct mtx krping_mutex; +#define htonll(x) cpu_to_be64((x)) +#define ntohll(x) cpu_to_be64((x)) + +static struct mutex krping_mutex; /* * List of running krping threads. */ -struct krping_cb_list krping_cbs; +static LIST_HEAD(krping_cbs); /* * krping "ping/pong" loop: @@ -109,24 +128,118 @@ struct krping_cb_list krping_cbs; */ /* + * These states are used to signal events between the completion handler + * and the main client or server thread. + * + * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV, + * and RDMA_WRITE_COMPLETE for each ping. 
+ */ +enum test_state { + IDLE = 1, + CONNECT_REQUEST, + ADDR_RESOLVED, + ROUTE_RESOLVED, + CONNECTED, + RDMA_READ_ADV, + RDMA_READ_COMPLETE, + RDMA_WRITE_ADV, + RDMA_WRITE_COMPLETE, + ERROR +}; + +struct krping_rdma_info { + uint64_t buf; + uint32_t rkey; + uint32_t size; +}; + +/* * Default max buffer size for IO... */ #define RPING_BUFSIZE 128*1024 -#define RPING_SQ_DEPTH 32 +#define RPING_SQ_DEPTH 64 -static void krping_wait(struct krping_cb *cb, int state) -{ - int rc; - mtx_lock(&cb->lock); - while (cb->state < state) { - rc = msleep(cb, &cb->lock, PCATCH, "krping", 0); - if (rc && rc != ERESTART) { - cb->state = ERROR; - break; - } - } - mtx_unlock(&cb->lock); -} +/* + * Control block struct. + */ +struct krping_cb { + void *cookie; + int server; /* 0 iff client */ + struct ib_cq *cq; + struct ib_pd *pd; + struct ib_qp *qp; + + enum mem_type mem; + struct ib_mr *dma_mr; + + struct ib_fast_reg_page_list *page_list; + int page_list_len; + struct ib_send_wr fastreg_wr; + struct ib_send_wr invalidate_wr; + struct ib_mr *fastreg_mr; + int server_invalidate; + int read_inv; + u8 key; + + struct ib_mw *mw; + struct ib_mw_bind bind_attr; + + struct ib_recv_wr rq_wr; /* recv work request record */ + struct ib_sge recv_sgl; /* recv single SGE */ + struct krping_rdma_info recv_buf;/* malloc'd buffer */ + u64 recv_dma_addr; + DECLARE_PCI_UNMAP_ADDR(recv_mapping) + struct ib_mr *recv_mr; + + struct ib_send_wr sq_wr; /* send work requrest record */ + struct ib_sge send_sgl; + struct krping_rdma_info send_buf;/* single send buf */ + u64 send_dma_addr; + DECLARE_PCI_UNMAP_ADDR(send_mapping) + struct ib_mr *send_mr; + + struct ib_send_wr rdma_sq_wr; /* rdma work request record */ + struct ib_sge rdma_sgl; /* rdma single SGE */ + char *rdma_buf; /* used as rdma sink */ + u64 rdma_dma_addr; + DECLARE_PCI_UNMAP_ADDR(rdma_mapping) + struct ib_mr *rdma_mr; + + uint32_t remote_rkey; /* remote guys RKEY */ + uint64_t remote_addr; /* remote guys TO */ + uint32_t remote_len; /* 
remote guys LEN */ + + char *start_buf; /* rdma read src */ + u64 start_dma_addr; + DECLARE_PCI_UNMAP_ADDR(start_mapping) + struct ib_mr *start_mr; + + enum test_state state; /* used for cond/signalling */ + wait_queue_head_t sem; + struct krping_stats stats; + + uint16_t port; /* dst port in NBO */ + struct in_addr addr; /* dst addr in NBO */ + char *addr_str; /* dst addr string */ + int verbose; /* verbose logging */ + int count; /* ping count */ + int size; /* ping data size */ + int validate; /* validate ping data */ + int wlat; /* run wlat test */ + int rlat; /* run rlat test */ + int bw; /* run bw test */ + int duplex; /* run bw full duplex test */ + int poll; /* poll or block for rlat test */ + int txdepth; /* SQ depth */ + int local_dma_lkey; /* use 0 for lkey */ + int frtest; /* fastreg test */ + + /* CM stuff */ + struct rdma_cm_id *cm_id; /* connection on client side,*/ + /* listener on server side. */ + struct rdma_cm_id *child_cm_id; /* connection on server side */ + struct list_head list; +}; static int krping_cma_event_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) @@ -134,39 +247,37 @@ static int krping_cma_event_handler(struct rdma_cm_id *cma_id, int ret; struct krping_cb *cb = cma_id->context; - DEBUG_LOG(PFX "cma_event type %d cma_id %p (%s)\n", event->event, cma_id, - (cma_id == cb->cm_id) ? "parent" : "child"); + DEBUG_LOG(cb, "cma_event type %d cma_id %p (%s)\n", event->event, + cma_id, (cma_id == cb->cm_id) ? 
"parent" : "child"); - mtx_lock(&cb->lock); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: cb->state = ADDR_RESOLVED; ret = rdma_resolve_route(cma_id, 2000); if (ret) { - log(LOG_ERR, "rdma_resolve_route error %d\n", - ret); - wakeup(cb); + PRINTF(cb, "rdma_resolve_route error %d\n", ret); + wake_up_interruptible(&cb->sem); } break; case RDMA_CM_EVENT_ROUTE_RESOLVED: cb->state = ROUTE_RESOLVED; - wakeup(cb); + wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_CONNECT_REQUEST: cb->state = CONNECT_REQUEST; cb->child_cm_id = cma_id; - DEBUG_LOG(PFX "child cma %p\n", cb->child_cm_id); - wakeup(cb); + DEBUG_LOG(cb, "child cma %p\n", cb->child_cm_id); + wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_ESTABLISHED: - DEBUG_LOG(PFX "ESTABLISHED\n"); + DEBUG_LOG(cb, "ESTABLISHED\n"); if (!cb->server) { cb->state = CONNECTED; - wakeup(cb); } + wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_ADDR_ERROR: @@ -174,40 +285,34 @@ static int krping_cma_event_handler(struct rdma_cm_id *cma_id, case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: - log(LOG_ERR, "cma event %d, error %d\n", event->event, + PRINTF(cb, "cma event %d, error %d\n", event->event, event->status); cb->state = ERROR; - wakeup(cb); + wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_DISCONNECTED: - DEBUG_LOG(PFX "DISCONNECT EVENT...\n"); + PRINTF(cb, "DISCONNECT EVENT...\n"); cb->state = ERROR; - wakeup(cb); + wake_up_interruptible(&cb->sem); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: - DEBUG_LOG(PFX "cma detected device removal!!!!\n"); - cb->state = ERROR; - wakeup(cb); - mtx_unlock(&cb->lock); - krping_wait(cb, CLEANUP); - tsleep(cb, 0, "krping", 5000); - return 0; + PRINTF(cb, "cma detected device removal!!!!\n"); + break; default: - log(LOG_ERR, "oof bad type!\n"); - wakeup(cb); + PRINTF(cb, "oof bad type!\n"); + wake_up_interruptible(&cb->sem); break; } - mtx_unlock(&cb->lock); return 0; } static int 
server_recv(struct krping_cb *cb, struct ib_wc *wc) { if (wc->byte_len != sizeof(cb->recv_buf)) { - log(LOG_ERR, "Received bogus data, size %d\n", + PRINTF(cb, "Received bogus data, size %d\n", wc->byte_len); return -1; } @@ -215,7 +320,7 @@ static int server_recv(struct krping_cb *cb, struct ib_wc *wc) cb->remote_rkey = ntohl(cb->recv_buf.rkey); cb->remote_addr = ntohll(cb->recv_buf.buf); cb->remote_len = ntohl(cb->recv_buf.size); - DEBUG_LOG(PFX "Received rkey %x addr %llx len %d from peer\n", + DEBUG_LOG(cb, "Received rkey %x addr %llx len %d from peer\n", cb->remote_rkey, (unsigned long long)cb->remote_addr, cb->remote_len); @@ -230,7 +335,7 @@ static int server_recv(struct krping_cb *cb, struct ib_wc *wc) static int client_recv(struct krping_cb *cb, struct ib_wc *wc) { if (wc->byte_len != sizeof(cb->recv_buf)) { - log(LOG_ERR, "Received bogus data, size %d\n", + PRINTF(cb, "Received bogus data, size %d\n", wc->byte_len); return -1; } @@ -250,11 +355,13 @@ static void krping_cq_event_handler(struct ib_cq *cq, void *ctx) struct ib_recv_wr *bad_wr; int ret; - mtx_lock(&cb->lock); - KASSERT(cb->cq == cq, ("bad condition")); + BUG_ON(cb->cq != cq); if (cb->state == ERROR) { - log(LOG_ERR, "cq completion in ERROR state\n"); - mtx_unlock(&cb->lock); + PRINTF(cb, "cq completion in ERROR state\n"); + return; + } + if (cb->frtest) { + PRINTF(cb, "cq completion event in frtest!\n"); return; } if (!cb->wlat && !cb->rlat && !cb->bw) @@ -262,76 +369,77 @@ static void krping_cq_event_handler(struct ib_cq *cq, void *ctx) while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) { if (wc.status) { if (wc.status == IB_WC_WR_FLUSH_ERR) { - DEBUG_LOG("cq flushed\n"); + DEBUG_LOG(cb, "cq flushed\n"); continue; } else { - log(LOG_CRIT, "cq completion failed status %d\n", - wc.status); + PRINTF(cb, "cq completion failed with " + "wr_id %Lx status %d opcode %d vender_err %x\n", + wc.wr_id, wc.status, wc.opcode, wc.vendor_err); goto error; } } switch (wc.opcode) { case IB_WC_SEND: - 
DEBUG_LOG(PFX "send completion\n"); + DEBUG_LOG(cb, "send completion\n"); cb->stats.send_bytes += cb->send_sgl.length; cb->stats.send_msgs++; break; case IB_WC_RDMA_WRITE: - DEBUG_LOG(PFX "rdma write completion\n"); + DEBUG_LOG(cb, "rdma write completion\n"); cb->stats.write_bytes += cb->rdma_sq_wr.sg_list->length; cb->stats.write_msgs++; cb->state = RDMA_WRITE_COMPLETE; - wakeup(cb); + wake_up_interruptible(&cb->sem); break; case IB_WC_RDMA_READ: - DEBUG_LOG(PFX "rdma read completion\n"); + DEBUG_LOG(cb, "rdma read completion\n"); cb->stats.read_bytes += cb->rdma_sq_wr.sg_list->length; cb->stats.read_msgs++; cb->state = RDMA_READ_COMPLETE; - wakeup(cb); + wake_up_interruptible(&cb->sem); break; case IB_WC_RECV: - DEBUG_LOG(PFX "recv completion\n"); + DEBUG_LOG(cb, "recv completion\n"); cb->stats.recv_bytes += sizeof(cb->recv_buf); cb->stats.recv_msgs++; if (cb->wlat || cb->rlat || cb->bw) ret = server_recv(cb, &wc); else ret = cb->server ? server_recv(cb, &wc) : - client_recv(cb, &wc); + client_recv(cb, &wc); if (ret) { - log(LOG_ERR, "recv wc error: %d\n", ret); + PRINTF(cb, "recv wc error: %d\n", ret); goto error; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post recv error: %d\n", + PRINTF(cb, "post recv error: %d\n", ret); goto error; } - wakeup(cb); + wake_up_interruptible(&cb->sem); break; default: - log(LOG_ERR, "unknown!!!!! 
completion\n"); + PRINTF(cb, + "%s:%d Unexpected opcode %d, Shutting down\n", + __func__, __LINE__, wc.opcode); goto error; } } if (ret) { - log(LOG_ERR, "poll error %d\n", ret); + PRINTF(cb, "poll error %d\n", ret); goto error; } - mtx_unlock(&cb->lock); return; error: cb->state = ERROR; - wakeup(cb); - mtx_unlock(&cb->lock); + wake_up_interruptible(&cb->sem); } static int krping_accept(struct krping_cb *cb) @@ -339,7 +447,7 @@ static int krping_accept(struct krping_cb *cb) struct rdma_conn_param conn_param; int ret; - DEBUG_LOG(PFX "accepting client connection request\n"); + DEBUG_LOG(cb, "accepting client connection request\n"); memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; @@ -347,14 +455,15 @@ static int krping_accept(struct krping_cb *cb) ret = rdma_accept(cb->child_cm_id, &conn_param); if (ret) { - log(LOG_ERR, "rdma_accept error: %d\n", ret); + PRINTF(cb, "rdma_accept error: %d\n", ret); return ret; } if (!cb->wlat && !cb->rlat && !cb->bw) { - krping_wait(cb, CONNECTED); + wait_event_interruptible(cb->sem, cb->state >= CONNECTED); if (cb->state == ERROR) { - log(LOG_ERR, "wait for CONNECTED state %d\n", cb->state); + PRINTF(cb, "wait for CONNECTED state %d\n", + cb->state); return -1; } } @@ -363,19 +472,22 @@ static int krping_accept(struct krping_cb *cb) static void krping_setup_wr(struct krping_cb *cb) { - /* XXX X86 only here... not mapping for dma! 
*/ - cb->recv_sgl.addr = vtophys(&cb->recv_buf); + cb->recv_sgl.addr = cb->recv_dma_addr; cb->recv_sgl.length = sizeof cb->recv_buf; - if (cb->use_dmamr) + if (cb->local_dma_lkey) + cb->recv_sgl.lkey = cb->qp->device->local_dma_lkey; + else if (cb->mem == DMA) cb->recv_sgl.lkey = cb->dma_mr->lkey; else cb->recv_sgl.lkey = cb->recv_mr->lkey; cb->rq_wr.sg_list = &cb->recv_sgl; cb->rq_wr.num_sge = 1; - cb->send_sgl.addr = vtophys(&cb->send_buf); + cb->send_sgl.addr = cb->send_dma_addr; cb->send_sgl.length = sizeof cb->send_buf; - if (cb->use_dmamr) + if (cb->local_dma_lkey) + cb->send_sgl.lkey = cb->qp->device->local_dma_lkey; + else if (cb->mem == DMA) cb->send_sgl.lkey = cb->dma_mr->lkey; else cb->send_sgl.lkey = cb->send_mr->lkey; @@ -385,18 +497,39 @@ static void krping_setup_wr(struct krping_cb *cb) cb->sq_wr.sg_list = &cb->send_sgl; cb->sq_wr.num_sge = 1; - cb->rdma_addr = vtophys(cb->rdma_buf); - cb->rdma_sgl.addr = cb->rdma_addr; - if (cb->use_dmamr) - cb->rdma_sgl.lkey = cb->dma_mr->lkey; - else - cb->rdma_sgl.lkey = cb->rdma_mr->lkey; - cb->rdma_sq_wr.send_flags = IB_SEND_SIGNALED; - cb->rdma_sq_wr.sg_list = &cb->rdma_sgl; - cb->rdma_sq_wr.num_sge = 1; - - if (!cb->server || cb->wlat || cb->rlat || cb->bw) { - cb->start_addr = vtophys(cb->start_buf); + if (cb->server || cb->wlat || cb->rlat || cb->bw) { + cb->rdma_sgl.addr = cb->rdma_dma_addr; + if (cb->mem == MR) + cb->rdma_sgl.lkey = cb->rdma_mr->lkey; + cb->rdma_sq_wr.send_flags = IB_SEND_SIGNALED; + cb->rdma_sq_wr.sg_list = &cb->rdma_sgl; + cb->rdma_sq_wr.num_sge = 1; + } + + switch(cb->mem) { + case FASTREG: + + /* + * A chain of 2 WRs, INVALDATE_MR + FAST_REG_MR. + * both unsignaled. The client uses them to reregister + * the rdma buffers with a new key each iteration. 
+ */ + cb->fastreg_wr.opcode = IB_WR_FAST_REG_MR; + cb->fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + cb->fastreg_wr.wr.fast_reg.length = cb->size; + cb->fastreg_wr.wr.fast_reg.page_list = cb->page_list; + cb->fastreg_wr.wr.fast_reg.page_list_len = cb->page_list_len; + + cb->invalidate_wr.next = &cb->fastreg_wr; + cb->invalidate_wr.opcode = IB_WR_LOCAL_INV; + break; + case MW: + cb->bind_attr.wr_id = 0xabbaabba; + cb->bind_attr.send_flags = 0; /* unsignaled */ + cb->bind_attr.length = cb->size; + break; + default: + break; } } @@ -406,134 +539,207 @@ static int krping_setup_buffers(struct krping_cb *cb) struct ib_phys_buf buf; u64 iovbase; - DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb); + DEBUG_LOG(cb, "krping_setup_buffers called on cb %p\n", cb); - if (cb->use_dmamr) { + cb->recv_dma_addr = dma_map_single(cb->pd->device->dma_device, + &cb->recv_buf, + sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); + pci_unmap_addr_set(cb, recv_mapping, cb->recv_dma_addr); + cb->send_dma_addr = dma_map_single(cb->pd->device->dma_device, + &cb->send_buf, sizeof(cb->send_buf), + DMA_BIDIRECTIONAL); + pci_unmap_addr_set(cb, send_mapping, cb->send_dma_addr); + + if (cb->mem == DMA) { cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE| IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE); if (IS_ERR(cb->dma_mr)) { - log(LOG_ERR, "reg_dmamr failed\n"); - return PTR_ERR(cb->dma_mr); + DEBUG_LOG(cb, "reg_dmamr failed\n"); + ret = PTR_ERR(cb->dma_mr); + goto bail; } } else { + if (!cb->local_dma_lkey) { + buf.addr = cb->recv_dma_addr; + buf.size = sizeof cb->recv_buf; + DEBUG_LOG(cb, "recv buf dma_addr %llx size %d\n", buf.addr, + (int)buf.size); + iovbase = cb->recv_dma_addr; + cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, + IB_ACCESS_LOCAL_WRITE, + &iovbase); + + if (IS_ERR(cb->recv_mr)) { + DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); + ret = PTR_ERR(cb->recv_mr); + goto bail; + } - buf.addr = vtophys(&cb->recv_buf); - buf.size = sizeof cb->recv_buf; - iovbase = 
vtophys(&cb->recv_buf); - cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, - IB_ACCESS_LOCAL_WRITE, - &iovbase); - - if (IS_ERR(cb->recv_mr)) { - log(LOG_ERR, "recv_buf reg_mr failed\n"); - return PTR_ERR(cb->recv_mr); - } - - buf.addr = vtophys(&cb->send_buf); - buf.size = sizeof cb->send_buf; - iovbase = vtophys(&cb->send_buf); - cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, - 0, &iovbase); - - if (IS_ERR(cb->send_mr)) { - log(LOG_ERR, "send_buf reg_mr failed\n"); - ib_dereg_mr(cb->recv_mr); - return PTR_ERR(cb->send_mr); + buf.addr = cb->send_dma_addr; + buf.size = sizeof cb->send_buf; + DEBUG_LOG(cb, "send buf dma_addr %llx size %d\n", buf.addr, + (int)buf.size); + iovbase = cb->send_dma_addr; + cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, + 0, &iovbase); + + if (IS_ERR(cb->send_mr)) { + DEBUG_LOG(cb, "send_buf reg_mr failed\n"); + ret = PTR_ERR(cb->send_mr); + goto bail; + } } } - /* RNIC adapters have a limit upto which it can register physical memory - * If DMA-MR memory mode is set then normally driver registers maximum - * supported memory. After that if contigmalloc allocates memory beyond the - * specified RNIC limit then Krping may not work. 
- */ - if (cb->use_dmamr && cb->memlimit) - cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, cb->memlimit, - PAGE_SIZE, 0); - else - cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL, - PAGE_SIZE, 0); - + cb->rdma_buf = kmalloc(cb->size, GFP_KERNEL); if (!cb->rdma_buf) { - log(LOG_ERR, "rdma_buf malloc failed\n"); - ret = ENOMEM; - goto err1; - } - if (!cb->use_dmamr) { - - buf.addr = vtophys(cb->rdma_buf); - buf.size = cb->size; - iovbase = vtophys(cb->rdma_buf); - cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, + DEBUG_LOG(cb, "rdma_buf malloc failed\n"); + ret = -ENOMEM; + goto bail; + } + + cb->rdma_dma_addr = dma_map_single(cb->pd->device->dma_device, + cb->rdma_buf, cb->size, + DMA_BIDIRECTIONAL); + pci_unmap_addr_set(cb, rdma_mapping, cb->rdma_dma_addr); + if (cb->mem != DMA) { + switch (cb->mem) { + case FASTREG: + cb->page_list_len = (((cb->size - 1) & PAGE_MASK) + + PAGE_SIZE) >> PAGE_SHIFT; + cb->page_list = ib_alloc_fast_reg_page_list( + cb->pd->device, + cb->page_list_len); + if (IS_ERR(cb->page_list)) { + DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); + ret = PTR_ERR(cb->page_list); + goto bail; + } + cb->fastreg_mr = ib_alloc_fast_reg_mr(cb->pd, + cb->page_list->max_page_list_len); + if (IS_ERR(cb->fastreg_mr)) { + DEBUG_LOG(cb, "recv_buf reg_mr failed\n"); + ret = PTR_ERR(cb->fastreg_mr); + goto bail; + } + DEBUG_LOG(cb, "fastreg rkey 0x%x page_list %p" + " page_list_len %u\n", cb->fastreg_mr->rkey, + cb->page_list, cb->page_list_len); + break; + case MW: + cb->mw = ib_alloc_mw(cb->pd); + if (IS_ERR(cb->mw)) { + DEBUG_LOG(cb, "recv_buf alloc_mw failed\n"); + ret = PTR_ERR(cb->mw); + goto bail; + } + DEBUG_LOG(cb, "mw rkey 0x%x\n", cb->mw->rkey); + /*FALLTHROUGH*/ + case MR: + buf.addr = cb->rdma_dma_addr; + buf.size = cb->size; + iovbase = cb->rdma_dma_addr; + cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, IB_ACCESS_REMOTE_READ| IB_ACCESS_REMOTE_WRITE, &iovbase); - - if (IS_ERR(cb->rdma_mr)) { - log(LOG_ERR, "rdma_buf reg_mr 
failed\n"); - ret = PTR_ERR(cb->rdma_mr); - goto err2; + if (IS_ERR(cb->rdma_mr)) { + DEBUG_LOG(cb, "rdma_buf reg_mr failed\n"); + ret = PTR_ERR(cb->rdma_mr); + goto bail; + } + DEBUG_LOG(cb, "rdma buf dma_addr %llx size %d mr rkey 0x%x\n", + buf.addr, (int)buf.size, cb->rdma_mr->rkey); + break; + default: + ret = -EINVAL; + goto bail; + break; } } if (!cb->server || cb->wlat || cb->rlat || cb->bw) { - if (cb->use_dmamr && cb->memlimit) - cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, - 0, cb->memlimit, PAGE_SIZE, 0); - else - cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, - 0, -1UL, PAGE_SIZE, 0); + + cb->start_buf = kmalloc(cb->size, GFP_KERNEL); if (!cb->start_buf) { - log(LOG_ERR, "start_buf malloc failed\n"); - ret = ENOMEM; - goto err2; + DEBUG_LOG(cb, "start_buf malloc failed\n"); + ret = -ENOMEM; + goto bail; } - if (!cb->use_dmamr) { + + cb->start_dma_addr = dma_map_single(cb->pd->device->dma_device, + cb->start_buf, cb->size, + DMA_BIDIRECTIONAL); + pci_unmap_addr_set(cb, start_mapping, cb->start_dma_addr); + + if (cb->mem == MR || cb->mem == MW) { unsigned flags = IB_ACCESS_REMOTE_READ; - if (cb->wlat || cb->rlat || cb->bw) + if (cb->wlat || cb->rlat || cb->bw) flags |= IB_ACCESS_REMOTE_WRITE; - buf.addr = vtophys(cb->start_buf); + + buf.addr = cb->start_dma_addr; buf.size = cb->size; - iovbase = vtophys(cb->start_buf); + DEBUG_LOG(cb, "start buf dma_addr %llx size %d\n", + buf.addr, (int)buf.size); + iovbase = cb->start_dma_addr; cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, flags, &iovbase); if (IS_ERR(cb->start_mr)) { - log(LOG_ERR, "start_buf reg_mr failed\n"); + DEBUG_LOG(cb, "start_buf reg_mr failed\n"); ret = PTR_ERR(cb->start_mr); - goto err3; + goto bail; } } } krping_setup_wr(cb); - DEBUG_LOG(PFX "allocated & registered buffers...\n"); + DEBUG_LOG(cb, "allocated & registered buffers...\n"); return 0; -err3: - contigfree(cb->start_buf, cb->size, M_DEVBUF); - - if (!cb->use_dmamr) +bail: + if (cb->fastreg_mr && 
!IS_ERR(cb->fastreg_mr)) + ib_dereg_mr(cb->fastreg_mr); + if (cb->mw && !IS_ERR(cb->mw)) + ib_dealloc_mw(cb->mw); + if (cb->rdma_mr && !IS_ERR(cb->rdma_mr)) ib_dereg_mr(cb->rdma_mr); -err2: - contigfree(cb->rdma_buf, cb->size, M_DEVBUF); -err1: - if (cb->use_dmamr) + if (cb->page_list && !IS_ERR(cb->page_list)) + ib_free_fast_reg_page_list(cb->page_list); + if (cb->dma_mr && !IS_ERR(cb->dma_mr)) ib_dereg_mr(cb->dma_mr); - else { + if (cb->recv_mr && !IS_ERR(cb->recv_mr)) ib_dereg_mr(cb->recv_mr); + if (cb->send_mr && !IS_ERR(cb->send_mr)) ib_dereg_mr(cb->send_mr); - } + if (cb->rdma_buf) + kfree(cb->rdma_buf); + if (cb->start_buf) + kfree(cb->start_buf); return ret; } static void krping_free_buffers(struct krping_cb *cb) { - DEBUG_LOG(PFX "krping_free_buffers called on cb %p\n", cb); + DEBUG_LOG(cb, "krping_free_buffers called on cb %p\n", cb); -#if 0 + if (cb->dma_mr) + ib_dereg_mr(cb->dma_mr); + if (cb->send_mr) + ib_dereg_mr(cb->send_mr); + if (cb->recv_mr) + ib_dereg_mr(cb->recv_mr); + if (cb->rdma_mr) + ib_dereg_mr(cb->rdma_mr); + if (cb->start_mr) + ib_dereg_mr(cb->start_mr); + if (cb->fastreg_mr) + ib_dereg_mr(cb->fastreg_mr); + if (cb->mw) + ib_dealloc_mw(cb->mw); + dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, recv_mapping), sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); @@ -543,24 +749,12 @@ static void krping_free_buffers(struct krping_cb *cb) dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, rdma_mapping), cb->size, DMA_BIDIRECTIONAL); -#endif - contigfree(cb->rdma_buf, cb->size, M_DEVBUF); - if (!cb->server || cb->wlat || cb->rlat || cb->bw) { -#if 0 + kfree(cb->rdma_buf); + if (cb->start_buf) { dma_unmap_single(cb->pd->device->dma_device, pci_unmap_addr(cb, start_mapping), cb->size, DMA_BIDIRECTIONAL); -#endif - contigfree(cb->start_buf, cb->size, M_DEVBUF); - } - if (cb->use_dmamr) - ib_dereg_mr(cb->dma_mr); - else { - ib_dereg_mr(cb->send_mr); - ib_dereg_mr(cb->recv_mr); - ib_dereg_mr(cb->rdma_mr); - if (!cb->server) - 
ib_dereg_mr(cb->start_mr); + kfree(cb->start_buf); } } @@ -577,6 +771,7 @@ static int krping_create_qp(struct krping_cb *cb) init_attr.qp_type = IB_QPT_RC; init_attr.send_cq = cb->cq; init_attr.recv_cq = cb->cq; + init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; if (cb->server) { ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr); @@ -603,36 +798,36 @@ static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id) int ret; cb->pd = ib_alloc_pd(cm_id->device); if (IS_ERR(cb->pd)) { - log(LOG_ERR, "ib_alloc_pd failed\n"); + PRINTF(cb, "ib_alloc_pd failed\n"); return PTR_ERR(cb->pd); } - DEBUG_LOG(PFX "created pd %p\n", cb->pd); + DEBUG_LOG(cb, "created pd %p\n", cb->pd); - strlcpy(cb->name, cb->pd->device->name, sizeof(cb->name)); + strlcpy(cb->stats.name, cb->pd->device->name, sizeof(cb->stats.name)); cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL, cb, cb->txdepth * 2, 0); if (IS_ERR(cb->cq)) { - log(LOG_ERR, "ib_create_cq failed\n"); + PRINTF(cb, "ib_create_cq failed\n"); ret = PTR_ERR(cb->cq); goto err1; } - DEBUG_LOG(PFX "created cq %p\n", cb->cq); + DEBUG_LOG(cb, "created cq %p\n", cb->cq); - if (!cb->wlat && !cb->rlat && !cb->bw) { + if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) { ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); if (ret) { - log(LOG_ERR, "ib_create_cq failed\n"); + PRINTF(cb, "ib_create_cq failed\n"); goto err2; } } ret = krping_create_qp(cb); if (ret) { - log(LOG_ERR, "krping_create_qp failed: %d\n", ret); + PRINTF(cb, "krping_create_qp failed: %d\n", ret); goto err2; } - DEBUG_LOG(PFX "created qp %p\n", cb->qp); + DEBUG_LOG(cb, "created qp %p\n", cb->qp); return 0; err2: ib_destroy_cq(cb->cq); @@ -641,115 +836,257 @@ err1: return ret; } -static void krping_format_send(struct krping_cb *cb, u64 buf, - struct ib_mr *mr) +/* + * return the (possibly rebound) rkey for the rdma buffer. + * FASTREG mode: invalidate and rebind via fastreg wr. + * MW mode: rebind the MW. 
+ * other modes: just return the mr rkey. + */ +static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv) { - struct krping_rdma_info *info = &cb->send_buf; + u32 rkey = 0xffffffff; + u64 p; + struct ib_send_wr *bad_wr; + int i; + int ret; - info->buf = htonll(buf); - info->rkey = htonl(mr->rkey); - info->size = htonl(cb->size); + switch (cb->mem) { + case FASTREG: + cb->invalidate_wr.ex.invalidate_rkey = cb->fastreg_mr->rkey; + + /* + * Update the fastreg key. + */ + ib_update_fast_reg_key(cb->fastreg_mr, ++cb->key); + cb->fastreg_wr.wr.fast_reg.rkey = cb->fastreg_mr->rkey; + + /* + * Update the fastreg WR with new buf info. + */ + if (buf == (u64)cb->start_dma_addr) + cb->fastreg_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_READ; + else + cb->fastreg_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; + cb->fastreg_wr.wr.fast_reg.iova_start = buf; + p = (u64)(buf & PAGE_MASK); + for (i=0; i < cb->fastreg_wr.wr.fast_reg.page_list_len; + i++, p += PAGE_SIZE) { + cb->page_list->page_list[i] = p; + DEBUG_LOG(cb, "page_list[%d] 0x%llx\n", i, p); + } - DEBUG_LOG(PFX "RDMA addr %llx rkey %x len %d\n", - (unsigned long long)buf, mr->rkey, cb->size); + DEBUG_LOG(cb, "post_inv = %d, fastreg new rkey 0x%x shift %u len %u" + " iova_start %llx page_list_len %u\n", + post_inv, + cb->fastreg_wr.wr.fast_reg.rkey, + cb->fastreg_wr.wr.fast_reg.page_shift, + cb->fastreg_wr.wr.fast_reg.length, + cb->fastreg_wr.wr.fast_reg.iova_start, + cb->fastreg_wr.wr.fast_reg.page_list_len); + + if (post_inv) + ret = ib_post_send(cb->qp, &cb->invalidate_wr, &bad_wr); + else + ret = ib_post_send(cb->qp, &cb->fastreg_wr, &bad_wr); + if (ret) { + PRINTF(cb, "post send error %d\n", ret); + cb->state = ERROR; + } + rkey = cb->fastreg_mr->rkey; + break; + case MW: + /* + * Update the MW with new buf info. 
+ */ + if (buf == (u64)cb->start_dma_addr) { + cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ; + cb->bind_attr.mr = cb->start_mr; + } else { + cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE; + cb->bind_attr.mr = cb->rdma_mr; + } + cb->bind_attr.addr = buf; + DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n", + cb->mw->rkey, buf, cb->bind_attr.mr->rkey); + ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr); + if (ret) { + PRINTF(cb, "bind mw error %d\n", ret); + cb->state = ERROR; + } else + rkey = cb->mw->rkey; + break; + case MR: + if (buf == (u64)cb->start_dma_addr) + rkey = cb->start_mr->rkey; + else + rkey = cb->rdma_mr->rkey; + break; + case DMA: + rkey = cb->dma_mr->rkey; + break; + default: + PRINTF(cb, "%s:%d case ERROR\n", __func__, __LINE__); + cb->state = ERROR; + break; + } + return rkey; +} + +static void krping_format_send(struct krping_cb *cb, u64 buf) +{ + struct krping_rdma_info *info = &cb->send_buf; + u32 rkey; + + /* + * Client side will do fastreg or mw bind before + * advertising the rdma buffer. Server side + * sends have no data. + */ + if (!cb->server || cb->wlat || cb->rlat || cb->bw) { + rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate); + info->buf = htonll(buf); + info->rkey = htonl(rkey); + info->size = htonl(cb->size); + DEBUG_LOG(cb, "RDMA addr %llx rkey %x len %d\n", + (unsigned long long)buf, rkey, cb->size); + } } static void krping_test_server(struct krping_cb *cb) { - struct ib_send_wr *bad_wr; + struct ib_send_wr *bad_wr, inv; int ret; while (1) { /* Wait for client's Start STAG/TO/Len */ - krping_wait(cb, RDMA_READ_ADV); + wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_ADV); if (cb->state != RDMA_READ_ADV) { - DEBUG_LOG(PFX "wait for RDMA_READ_ADV state %d\n", + PRINTF(cb, "wait for RDMA_READ_ADV state %d\n", cb->state); break; } - DEBUG_LOG(PFX "server received sink adv\n"); + DEBUG_LOG(cb, "server received sink adv\n"); - /* Issue RDMA Read. 
*/ - cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = cb->remote_len; + cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 1); + cb->rdma_sq_wr.next = NULL; + + /* Issue RDMA Read. */ + if (cb->read_inv) + cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ_WITH_INV; + else { + + cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; + if (cb->mem == FASTREG) { + /* + * Immediately follow the read with a + * fenced LOCAL_INV. + */ + cb->rdma_sq_wr.next = &inv; + memset(&inv, 0, sizeof inv); + inv.opcode = IB_WR_LOCAL_INV; + inv.ex.invalidate_rkey = cb->fastreg_mr->rkey; + inv.send_flags = IB_SEND_FENCE; + } + } ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); break; } - DEBUG_LOG(PFX "server posted rdma read req \n"); + cb->rdma_sq_wr.next = NULL; + + DEBUG_LOG(cb, "server posted rdma read req \n"); /* Wait for read completion */ - krping_wait(cb, RDMA_READ_COMPLETE); + wait_event_interruptible(cb->sem, + cb->state >= RDMA_READ_COMPLETE); if (cb->state != RDMA_READ_COMPLETE) { - log(LOG_ERR, + PRINTF(cb, "wait for RDMA_READ_COMPLETE state %d\n", cb->state); break; } - DEBUG_LOG(PFX "server received read complete\n"); + DEBUG_LOG(cb, "server received read complete\n"); /* Display data in recv buf */ if (cb->verbose) - DEBUG_LOG("server ping data: %s\n", cb->rdma_buf); + PRINTF(cb, "server ping data: %s\n", + cb->rdma_buf); /* Tell client to continue */ + if (cb->server && cb->server_invalidate) { + cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey; + cb->sq_wr.opcode = IB_WR_SEND_WITH_INV; + DEBUG_LOG(cb, "send-w-inv rkey 0x%x\n", cb->remote_rkey); + } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); break; } - DEBUG_LOG(PFX "server posted go ahead\n"); + 
DEBUG_LOG(cb, "server posted go ahead\n"); /* Wait for client's RDMA STAG/TO/Len */ - krping_wait(cb, RDMA_WRITE_ADV); + wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); if (cb->state != RDMA_WRITE_ADV) { - log(LOG_ERR, + PRINTF(cb, "wait for RDMA_WRITE_ADV state %d\n", cb->state); break; } - DEBUG_LOG(PFX "server received sink adv\n"); + DEBUG_LOG(cb, "server received sink adv\n"); /* RDMA Write echo data */ cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1; - DEBUG_LOG(PFX "rdma write from lkey %x laddr %llx len %d\n", + if (cb->local_dma_lkey) + cb->rdma_sgl.lkey = cb->qp->device->local_dma_lkey; + else + cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 0); + + DEBUG_LOG(cb, "rdma write from lkey %x laddr %llx len %d\n", cb->rdma_sq_wr.sg_list->lkey, (unsigned long long)cb->rdma_sq_wr.sg_list->addr, cb->rdma_sq_wr.sg_list->length); ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); break; } /* Wait for completion */ - krping_wait(cb, RDMA_WRITE_COMPLETE); + ret = wait_event_interruptible(cb->sem, cb->state >= + RDMA_WRITE_COMPLETE); if (cb->state != RDMA_WRITE_COMPLETE) { - log(LOG_ERR, + PRINTF(cb, "wait for RDMA_WRITE_COMPLETE state %d\n", cb->state); break; } - DEBUG_LOG(PFX "server rdma write complete \n"); + DEBUG_LOG(cb, "server rdma write complete \n"); cb->state = CONNECTED; /* Tell client to begin again */ + if (cb->server && cb->server_invalidate) { + cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey; + cb->sq_wr.opcode = IB_WR_SEND_WITH_INV; + DEBUG_LOG(cb, "send-w-inv rkey 0x%x\n", cb->remote_rkey); + } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); break; } - DEBUG_LOG(PFX 
"server posted go ahead\n"); + DEBUG_LOG(cb, "server posted go ahead\n"); } } @@ -770,16 +1107,16 @@ static void rlat_test(struct krping_cb *cb) cb->rdma_sq_wr.sg_list->length = cb->size; microtime(&start_tv); - if (!cb->poll) { - cb->state = RDMA_READ_ADV; - ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); - } + if (!cb->poll) { + cb->state = RDMA_READ_ADV; + ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); + } while (scnt < iters) { - cb->state = RDMA_READ_ADV; + cb->state = RDMA_READ_ADV; ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, + PRINTF(cb, "Couldn't post send: ret=%d scnt %d\n", ret, scnt); return; @@ -787,30 +1124,33 @@ static void rlat_test(struct krping_cb *cb) do { if (!cb->poll) { - krping_wait(cb, RDMA_READ_COMPLETE); + wait_event_interruptible(cb->sem, + cb->state != RDMA_READ_ADV); if (cb->state == RDMA_READ_COMPLETE) { ne = 1; - ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(cb->cq, + IB_CQ_NEXT_COMP); } else { ne = -1; } } else ne = ib_poll_cq(cb->cq, 1, &wc); if (cb->state == ERROR) { - log(LOG_ERR, - "state == ERROR...bailing scnt %d\n", scnt); + PRINTF(cb, + "state == ERROR...bailing scnt %d\n", + scnt); return; } } while (ne == 0); if (ne < 0) { - log(LOG_ERR, "poll CQ failed %d\n", ne); + PRINTF(cb, "poll CQ failed %d\n", ne); return; } - if (cb->poll && wc.status != IB_WC_SUCCESS) { - log(LOG_ERR, "Completion wth error at %s:\n", + if (cb->poll && wc.status != IB_WC_SUCCESS) { + PRINTF(cb, "Completion wth error at %s:\n", cb->server ? 
"server" : "client"); - log(LOG_ERR, "Failed status %d: wr_id %d\n", + PRINTF(cb, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } @@ -823,75 +1163,18 @@ static void rlat_test(struct krping_cb *cb) stop_tv.tv_sec -= 1; } - log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d\n", + PRINTF(cb, "delta sec %lu delta usec %lu iter %d size %d\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, scnt, cb->size); } -static int alloc_cycle_mem(int cycle_iters, - cycles_t **post_cycles_start, - cycles_t **post_cycles_stop, - cycles_t **poll_cycles_start, - cycles_t **poll_cycles_stop, - cycles_t **last_poll_cycles_start) -{ - *post_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); - if (!*post_cycles_start) { - goto fail1; - } - *post_cycles_stop = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); - if (!*post_cycles_stop) { - goto fail2; - } - *poll_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); - if (!*poll_cycles_start) { - goto fail3; - } - *poll_cycles_stop = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); - if (!*poll_cycles_stop) { - goto fail4; - } - *last_poll_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); - if (!*last_poll_cycles_start) { - goto fail5; - } - return 0; -fail5: - free(*poll_cycles_stop, M_DEVBUF); -fail4: - free(*poll_cycles_start, M_DEVBUF); -fail3: - free(*post_cycles_stop, M_DEVBUF); -fail2: - free(*post_cycles_start, M_DEVBUF); -fail1: - log(LOG_ERR, "%s malloc failed\n", __FUNCTION__); - return ENOMEM; -} - -static void free_cycle_mem(cycles_t *post_cycles_start, - cycles_t *post_cycles_stop, - cycles_t *poll_cycles_start, - cycles_t *poll_cycles_stop, - cycles_t *last_poll_cycles_start) -{ - free(last_poll_cycles_start, M_DEVBUF); - free(poll_cycles_stop, M_DEVBUF); - free(poll_cycles_start, M_DEVBUF); - free(post_cycles_stop, M_DEVBUF); - free(post_cycles_start, M_DEVBUF); -} - static void 
wlat_test(struct krping_cb *cb) { int ccnt, scnt, rcnt; int iters=cb->count; volatile char *poll_buf = (char *) cb->start_buf; char *buf = (char *)cb->rdma_buf; - ccnt = 0; - scnt = 0; - rcnt = 0; struct timeval start_tv, stop_tv; cycles_t *post_cycles_start, *post_cycles_stop; cycles_t *poll_cycles_start, *poll_cycles_stop; @@ -899,16 +1182,37 @@ static void wlat_test(struct krping_cb *cb) cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; int i; int cycle_iters = 1000; - int err; - err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop, - &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start); - - if (err) { - log(LOG_ERR, "%s malloc failed\n", __FUNCTION__); + ccnt = 0; + scnt = 0; + rcnt = 0; + + post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); + if (!post_cycles_start) { + PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); + return; + } + post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); + if (!post_cycles_stop) { + PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); + return; + } + poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); + if (!poll_cycles_start) { + PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); + return; + } + poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); + if (!poll_cycles_stop) { + PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); + return; + } + last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), + GFP_KERNEL); + if (!last_poll_cycles_start) { + PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } - cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; @@ -924,7 +1228,8 @@ static void wlat_test(struct krping_cb *cb) ++rcnt; while (*poll_buf != (char)rcnt) { if (cb->state == ERROR) { - log(LOG_ERR, "state = ERROR, bailing\n"); + PRINTF(cb, + "state = ERROR, bailing\n"); return; } } @@ -937,7 +1242,8 @@ static void 
wlat_test(struct krping_cb *cb) if (scnt < cycle_iters) post_cycles_start[scnt] = get_cycles(); if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { - log(LOG_ERR, "Couldn't post send: scnt=%d\n", + PRINTF(cb, + "Couldn't post send: scnt=%d\n", scnt); return; } @@ -954,7 +1260,8 @@ static void wlat_test(struct krping_cb *cb) poll_cycles_start[ccnt] = get_cycles(); do { if (ccnt < cycle_iters) - last_poll_cycles_start[ccnt] = get_cycles(); + last_poll_cycles_start[ccnt] = + get_cycles(); ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ccnt < cycle_iters) @@ -962,15 +1269,18 @@ static void wlat_test(struct krping_cb *cb) ++ccnt; if (ne < 0) { - log(LOG_ERR, "poll CQ failed %d\n", ne); + PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { - log(LOG_ERR, "Completion wth error at %s:\n", + PRINTF(cb, + "Completion wth error at %s:\n", cb->server ? "server" : "client"); - log(LOG_ERR, "Failed status %d: wr_id %d\n", + PRINTF(cb, + "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); - log(LOG_ERR, "scnt=%d, rcnt=%d, ccnt=%d\n", + PRINTF(cb, + "scnt=%d, rcnt=%d, ccnt=%d\n", scnt, rcnt, ccnt); return; } @@ -986,27 +1296,27 @@ static void wlat_test(struct krping_cb *cb) for (i=0; i < cycle_iters; i++) { sum_post += post_cycles_stop[i] - post_cycles_start[i]; sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; - sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i]; + sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; } - - log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n", + PRINTF(cb, + "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d" + " sum_post %llu sum_poll %llu sum_last_poll %llu\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, scnt, cb->size, cycle_iters, (unsigned long long)sum_post, (unsigned long long)sum_poll, (unsigned long long)sum_last_poll); - - 
free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start, - poll_cycles_stop, last_poll_cycles_start); + kfree(post_cycles_start); + kfree(post_cycles_stop); + kfree(poll_cycles_start); + kfree(poll_cycles_stop); + kfree(last_poll_cycles_start); } static void bw_test(struct krping_cb *cb) { int ccnt, scnt, rcnt; int iters=cb->count; - ccnt = 0; - scnt = 0; - rcnt = 0; struct timeval start_tv, stop_tv; cycles_t *post_cycles_start, *post_cycles_stop; cycles_t *poll_cycles_start, *poll_cycles_stop; @@ -1014,16 +1324,37 @@ static void bw_test(struct krping_cb *cb) cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; int i; int cycle_iters = 1000; - int err; - err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop, - &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start); - - if (err) { - log(LOG_ERR, "%s kmalloc failed\n", __FUNCTION__); + ccnt = 0; + scnt = 0; + rcnt = 0; + + post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); + if (!post_cycles_start) { + PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); + return; + } + post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); + if (!post_cycles_stop) { + PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); + return; + } + poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); + if (!poll_cycles_start) { + PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); + return; + } + poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); + if (!poll_cycles_stop) { + PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); + return; + } + last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), + GFP_KERNEL); + if (!last_poll_cycles_start) { + PRINTF(cb, "%s kmalloc failed\n", __FUNCTION__); return; } - cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; @@ -1040,7 +1371,8 @@ static void bw_test(struct krping_cb *cb) if (scnt < 
cycle_iters) post_cycles_start[scnt] = get_cycles(); if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { - log(LOG_ERR, "Couldn't post send: scnt=%d\n", + PRINTF(cb, + "Couldn't post send: scnt=%d\n", scnt); return; } @@ -1057,7 +1389,8 @@ static void bw_test(struct krping_cb *cb) poll_cycles_start[ccnt] = get_cycles(); do { if (ccnt < cycle_iters) - last_poll_cycles_start[ccnt] = get_cycles(); + last_poll_cycles_start[ccnt] = + get_cycles(); ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ccnt < cycle_iters) @@ -1065,13 +1398,15 @@ static void bw_test(struct krping_cb *cb) ccnt += 1; if (ne < 0) { - log(LOG_ERR, "poll CQ failed %d\n", ne); + PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { - log(LOG_ERR, "Completion wth error at %s:\n", + PRINTF(cb, + "Completion wth error at %s:\n", cb->server ? "server" : "client"); - log(LOG_ERR, "Failed status %d: wr_id %d\n", + PRINTF(cb, + "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } @@ -1087,18 +1422,21 @@ static void bw_test(struct krping_cb *cb) for (i=0; i < cycle_iters; i++) { sum_post += post_cycles_stop[i] - post_cycles_start[i]; sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; - sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i]; + sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; } - - log(LOG_ERR, "delta sec %zu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n", + PRINTF(cb, + "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d" + " sum_post %llu sum_poll %llu sum_last_poll %llu\n", stop_tv.tv_sec - start_tv.tv_sec, stop_tv.tv_usec - start_tv.tv_usec, scnt, cb->size, cycle_iters, (unsigned long long)sum_post, (unsigned long long)sum_poll, (unsigned long long)sum_last_poll); - - free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start, - poll_cycles_stop, last_poll_cycles_start); + kfree(post_cycles_start); + kfree(post_cycles_stop); + 
kfree(poll_cycles_start); + kfree(poll_cycles_stop); + kfree(last_poll_cycles_start); } static void krping_rlat_test_server(struct krping_cb *cb) @@ -1113,28 +1451,25 @@ static void krping_rlat_test_server(struct krping_cb *cb) } /* Send STAG/TO/Len to client */ - if (cb->dma_mr) - krping_format_send(cb, cb->start_addr, cb->dma_mr); - else - krping_format_send(cb, cb->start_addr, cb->start_mr); + krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { - log(LOG_ERR, "poll error %d\n", ret); + PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { - log(LOG_ERR, "send completiong error %d\n", wc.status); + PRINTF(cb, "send completiong error %d\n", wc.status); return; } - krping_wait(cb, ERROR); + wait_event_interruptible(cb->sem, cb->state == ERROR); } static void krping_wlat_test_server(struct krping_cb *cb) @@ -1149,29 +1484,26 @@ static void krping_wlat_test_server(struct krping_cb *cb) } /* Send STAG/TO/Len to client */ - if (cb->dma_mr) - krping_format_send(cb, cb->start_addr, cb->dma_mr); - else - krping_format_send(cb, cb->start_addr, cb->start_mr); + krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { - log(LOG_ERR, "poll error %d\n", ret); + PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { - log(LOG_ERR, "send completiong error %d\n", wc.status); + PRINTF(cb, "send completiong error %d\n", wc.status); return; } wlat_test(cb); - krping_wait(cb, ERROR); + wait_event_interruptible(cb->sem, cb->state == ERROR); } static void 
krping_bw_test_server(struct krping_cb *cb) @@ -1186,30 +1518,48 @@ static void krping_bw_test_server(struct krping_cb *cb) } /* Send STAG/TO/Len to client */ - if (cb->dma_mr) - krping_format_send(cb, cb->start_addr, cb->dma_mr); - else - krping_format_send(cb, cb->start_addr, cb->start_mr); + krping_format_send(cb, cb->start_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { - log(LOG_ERR, "poll error %d\n", ret); + PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { - log(LOG_ERR, "send completiong error %d\n", wc.status); + PRINTF(cb, "send completiong error %d\n", wc.status); return; } if (cb->duplex) bw_test(cb); - krping_wait(cb, ERROR); + wait_event_interruptible(cb->sem, cb->state == ERROR); +} + +static int fastreg_supported(struct krping_cb *cb) +{ + struct ib_device *dev = cb->child_cm_id->device; + struct ib_device_attr attr; + int ret; + + ret = ib_query_device(dev, &attr); + if (ret) { + PRINTF(cb, "ib_query_device failed ret %d\n", ret); + return 0; + } + if (!(attr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { + PRINTF(cb, "Fastreg not supported - device_cap_flags 0x%x\n", + attr.device_cap_flags); + return 0; + } + DEBUG_LOG(cb, "Fastreg supported - device_cap_flags 0x%x\n", + attr.device_cap_flags); + return 1; } static int krping_bind_server(struct krping_cb *cb) @@ -1225,25 +1575,28 @@ static int krping_bind_server(struct krping_cb *cb) ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *) &sin); if (ret) { - log(LOG_ERR, "rdma_bind_addr error %d\n", ret); + PRINTF(cb, "rdma_bind_addr error %d\n", ret); return ret; } - DEBUG_LOG(PFX "rdma_bind_addr successful\n"); + DEBUG_LOG(cb, "rdma_bind_addr successful\n"); - DEBUG_LOG(PFX "rdma_listen\n"); + DEBUG_LOG(cb, "rdma_listen\n"); ret = rdma_listen(cb->cm_id, 3); if 
(ret) { - log(LOG_ERR, "rdma_listen failed: %d\n", ret); + PRINTF(cb, "rdma_listen failed: %d\n", ret); return ret; } - krping_wait(cb, CONNECT_REQUEST); + wait_event_interruptible(cb->sem, cb->state >= CONNECT_REQUEST); if (cb->state != CONNECT_REQUEST) { - log(LOG_ERR, "wait for CONNECT_REQUEST state %d\n", + PRINTF(cb, "wait for CONNECT_REQUEST state %d\n", cb->state); return -1; } + if (cb->mem == FASTREG && !fastreg_supported(cb)) + return -EINVAL; + return 0; } @@ -1258,25 +1611,25 @@ static void krping_run_server(struct krping_cb *cb) ret = krping_setup_qp(cb, cb->child_cm_id); if (ret) { - log(LOG_ERR, "setup_qp failed: %d\n", ret); - return; + PRINTF(cb, "setup_qp failed: %d\n", ret); + goto err0; } ret = krping_setup_buffers(cb); if (ret) { - log(LOG_ERR, "krping_setup_buffers failed: %d\n", ret); + PRINTF(cb, "krping_setup_buffers failed: %d\n", ret); goto err1; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "ib_post_recv failed: %d\n", ret); + PRINTF(cb, "ib_post_recv failed: %d\n", ret); goto err2; } ret = krping_accept(cb); if (ret) { - log(LOG_ERR, "connect error %d\n", ret); + PRINTF(cb, "connect error %d\n", ret); goto err2; } @@ -1288,13 +1641,13 @@ static void krping_run_server(struct krping_cb *cb) krping_bw_test_server(cb); else krping_test_server(cb); - rdma_disconnect(cb->child_cm_id); - rdma_destroy_id(cb->child_cm_id); err2: krping_free_buffers(cb); err1: krping_free_qp(cb); +err0: + rdma_destroy_id(cb->child_cm_id); } static void krping_test_client(struct krping_cb *cb) @@ -1320,41 +1673,38 @@ static void krping_test_client(struct krping_cb *cb) start = 65; cb->start_buf[cb->size - 1] = 0; - if (cb->dma_mr) - krping_format_send(cb, cb->start_addr, cb->dma_mr); - else - krping_format_send(cb, cb->start_addr, cb->start_mr); - + krping_format_send(cb, cb->start_dma_addr); + if (cb->state == ERROR) { + PRINTF(cb, "krping_format_send failed\n"); + break; + } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) 
{ - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); break; } /* Wait for server to ACK */ - krping_wait(cb, RDMA_WRITE_ADV); + wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); if (cb->state != RDMA_WRITE_ADV) { - log(LOG_ERR, + PRINTF(cb, "wait for RDMA_WRITE_ADV state %d\n", cb->state); break; } - if (cb->dma_mr) - krping_format_send(cb, cb->rdma_addr, cb->dma_mr); - else - krping_format_send(cb, cb->rdma_addr, cb->rdma_mr); - + krping_format_send(cb, cb->rdma_dma_addr); ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); break; } /* Wait for the server to say the RDMA Write is complete. */ - krping_wait(cb, RDMA_WRITE_COMPLETE); + wait_event_interruptible(cb->sem, + cb->state >= RDMA_WRITE_COMPLETE); if (cb->state != RDMA_WRITE_COMPLETE) { - log(LOG_ERR, + PRINTF(cb, "wait for RDMA_WRITE_COMPLETE state %d\n", cb->state); break; @@ -1362,12 +1712,15 @@ static void krping_test_client(struct krping_cb *cb) if (cb->validate) if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) { - log(LOG_ERR, "data mismatch!\n"); + PRINTF(cb, "data mismatch!\n"); break; } if (cb->verbose) - DEBUG_LOG("ping data: %s\n", cb->rdma_buf); + PRINTF(cb, "ping data: %s\n", cb->rdma_buf); +#ifdef SLOW_KRPING + wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); +#endif } } @@ -1380,24 +1733,25 @@ static void krping_rlat_test_client(struct krping_cb *cb) cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ - if (cb->dma_mr) - krping_format_send(cb, cb->start_addr, cb->dma_mr); - else - krping_format_send(cb, cb->start_addr, cb->rdma_mr); + krping_format_send(cb, cb->start_dma_addr); + if (cb->state == ERROR) { + PRINTF(cb, "krping_format_send failed\n"); + return; + } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); return; } /* 
Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { - log(LOG_ERR, "poll error %d\n", ret); + PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { - log(LOG_ERR, "send completion error %d\n", wc.status); + PRINTF(cb, "send completion error %d\n", wc.status); return; } @@ -1426,20 +1780,20 @@ static void krping_rlat_test_client(struct krping_cb *cb) microtime(&start); for (i=0; i < 100000; i++) { if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { - log(LOG_ERR, "Couldn't post send\n"); + PRINTF(cb, "Couldn't post send\n"); return; } do { ne = ib_poll_cq(cb->cq, 1, &wc); } while (ne == 0); if (ne < 0) { - log(LOG_ERR, "poll CQ failed %d\n", ne); + PRINTF(cb, "poll CQ failed %d\n", ne); return; } if (wc.status != IB_WC_SUCCESS) { - log(LOG_ERR, "Completion wth error at %s:\n", + PRINTF(cb, "Completion wth error at %s:\n", cb->server ? "server" : "client"); - log(LOG_ERR, "Failed status %d: wr_id %d\n", + PRINTF(cb, "Failed status %d: wr_id %d\n", wc.status, (int) wc.wr_id); return; } @@ -1453,7 +1807,7 @@ static void krping_rlat_test_client(struct krping_cb *cb) sec = stop.tv_sec - start.tv_sec; usec = stop.tv_usec - start.tv_usec; elapsed = sec * 1000000 + usec; - log(LOG_ERR, "0B-write-lat iters 100000 usec %llu\n", elapsed); + PRINTF(cb, "0B-write-lat iters 100000 usec %llu\n", elapsed); } #endif @@ -1469,24 +1823,25 @@ static void krping_wlat_test_client(struct krping_cb *cb) cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ - if (cb->dma_mr) - krping_format_send(cb, cb->start_addr, cb->dma_mr); - else - krping_format_send(cb, cb->start_addr, cb->start_mr); + krping_format_send(cb, cb->start_dma_addr); + if (cb->state == ERROR) { + PRINTF(cb, "krping_format_send failed\n"); + return; + } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = 
ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { - log(LOG_ERR, "poll error %d\n", ret); + PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { - log(LOG_ERR, "send completion error %d\n", wc.status); + PRINTF(cb, "send completion error %d\n", wc.status); return; } @@ -1507,24 +1862,25 @@ static void krping_bw_test_client(struct krping_cb *cb) cb->state = RDMA_READ_ADV; /* Send STAG/TO/Len to client */ - if (cb->dma_mr) - krping_format_send(cb, cb->start_addr, cb->dma_mr); - else - krping_format_send(cb, cb->start_addr, cb->start_mr); + krping_format_send(cb, cb->start_dma_addr); + if (cb->state == ERROR) { + PRINTF(cb, "krping_format_send failed\n"); + return; + } ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "post send error %d\n", ret); + PRINTF(cb, "post send error %d\n", ret); return; } /* Spin waiting for send completion */ while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); if (ret < 0) { - log(LOG_ERR, "poll error %d\n", ret); + PRINTF(cb, "poll error %d\n", ret); return; } if (wc.status) { - log(LOG_ERR, "send completion error %d\n", wc.status); + PRINTF(cb, "send completion error %d\n", wc.status); return; } @@ -1536,6 +1892,121 @@ static void krping_bw_test_client(struct krping_cb *cb) bw_test(cb); } +static void krping_fr_test(struct krping_cb *cb) +{ + struct ib_fast_reg_page_list *pl; + struct ib_send_wr fr, inv, *bad; + struct ib_wc wc; + u8 key = 0; + struct ib_mr *mr; + int i; + int ret; + int size = cb->size; + int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; + time_t start; + int count = 0; + int scnt = 0; + + pl = ib_alloc_fast_reg_page_list(cb->qp->device, plen); + if (IS_ERR(pl)) { + PRINTF(cb, "ib_alloc_fast_reg_page_list failed %ld\n", PTR_ERR(pl)); + return; + } + + mr = ib_alloc_fast_reg_mr(cb->pd, plen); + if (IS_ERR(mr)) { + PRINTF(cb, "ib_alloc_fast_reg_mr failed %ld\n", PTR_ERR(pl)); + goto err1; + } + + for (i=0; i<plen; i++) + pl->page_list[i] = 0xcafebabe | i; + + memset(&fr, 0, sizeof fr); +
fr.opcode = IB_WR_FAST_REG_MR; + fr.wr.fast_reg.page_shift = PAGE_SHIFT; + fr.wr.fast_reg.length = size; + fr.wr.fast_reg.page_list = pl; + fr.wr.fast_reg.page_list_len = plen; + fr.wr.fast_reg.iova_start = 0; + fr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; + fr.next = &inv; + memset(&inv, 0, sizeof inv); + inv.opcode = IB_WR_LOCAL_INV; + inv.send_flags = IB_SEND_SIGNALED; + + DEBUG_LOG(cb, "fr_test: stag index 0x%x plen %u size %u depth %u\n", mr->rkey >> 8, plen, cb->size, cb->txdepth); + start = time_uptime; + while (1) { + if ((time_uptime - start) >= 9) { + DEBUG_LOG(cb, "fr_test: pausing 1 second! count %u latest size %u plen %u\n", count, size, plen); + wait_event_interruptible(cb->sem, cb->state == ERROR); + if (cb->state == ERROR) + break; + start = time_uptime; + } + while (scnt < (cb->txdepth>>1)) { + ib_update_fast_reg_key(mr, ++key); + fr.wr.fast_reg.rkey = mr->rkey; + inv.ex.invalidate_rkey = mr->rkey; + size = arc4random() % cb->size; + if (size == 0) + size = cb->size; + plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; + fr.wr.fast_reg.length = size; + fr.wr.fast_reg.page_list_len = plen; + ret = ib_post_send(cb->qp, &fr, &bad); + if (ret) { + PRINTF(cb, "ib_post_send failed %d\n", ret); + goto err2; + } + scnt++; + } + + do { + ret = ib_poll_cq(cb->cq, 1, &wc); + if (ret < 0) { + PRINTF(cb, "ib_poll_cq failed %d\n", ret); + goto err2; + } + if (ret == 1) { + if (wc.status) { + PRINTF(cb, "completion error %u\n", wc.status); + goto err2; + } + count++; + scnt--; + } + else if (krping_sigpending()) { + PRINTF(cb, "signal!\n"); + goto err2; + } + } while (ret == 1); + } +err2: +#if 0 + DEBUG_LOG(cb, "sleeping 1 second\n"); + wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); +#endif + DEBUG_LOG(cb, "draining the cq...\n"); + do { + ret = ib_poll_cq(cb->cq, 1, &wc); + if (ret < 0) { + PRINTF(cb, "ib_poll_cq failed %d\n", ret); + break; + } + if (ret == 1) { + if (wc.status) { + PRINTF(cb, 
"completion error %u opcode %u\n", wc.status, wc.opcode); + } + } + } while (ret == 1); + DEBUG_LOG(cb, "fr_test: done!\n"); + ib_dereg_mr(mr); +err1: + ib_free_fast_reg_page_list(pl); +} + static int krping_connect_client(struct krping_cb *cb) { struct rdma_conn_param conn_param; @@ -1548,17 +2019,17 @@ static int krping_connect_client(struct krping_cb *cb) ret = rdma_connect(cb->cm_id, &conn_param); if (ret) { - log(LOG_ERR, "rdma_connect error %d\n", ret); + PRINTF(cb, "rdma_connect error %d\n", ret); return ret; } - krping_wait(cb, CONNECTED); + wait_event_interruptible(cb->sem, cb->state >= CONNECTED); if (cb->state == ERROR) { - log(LOG_ERR, "wait for CONNECTED state %d\n", cb->state); + PRINTF(cb, "wait for CONNECTED state %d\n", cb->state); return -1; } - DEBUG_LOG(PFX "rdma_connect successful\n"); + DEBUG_LOG(cb, "rdma_connect successful\n"); return 0; } @@ -1576,19 +2047,22 @@ static int krping_bind_client(struct krping_cb *cb) ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *) &sin, 2000); if (ret) { - log(LOG_ERR, "rdma_resolve_addr error %d\n", ret); + PRINTF(cb, "rdma_resolve_addr error %d\n", ret); return ret; } - krping_wait(cb, ROUTE_RESOLVED); + wait_event_interruptible(cb->sem, cb->state >= ROUTE_RESOLVED); if (cb->state != ROUTE_RESOLVED) { - log(LOG_ERR, + PRINTF(cb, "addr/route resolution did not resolve: state %d\n", cb->state); - return EINTR; + return -EINTR; } - DEBUG_LOG(PFX "rdma_resolve_addr - rdma_resolve_route successful\n"); + if (cb->mem == FASTREG && !fastreg_supported(cb)) + return -EINVAL; + + DEBUG_LOG(cb, "rdma_resolve_addr - rdma_resolve_route successful\n"); return 0; } @@ -1603,25 +2077,25 @@ static void krping_run_client(struct krping_cb *cb) ret = krping_setup_qp(cb, cb->cm_id); if (ret) { - log(LOG_ERR, "setup_qp failed: %d\n", ret); + PRINTF(cb, "setup_qp failed: %d\n", ret); return; } ret = krping_setup_buffers(cb); if (ret) { - log(LOG_ERR, "krping_setup_buffers failed: %d\n", ret); + PRINTF(cb, 
"krping_setup_buffers failed: %d\n", ret); goto err1; } ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); if (ret) { - log(LOG_ERR, "ib_post_recv failed: %d\n", ret); + PRINTF(cb, "ib_post_recv failed: %d\n", ret); goto err2; } ret = krping_connect_client(cb); if (ret) { - log(LOG_ERR, "connect error %d\n", ret); + PRINTF(cb, "connect error %d\n", ret); goto err2; } @@ -1631,6 +2105,8 @@ static void krping_run_client(struct krping_cb *cb) krping_rlat_test_client(cb); else if (cb->bw) krping_bw_test_client(cb); + else if (cb->frtest) + krping_fr_test(cb); else krping_test_client(cb); rdma_disconnect(cb->cm_id); @@ -1640,119 +2116,137 @@ err1: krping_free_qp(cb); } -int krping_doit(char *cmd) +int krping_doit(char *cmd, void *cookie) { struct krping_cb *cb; int op; int ret = 0; char *optarg; unsigned long optint; - debug = 0; - cb = malloc(sizeof(*cb), M_DEVBUF, M_WAITOK); + cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (!cb) - return ENOMEM; - bzero(cb, sizeof *cb); + return -ENOMEM; - mtx_lock(&krping_mutex); - TAILQ_INSERT_TAIL(&krping_cbs, cb, list); - mtx_unlock(&krping_mutex); + mutex_lock(&krping_mutex); + list_add_tail(&cb->list, &krping_cbs); + mutex_unlock(&krping_mutex); + cb->cookie = cookie; cb->server = -1; cb->state = IDLE; cb->size = 64; cb->txdepth = RPING_SQ_DEPTH; - cb->use_dmamr = 1; - cb->memlimit = 0; - mtx_init(&cb->lock, "krping mtx", NULL, MTX_DUPOK|MTX_DEF); + cb->mem = DMA; + init_waitqueue_head(&cb->sem); while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg, &optint)) != 0) { switch (op) { case 'a': cb->addr_str = optarg; - DEBUG_LOG(PFX "ipaddr (%s)\n", optarg); + DEBUG_LOG(cb, "ipaddr (%s)\n", optarg); if (!inet_aton(optarg, &cb->addr)) { - log(LOG_ERR, "bad addr string %s\n", optarg); + PRINTF(cb, "bad addr string %s\n", + optarg); ret = EINVAL; } break; - case 'D': - cb->use_dmamr = 1; - DEBUG_LOG(PFX "using dma mr\n"); - break; case 'p': cb->port = htons(optint); - DEBUG_LOG(PFX "port %d\n", (int)optint); + DEBUG_LOG(cb, 
"port %d\n", (int)optint); break; case 'P': cb->poll = 1; - DEBUG_LOG("server\n"); + DEBUG_LOG(cb, "server\n"); break; case 's': cb->server = 1; - DEBUG_LOG(PFX "server\n"); + DEBUG_LOG(cb, "server\n"); break; case 'c': cb->server = 0; - DEBUG_LOG(PFX "client\n"); + DEBUG_LOG(cb, "client\n"); break; case 'S': cb->size = optint; if ((cb->size < 1) || (cb->size > RPING_BUFSIZE)) { - log(LOG_ERR, "Invalid size %d " + PRINTF(cb, "Invalid size %d " "(valid range is 1 to %d)\n", cb->size, RPING_BUFSIZE); ret = EINVAL; } else - DEBUG_LOG(PFX "size %d\n", (int)optint); + DEBUG_LOG(cb, "size %d\n", (int)optint); break; case 'C': cb->count = optint; if (cb->count < 0) { - log(LOG_ERR, "Invalid count %d\n", + PRINTF(cb, "Invalid count %d\n", cb->count); ret = EINVAL; } else - DEBUG_LOG(PFX "count %d\n", (int) cb->count); + DEBUG_LOG(cb, "count %d\n", (int) cb->count); break; case 'v': cb->verbose++; - DEBUG_LOG(PFX "verbose\n"); + DEBUG_LOG(cb, "verbose\n"); break; case 'V': cb->validate++; - DEBUG_LOG(PFX "validate data\n"); - break; - case 'L': - cb->rlat++; + DEBUG_LOG(cb, "validate data\n"); break; case 'l': cb->wlat++; break; + case 'L': + cb->rlat++; + break; case 'B': cb->bw++; break; - case 't': - cb->txdepth = optint; - DEBUG_LOG(PFX "txdepth %d\n", cb->txdepth); - break; case 'd': - debug++; + cb->duplex++; break; case 'm': - cb->memlimit = optint; - if (cb->memlimit < 1) { - log(LOG_ERR, "Invalid memory limit %ju\n", - cb->memlimit); - ret = EINVAL; - } else - DEBUG_LOG(PFX "memory limit %d\n", (int)optint); - break; + if (!strncmp(optarg, "dma", 3)) + cb->mem = DMA; + else if (!strncmp(optarg, "fastreg", 7)) + cb->mem = FASTREG; + else if (!strncmp(optarg, "mw", 2)) + cb->mem = MW; + else if (!strncmp(optarg, "mr", 2)) + cb->mem = MR; + else { + PRINTF(cb, "unknown mem mode %s. 
" + "Must be dma, fastreg, mw, or mr\n", + optarg); + ret = -EINVAL; + break; + } + break; + case 'I': + cb->server_invalidate = 1; + break; + case 'T': + cb->txdepth = optint; + DEBUG_LOG(cb, "txdepth %d\n", (int) cb->txdepth); + break; + case 'Z': + cb->local_dma_lkey = 1; + DEBUG_LOG(cb, "using local dma lkey\n"); + break; + case 'R': + cb->read_inv = 1; + DEBUG_LOG(cb, "using read-with-inv\n"); + break; + case 'f': + cb->frtest = 1; + DEBUG_LOG(cb, "fast-reg test!\n"); + break; default: - log(LOG_ERR, "unknown opt %s\n", optarg); - ret = EINVAL; + PRINTF(cb, "unknown opt %s\n", optarg); + ret = -EINVAL; break; } } @@ -1760,46 +2254,77 @@ int krping_doit(char *cmd) goto out; if (cb->server == -1) { - log(LOG_ERR, "must be either client or server\n"); - ret = EINVAL; + PRINTF(cb, "must be either client or server\n"); + ret = -EINVAL; goto out; } - if ((cb->bw + cb->rlat + cb->wlat) > 1) { - log(LOG_ERR, "Pick only one test: bw, rlat, wlat\n"); - ret = EINVAL; + + if (cb->server && cb->frtest) { + PRINTF(cb, "must be client to run frtest\n"); + ret = -EINVAL; goto out; } + if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) { + PRINTF(cb, "Pick only one test: fr, bw, rlat, wlat\n"); + ret = -EINVAL; + goto out; + } + + if (cb->server_invalidate && cb->mem != FASTREG) { + PRINTF(cb, "server_invalidate only valid with fastreg mem_mode\n"); + ret = -EINVAL; + goto out; + } + + if (cb->read_inv && cb->mem != FASTREG) { + PRINTF(cb, "read_inv only valid with fastreg mem_mode\n"); + ret = -EINVAL; + goto out; + } + + if (cb->mem != MR && (cb->wlat || cb->rlat || cb->bw)) { + PRINTF(cb, "wlat, rlat, and bw tests only support mem_mode MR\n"); + ret = -EINVAL; + goto out; + } cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP); if (IS_ERR(cb->cm_id)) { ret = PTR_ERR(cb->cm_id); - log(LOG_ERR, "rdma_create_id error %d\n", ret); + PRINTF(cb, "rdma_create_id error %d\n", ret); goto out; } - DEBUG_LOG(PFX "created cm_id %p\n", cb->cm_id); + DEBUG_LOG(cb, 
"created cm_id %p\n", cb->cm_id); + if (cb->server) krping_run_server(cb); else krping_run_client(cb); - DEBUG_LOG(PFX "destroy cm_id %p\n", cb->cm_id); - - mtx_lock(&cb->lock); - cb->state = CLEANUP; - wakeup(cb); - mtx_unlock(&cb->lock); + DEBUG_LOG(cb, "destroy cm_id %p\n", cb->cm_id); rdma_destroy_id(cb->cm_id); out: - mtx_lock(&krping_mutex); - TAILQ_REMOVE(&krping_cbs, cb, list); - mtx_unlock(&krping_mutex); - free(cb, M_DEVBUF); + mutex_lock(&krping_mutex); + list_del(&cb->list); + mutex_unlock(&krping_mutex); + kfree(cb); return ret; } +void +krping_walk_cb_list(void (*f)(struct krping_stats *, void *), void *arg) +{ + struct krping_cb *cb; + + mutex_lock(&krping_mutex); + list_for_each_entry(cb, &krping_cbs, list) + (*f)(cb->pd ? &cb->stats : NULL, arg); + mutex_unlock(&krping_mutex); +} + void krping_init(void) { - mtx_init(&krping_mutex, "krping lock", NULL, MTX_DEF); - TAILQ_INIT(&krping_cbs); + + mutex_init(&krping_mutex); } diff --git a/sys/contrib/rdma/krping/krping.h b/sys/contrib/rdma/krping/krping.h index 5cced30..04be531 100644 --- a/sys/contrib/rdma/krping/krping.h +++ b/sys/contrib/rdma/krping/krping.h @@ -1,133 +1,21 @@ /* * $FreeBSD$ */ -#include -#include - -/* - * Krping header stuffs... - */ struct krping_stats { - unsigned send_bytes; - unsigned send_msgs; - unsigned recv_bytes; - unsigned recv_msgs; - unsigned write_bytes; - unsigned write_msgs; - unsigned read_bytes; - unsigned read_msgs; -}; - - -/* - * These states are used to signal events between the completion handler - * and the main client or server thread. - * - * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV, - * and RDMA_WRITE_COMPLETE for each ping. 
- */ -enum test_state { - IDLE = 1, - CONNECT_REQUEST, - ADDR_RESOLVED, - ROUTE_RESOLVED, - CONNECTED, - RDMA_READ_ADV, - RDMA_READ_COMPLETE, - RDMA_WRITE_ADV, - RDMA_WRITE_COMPLETE, - ERROR, - CLEANUP -}; - -struct krping_rdma_info { - uint64_t buf; - uint32_t rkey; - uint32_t size; -}; - -/* - * Control block struct. - */ -struct krping_cb { - int server; /* 0 iff client */ - struct ib_cq *cq; - struct ib_pd *pd; - struct ib_qp *qp; - struct ib_mr *dma_mr; - int use_dmamr; - - struct ib_recv_wr rq_wr; /* recv work request record */ - struct ib_sge recv_sgl; /* recv single SGE */ - struct krping_rdma_info recv_buf;/* malloc'd buffer */ - struct ib_mr *recv_mr; - - struct ib_send_wr sq_wr; /* send work requrest record */ - struct ib_sge send_sgl; - struct krping_rdma_info send_buf;/* single send buf */ - struct ib_mr *send_mr; - - struct ib_send_wr rdma_sq_wr; /* rdma work request record */ - struct ib_sge rdma_sgl; /* rdma single SGE */ - char *rdma_buf; /* used as rdma sink */ - u64 rdma_addr; - struct ib_mr *rdma_mr; - - uint32_t remote_rkey; /* remote guys RKEY */ - uint64_t remote_addr; /* remote guys TO */ - uint32_t remote_len; /* remote guys LEN */ - - char *start_buf; /* rdma read src */ - u64 start_addr; - struct ib_mr *start_mr; - - enum test_state state; /* used for cond/signalling */ - struct mtx lock; - struct krping_stats stats; - - uint16_t port; /* dst port in NBO */ - struct in_addr addr; /* dst addr in NBO */ - char *addr_str; /* dst addr string */ - int verbose; /* verbose logging */ - int count; /* ping count */ - int size; /* ping data size */ - int validate; /* validate ping data */ - uint64_t memlimit; /* limit of the physical memory that - can be registered with dma_mr mode */ - - /* CM stuff */ - struct rdma_cm_id *cm_id; /* connection on client side,*/ - /* listener on service side. 
*/ - struct rdma_cm_id *child_cm_id; /* connection on server side */ - TAILQ_ENTRY(krping_cb) list; - - int rlat; /* run read latency test */ - int wlat; /* run write latency test */ - int bw; /* run write bw test */ - int duplex; /* run write bw full duplex test */ - int poll; /* poll vs block in rlat */ - int txdepth; - + unsigned long long send_bytes; + unsigned long long send_msgs; + unsigned long long recv_bytes; + unsigned long long recv_msgs; + unsigned long long write_bytes; + unsigned long long write_msgs; + unsigned long long read_bytes; + unsigned long long read_msgs; char name[16]; }; -static __inline uint64_t -get_cycles(void) -{ - u_int32_t low, high; - __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); - return (low | ((u_int64_t)high << 32)); -} - -#define htonll(x) htobe64((x)) -#define ntohll(x) be64toh((x)) - -typedef uint64_t cycles_t; - -extern struct mtx krping_mutex; -TAILQ_HEAD(krping_cb_list, krping_cb); -extern struct krping_cb_list krping_cbs; - -int krping_doit(char *cmd); +int krping_doit(char *, void *); +void krping_walk_cb_list(void (*)(struct krping_stats *, void *), void *); void krping_init(void); +void krping_printf(void *, const char *, ...); +int krping_sigpending(void); diff --git a/sys/contrib/rdma/krping/krping_dev.c b/sys/contrib/rdma/krping/krping_dev.c index d6ab00a..2244d72 100644 --- a/sys/contrib/rdma/krping/krping_dev.c +++ b/sys/contrib/rdma/krping/krping_dev.c @@ -1,19 +1,20 @@ /* - * This code lifted from: + * This code lifted from: * Simple `echo' pseudo-device KLD * Murray Stokely * Converted to 5.X by Søren (Xride) Straarup */ /* - * /bin/echo "server,port=9999,addr=192.168.69.142,validate" > /dev/krping - * /bin/echo "client,port=9999,addr=192.168.69.142,validate" > /dev/krping + * /bin/echo "server,port=9999,addr=192.168.69.142,validate" > /dev/krping + * /bin/echo "client,port=9999,addr=192.168.69.142,validate" > /dev/krping */ #include __FBSDID("$FreeBSD$"); #include +#include #include /* uprintf */ 
#include #include /* defines used in kernel.h */ @@ -21,11 +22,19 @@ __FBSDID("$FreeBSD$"); #include /* cdevsw struct */ #include /* uio struct */ #include +#include +#include +#include #include "krping.h" #define BUFFERSIZE 512 +SYSCTL_NODE(_dev, OID_AUTO, krping, CTLFLAG_RW, 0, "kernel rping module"); + +int krping_debug = 0; +SYSCTL_INT(_dev_krping, OID_AUTO, debug, CTLFLAG_RW, &krping_debug, 0 , ""); + /* Function prototypes */ static d_open_t krping_open; static d_close_t krping_close; @@ -47,12 +56,15 @@ typedef struct s_krping { int len; } krping_t; +struct stats_list_entry { + STAILQ_ENTRY(stats_list_entry) link; + struct krping_stats *stats; +}; +STAILQ_HEAD(stats_list, stats_list_entry); + /* vars */ static struct cdev *krping_dev; -#undef MODULE_VERSION -#include - static int krping_loader(struct module *m, int what, void *arg) { @@ -61,7 +73,7 @@ krping_loader(struct module *m, int what, void *arg) switch (what) { case MOD_LOAD: /* kldload */ krping_init(); - krping_dev = make_dev(&krping_cdevsw, 0, UID_ROOT, GID_WHEEL, + krping_dev = make_dev(&krping_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "krping"); printf("Krping device loaded.\n"); break; @@ -73,61 +85,82 @@ krping_loader(struct module *m, int what, void *arg) err = EOPNOTSUPP; break; } - return err; + + return (err); } static int krping_open(struct cdev *dev, int oflags, int devtype, struct thread *p) { - int err = 0; - return err; + + return (0); } static int krping_close(struct cdev *dev, int fflag, int devtype, struct thread *p) { + return 0; } +static void +krping_copy_stats(struct krping_stats *stats, void *arg) +{ + struct stats_list_entry *s; + struct stats_list *list = arg; + + s = malloc(sizeof(*s), M_DEVBUF, M_NOWAIT | M_ZERO); + if (s == NULL) + return; + if (stats != NULL) { + s->stats = malloc(sizeof(*stats), M_DEVBUF, M_NOWAIT | M_ZERO); + if (s->stats == NULL) { + free(s, M_DEVBUF); + return; + } + *s->stats = *stats; + } + STAILQ_INSERT_TAIL(list, s, link); +} + static int 
krping_read(struct cdev *dev, struct uio *uio, int ioflag) { - struct krping_cb *cb, *cb2; - int num=1; - struct krping_cb_list copy_cbs; + int num = 1; + struct stats_list list; + struct stats_list_entry *e; + + STAILQ_INIT(&list); + krping_walk_cb_list(krping_copy_stats, &list); + + if (STAILQ_EMPTY(&list)) + return (0); uprintf("krping: %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n", - "num", "device", "snd bytes", "snd msgs", "rcv bytes", - "rcv msgs", "wr bytes", "wr msgs", "rd bytes", "rd msgs"); - TAILQ_INIT(&copy_cbs); - - mtx_lock(&krping_mutex); - TAILQ_FOREACH(cb, &krping_cbs, list) { - cb2 = malloc(sizeof(*cb), M_DEVBUF, M_NOWAIT|M_ZERO); - if (!cb2) - break; - bcopy(cb, cb2, sizeof(*cb)); - TAILQ_INSERT_TAIL(&copy_cbs, cb2, list); - } - mtx_unlock(&krping_mutex); - - while (!TAILQ_EMPTY(&copy_cbs)) { - cb = TAILQ_FIRST(&copy_cbs); - TAILQ_REMOVE(&copy_cbs, cb, list); - if (cb->pd) { - uprintf("krping: %4d %10s %10u %10u %10u %10u %10u %10u %10u %10u\n", - num++, cb->name, cb->stats.send_bytes, - cb->stats.send_msgs, cb->stats.recv_bytes, - cb->stats.recv_msgs, cb->stats.write_bytes, - cb->stats.write_msgs, - cb->stats.read_bytes, - cb->stats.read_msgs); - } else { - uprintf("krping: %d listen\n", num++); + "num", "device", "snd bytes", "snd msgs", "rcv bytes", "rcv msgs", + "wr bytes", "wr msgs", "rd bytes", "rd msgs"); + + while (!STAILQ_EMPTY(&list)) { + e = STAILQ_FIRST(&list); + STAILQ_REMOVE_HEAD(&list, link); + if (e->stats == NULL) + uprintf("krping: %d listen\n", num); + else { + struct krping_stats *stats = e->stats; + + uprintf("krping: %4d %10s %10llu %10llu %10llu %10llu " + "%10llu %10llu %10llu %10llu\n", num, stats->name, + stats->send_bytes, stats->send_msgs, + stats->recv_bytes, stats->recv_msgs, + stats->write_bytes, stats->write_msgs, + stats->read_bytes, stats->read_msgs); + free(stats, M_DEVBUF); } - free(cb, M_DEVBUF); + num++; + free(e, M_DEVBUF); } - return 0; + + return (0); } static int @@ -171,9 +204,27 @@ krping_write(struct cdev *dev, 
struct uio *uio, int ioflag) *cp = 0; krpingmsg->len = (unsigned long)(cp - krpingmsg->msg); uprintf("krping: write string = |%s|\n", krpingmsg->msg); - err = krping_doit(krpingmsg->msg); + err = krping_doit(krpingmsg->msg, curproc); free(krpingmsg, M_DEVBUF); return(err); } -DEV_MODULE(krping,krping_loader,NULL); +void +krping_printf(void *cookie, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vtprintf(cookie, -1, fmt, ap); + va_end(ap); +} + +int +krping_sigpending(void) +{ + + return (SIGPENDING(curthread)); +} + +DEV_MODULE(krping, krping_loader, NULL); +MODULE_DEPEND(krping, ibcore, 1, 1, 1); -- cgit v1.1