From e622f2f4ad2142d2a613a57fb85f8cf737935ef5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Oct 2015 09:16:33 +0100 Subject: IB: split struct ib_send_wr This patch split up struct ib_send_wr so that all non-trivial verbs use their own structure which embedds struct ib_send_wr. This dramaticly shrinks the size of a WR for most common operations: sizeof(struct ib_send_wr) (old): 96 sizeof(struct ib_send_wr): 48 sizeof(struct ib_rdma_wr): 64 sizeof(struct ib_atomic_wr): 96 sizeof(struct ib_ud_wr): 88 sizeof(struct ib_fast_reg_wr): 88 sizeof(struct ib_bind_mw_wr): 96 sizeof(struct ib_sig_handover_wr): 80 And with Sagi's pending MR rework the fast registration WR will also be down to a reasonable size: sizeof(struct ib_fastreg_wr): 64 Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche [srp, srpt] Reviewed-by: Chuck Lever [sunrpc] Tested-by: Haggai Eran Tested-by: Sagi Grimberg Tested-by: Steve Wise --- net/rds/ib.h | 6 +- net/rds/ib_send.c | 71 ++++++++++---------- net/rds/iw.h | 6 +- net/rds/iw_rdma.c | 29 ++++---- net/rds/iw_send.c | 113 ++++++++++++++++---------------- net/sunrpc/xprtrdma/frwr_ops.c | 23 +++---- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 70 ++++++++++---------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 18 ++--- 8 files changed, 176 insertions(+), 160 deletions(-) (limited to 'net') diff --git a/net/rds/ib.h b/net/rds/ib.h index aae60fd..301c483 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -69,7 +69,11 @@ struct rds_ib_connect_private { struct rds_ib_send_work { void *s_op; - struct ib_send_wr s_wr; + union { + struct ib_send_wr s_wr; + struct ib_rdma_wr s_rdma_wr; + struct ib_atomic_wr s_atomic_wr; + }; struct ib_sge s_sge[RDS_IB_MAX_SGE]; unsigned long s_queued; }; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 4e88047..987386e 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -785,23 +785,23 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) send->s_queued = jiffies; if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { - send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; - send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare; - send->s_wr.wr.atomic.swap = op->op_m_cswp.swap; - send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask; - send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask; + send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; + send->s_atomic_wr.compare_add = op->op_m_cswp.compare; + send->s_atomic_wr.swap = op->op_m_cswp.swap; + send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask; + send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask; } else { /* FADD */ - send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; - send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add; - send->s_wr.wr.atomic.swap = 0; - send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask; - send->s_wr.wr.atomic.swap_mask = 0; + send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; + send->s_atomic_wr.compare_add = op->op_m_fadd.add; + send->s_atomic_wr.swap = 0; + send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask; + send->s_atomic_wr.swap_mask = 0; } nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); - send->s_wr.num_sge = 1; - send->s_wr.next = NULL; - send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; - send->s_wr.wr.atomic.rkey = op->op_rkey; + send->s_atomic_wr.wr.num_sge = 1; + send->s_atomic_wr.wr.next = NULL; + send->s_atomic_wr.remote_addr = op->op_remote_addr; + send->s_atomic_wr.rkey = op->op_rkey; send->s_op = op; rds_message_addref(container_of(send->s_op, struct rds_message, atomic)); @@ -826,11 +826,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) if (nr_sig) atomic_add(nr_sig, &ic->i_signaled_sends); - failed_wr = &send->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr); + failed_wr = &send->s_atomic_wr.wr; + ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr); rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic, - send, &send->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &send->s_wr); + send, &send->s_atomic_wr, ret, failed_wr); + BUG_ON(failed_wr != &send->s_atomic_wr.wr); if (ret) { printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); @@ -839,9 +839,9 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) goto out; } - if (unlikely(failed_wr != &send->s_wr)) { + if (unlikely(failed_wr != &send->s_atomic_wr.wr)) { printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret); - BUG_ON(failed_wr != &send->s_wr); + BUG_ON(failed_wr != &send->s_atomic_wr.wr); } out: @@ -912,22 +912,23 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify); send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; - send->s_wr.wr.rdma.remote_addr = remote_addr; - send->s_wr.wr.rdma.rkey = op->op_rkey; + send->s_rdma_wr.remote_addr = remote_addr; + send->s_rdma_wr.rkey = op->op_rkey; if (num_sge > max_sge) { - send->s_wr.num_sge = max_sge; + send->s_rdma_wr.wr.num_sge = max_sge; num_sge -= max_sge; } else { - send->s_wr.num_sge = num_sge; + send->s_rdma_wr.wr.num_sge = num_sge; } - send->s_wr.next = NULL; + send->s_rdma_wr.wr.next = NULL; if (prev) - prev->s_wr.next = &send->s_wr; + prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr; - for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { + for (j = 0; j < send->s_rdma_wr.wr.num_sge && + scat != &op->op_sg[op->op_count]; j++) { len = ib_sg_dma_len(ic->i_cm_id->device, scat); send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); @@ -942,7 +943,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) } rdsdebug("send %p wr %p num_sge %u next %p\n", send, - &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + &send->s_rdma_wr.wr, + send->s_rdma_wr.wr.num_sge, + send->s_rdma_wr.wr.next); prev = send; if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) @@ -963,11 +966,11 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) if (nr_sig) atomic_add(nr_sig, &ic->i_signaled_sends); - failed_wr = &first->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + failed_wr = &first->s_rdma_wr.wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr); rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, - first, &first->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &first->s_wr); + first, &first->s_rdma_wr.wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_rdma_wr.wr); if (ret) { printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); @@ -976,9 +979,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) goto out; } - if (unlikely(failed_wr != &first->s_wr)) { + if (unlikely(failed_wr != &first->s_rdma_wr.wr)) { printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret); - BUG_ON(failed_wr != &first->s_wr); + BUG_ON(failed_wr != &first->s_rdma_wr.wr); } diff --git a/net/rds/iw.h b/net/rds/iw.h index cbe6674..fe858e5 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -77,7 +77,11 @@ struct rds_iw_send_work { struct ib_fast_reg_page_list *s_page_list; unsigned char s_remap_count; - struct ib_send_wr s_wr; + union { + struct ib_send_wr s_send_wr; + struct ib_rdma_wr s_rdma_wr; + struct ib_fast_reg_wr s_fast_reg_wr; + }; struct ib_sge s_sge[RDS_IW_MAX_SGE]; unsigned long s_queued; }; diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 6a8fbd6..f8a612c 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -696,7 +696,8 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) { struct rds_iw_mr *ibmr = mapping->m_mr; - struct ib_send_wr f_wr, *failed_wr; + struct ib_fast_reg_wr f_wr; + struct ib_send_wr *failed_wr; int ret; /* @@ -709,22 +710,22 @@ static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) mapping->m_rkey = ibmr->mr->rkey; memset(&f_wr, 0, sizeof(f_wr)); - f_wr.wr_id = RDS_IW_FAST_REG_WR_ID; - f_wr.opcode = IB_WR_FAST_REG_MR; - f_wr.wr.fast_reg.length = mapping->m_sg.bytes; - f_wr.wr.fast_reg.rkey = mapping->m_rkey; - f_wr.wr.fast_reg.page_list = ibmr->page_list; - f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len; - f_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE | + f_wr.wr.wr_id = RDS_IW_FAST_REG_WR_ID; + f_wr.wr.opcode = IB_WR_FAST_REG_MR; + f_wr.length = mapping->m_sg.bytes; + f_wr.rkey = mapping->m_rkey; + f_wr.page_list = ibmr->page_list; + f_wr.page_list_len = mapping->m_sg.dma_len; + f_wr.page_shift = PAGE_SHIFT; + f_wr.access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - f_wr.wr.fast_reg.iova_start = 0; - f_wr.send_flags = IB_SEND_SIGNALED; + f_wr.iova_start = 0; + f_wr.wr.send_flags = IB_SEND_SIGNALED; - failed_wr = &f_wr; - ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr); - BUG_ON(failed_wr != &f_wr); + failed_wr = &f_wr.wr; + ret = ib_post_send(ibmr->cm_id->qp, &f_wr.wr, &failed_wr); + BUG_ON(failed_wr != &f_wr.wr); if (ret) printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 86152ec..f6e23c5 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -137,13 +137,13 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic) send->s_op = NULL; send->s_mapping = NULL; - send->s_wr.next = NULL; - send->s_wr.wr_id = i; - send->s_wr.sg_list = send->s_sge; - send->s_wr.num_sge = 1; - send->s_wr.opcode = IB_WR_SEND; - send->s_wr.send_flags = 0; - send->s_wr.ex.imm_data = 0; + send->s_send_wr.next = NULL; + send->s_send_wr.wr_id = i; + send->s_send_wr.sg_list = send->s_sge; + send->s_send_wr.num_sge = 1; + send->s_send_wr.opcode = IB_WR_SEND; + send->s_send_wr.send_flags = 0; + send->s_send_wr.ex.imm_data = 0; sge = rds_iw_data_sge(ic, send->s_sge); sge->lkey = 0; @@ -179,7 +179,7 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic) ib_dereg_mr(send->s_mr); BUG_ON(!send->s_page_list); ib_free_fast_reg_page_list(send->s_page_list); - if (send->s_wr.opcode == 0xdead) + if (send->s_send_wr.opcode == 0xdead) continue; if (send->s_rm) rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); @@ -247,7 +247,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) send = &ic->i_sends[oldest]; /* In the error case, wc.opcode sometimes contains garbage */ - switch (send->s_wr.opcode) { + switch (send->s_send_wr.opcode) { case IB_WR_SEND: if (send->s_rm) rds_iw_send_unmap_rm(ic, send, wc.status); @@ -262,12 +262,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) default: printk_ratelimited(KERN_NOTICE "RDS/IW: %s: unexpected opcode 0x%x in WR!\n", - __func__, send->s_wr.opcode); + __func__, send->s_send_wr.opcode); break; } - send->s_wr.opcode = 0xdead; - send->s_wr.num_sge = 1; + send->s_send_wr.opcode = 0xdead; + send->s_send_wr.num_sge = 1; if (time_after(jiffies, send->s_queued + HZ/2)) rds_iw_stats_inc(s_iw_tx_stalled); @@ -455,10 +455,10 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic, WARN_ON(pos != send - ic->i_sends); - send->s_wr.send_flags = send_flags; - send->s_wr.opcode = IB_WR_SEND; - send->s_wr.num_sge = 2; - send->s_wr.next = NULL; + send->s_send_wr.send_flags = send_flags; + send->s_send_wr.opcode = IB_WR_SEND; + send->s_send_wr.num_sge = 2; + send->s_send_wr.next = NULL; send->s_queued = jiffies; send->s_op = NULL; @@ -472,7 +472,7 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic, } else { /* We're sending a packet with no payload. There is only * one SGE */ - send->s_wr.num_sge = 1; + send->s_send_wr.num_sge = 1; sge = &send->s_sge[0]; } @@ -672,23 +672,23 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, */ if (ic->i_unsignaled_wrs-- == 0) { ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; } ic->i_unsignaled_bytes -= len; if (ic->i_unsignaled_bytes <= 0) { ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; } /* * Always signal the last one if we're stopping due to flow control. */ if (flow_controlled && i == (work_alloc-1)) - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; rdsdebug("send %p wr %p num_sge %u next %p\n", send, - &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next); sent += len; rm->data.op_dmaoff += len; @@ -722,7 +722,7 @@ add_header: } if (prev) - prev->s_wr.next = &send->s_wr; + prev->s_send_wr.next = &send->s_send_wr; prev = send; pos = (pos + 1) % ic->i_send_ring.w_nr; @@ -736,7 +736,7 @@ add_header: /* if we finished the message then send completion owns it */ if (scat == &rm->data.op_sg[rm->data.op_count]) { prev->s_rm = ic->i_rm; - prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; ic->i_rm = NULL; } @@ -748,11 +748,11 @@ add_header: rds_iw_send_add_credits(conn, credit_alloc - i); /* XXX need to worry about failed_wr and partial sends. */ - failed_wr = &first->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + failed_wr = &first->s_send_wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr); rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, - first, &first->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &first->s_wr); + first, &first->s_send_wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_send_wr); if (ret) { printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); @@ -778,14 +778,14 @@ static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rd * in the sg list is added to the fast reg page list and placed * inside the fast_reg_mr WR. */ - send->s_wr.opcode = IB_WR_FAST_REG_MR; - send->s_wr.wr.fast_reg.length = len; - send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey; - send->s_wr.wr.fast_reg.page_list = send->s_page_list; - send->s_wr.wr.fast_reg.page_list_len = nent; - send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE; - send->s_wr.wr.fast_reg.iova_start = sg_addr; + send->s_fast_reg_wr.wr.opcode = IB_WR_FAST_REG_MR; + send->s_fast_reg_wr.length = len; + send->s_fast_reg_wr.rkey = send->s_mr->rkey; + send->s_fast_reg_wr.page_list = send->s_page_list; + send->s_fast_reg_wr.page_list_len = nent; + send->s_fast_reg_wr.page_shift = PAGE_SHIFT; + send->s_fast_reg_wr.access_flags = IB_ACCESS_REMOTE_WRITE; + send->s_fast_reg_wr.iova_start = sg_addr; ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); } @@ -863,7 +863,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) num_sge = op->op_count; for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) { - send->s_wr.send_flags = 0; + send->s_rdma_wr.wr.send_flags = 0; send->s_queued = jiffies; /* @@ -872,7 +872,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) */ if (ic->i_unsignaled_wrs-- == 0) { ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; - send->s_wr.send_flags = IB_SEND_SIGNALED; + send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED; } /* To avoid the need to have the plumbing to invalidate the fastreg_mr used @@ -880,29 +880,30 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed. */ if (op->op_write) - send->s_wr.opcode = IB_WR_RDMA_WRITE; + send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE; else - send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; + send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; - send->s_wr.wr.rdma.remote_addr = remote_addr; - send->s_wr.wr.rdma.rkey = op->op_rkey; + send->s_rdma_wr.remote_addr = remote_addr; + send->s_rdma_wr.rkey = op->op_rkey; send->s_op = op; if (num_sge > rds_iwdev->max_sge) { - send->s_wr.num_sge = rds_iwdev->max_sge; + send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge; num_sge -= rds_iwdev->max_sge; } else - send->s_wr.num_sge = num_sge; + send->s_rdma_wr.wr.num_sge = num_sge; - send->s_wr.next = NULL; + send->s_rdma_wr.wr.next = NULL; if (prev) - prev->s_wr.next = &send->s_wr; + prev->s_send_wr.next = &send->s_rdma_wr.wr; - for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { + for (j = 0; j < send->s_rdma_wr.wr.num_sge && + scat != &op->op_sg[op->op_count]; j++) { len = ib_sg_dma_len(ic->i_cm_id->device, scat); - if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) + if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat); else { send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); @@ -917,15 +918,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) scat++; } - if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) { - send->s_wr.num_sge = 1; + if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) { + send->s_rdma_wr.wr.num_sge = 1; send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr; send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes; send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey; } rdsdebug("send %p wr %p num_sge %u next %p\n", send, - &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + &send->s_rdma_wr, + send->s_rdma_wr.wr.num_sge, + send->s_rdma_wr.wr.next); prev = send; if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) @@ -934,7 +937,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) /* if we finished the message then send completion owns it */ if (scat == &op->op_sg[op->op_count]) - first->s_wr.send_flags = IB_SEND_SIGNALED; + first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED; if (i < work_alloc) { rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i); @@ -953,11 +956,11 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) work_alloc++; } - failed_wr = &first->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + failed_wr = &first->s_rdma_wr.wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr); rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, - first, &first->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &first->s_wr); + first, &first->s_rdma_wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_rdma_wr.wr); if (ret) { printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 5318951..0d2f46f 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -312,7 +312,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct rpcrdma_mw *mw; struct rpcrdma_frmr *frmr; struct ib_mr *mr; - struct ib_send_wr fastreg_wr, *bad_wr; + struct ib_fast_reg_wr fastreg_wr; + struct ib_send_wr *bad_wr; u8 key; int len, pageoff; int i, rc; @@ -358,23 +359,23 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, __func__, mw, i, len); memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr_id = (unsigned long)(void *)mw; - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff; - fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.page_list_len = page_no; - fastreg_wr.wr.fast_reg.length = len; - fastreg_wr.wr.fast_reg.access_flags = writing ? + fastreg_wr.wr.wr_id = (unsigned long)(void *)mw; + fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.iova_start = seg1->mr_dma + pageoff; + fastreg_wr.page_list = frmr->fr_pgl; + fastreg_wr.page_shift = PAGE_SHIFT; + fastreg_wr.page_list_len = page_no; + fastreg_wr.length = len; + fastreg_wr.access_flags = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; mr = frmr->fr_mr; key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); - fastreg_wr.wr.fast_reg.rkey = mr->rkey; + fastreg_wr.rkey = mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, &fastreg_wr.wr, &bad_wr); if (rc) goto out_senderr; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index cb51742..7be42d0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -126,7 +126,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, u64 rs_offset, bool last) { - struct ib_send_wr read_wr; + struct ib_rdma_wr read_wr; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); int ret, read, pno; @@ -179,16 +179,16 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); memset(&read_wr, 0, sizeof(read_wr)); - read_wr.wr_id = (unsigned long)ctxt; - read_wr.opcode = IB_WR_RDMA_READ; - ctxt->wr_op = read_wr.opcode; - read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = rs_handle; - read_wr.wr.rdma.remote_addr = rs_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = pages_needed; - - ret = svc_rdma_send(xprt, &read_wr); + read_wr.wr.wr_id = (unsigned long)ctxt; + read_wr.wr.opcode = IB_WR_RDMA_READ; + ctxt->wr_op = read_wr.wr.opcode; + read_wr.wr.send_flags = IB_SEND_SIGNALED; + read_wr.rkey = rs_handle; + read_wr.remote_addr = rs_offset; + read_wr.wr.sg_list = ctxt->sge; + read_wr.wr.num_sge = pages_needed; + + ret = svc_rdma_send(xprt, &read_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -218,9 +218,9 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, u64 rs_offset, bool last) { - struct ib_send_wr read_wr; + struct ib_rdma_wr read_wr; struct ib_send_wr inv_wr; - struct ib_send_wr fastreg_wr; + struct ib_fast_reg_wr fastreg_wr; u8 key; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); @@ -289,31 +289,31 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, /* Prepare FASTREG WR */ memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; - fastreg_wr.wr.fast_reg.page_list = frmr->page_list; - fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.length = frmr->map_len; - fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; - fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; - fastreg_wr.next = &read_wr; + fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.iova_start = (unsigned long)frmr->kva; + fastreg_wr.page_list = frmr->page_list; + fastreg_wr.page_list_len = frmr->page_list_len; + fastreg_wr.page_shift = PAGE_SHIFT; + fastreg_wr.length = frmr->map_len; + fastreg_wr.access_flags = frmr->access_flags; + fastreg_wr.rkey = frmr->mr->lkey; + fastreg_wr.wr.next = &read_wr.wr; /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); - read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = rs_handle; - read_wr.wr.rdma.remote_addr = rs_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = 1; + read_wr.wr.send_flags = IB_SEND_SIGNALED; + read_wr.rkey = rs_handle; + read_wr.remote_addr = rs_offset; + read_wr.wr.sg_list = ctxt->sge; + read_wr.wr.num_sge = 1; if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { - read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; - read_wr.wr_id = (unsigned long)ctxt; - read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; + read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; + read_wr.wr.wr_id = (unsigned long)ctxt; + read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; } else { - read_wr.opcode = IB_WR_RDMA_READ; - read_wr.next = &inv_wr; + read_wr.wr.opcode = IB_WR_RDMA_READ; + read_wr.wr.next = &inv_wr; /* Prepare invalidate */ memset(&inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = (unsigned long)ctxt; @@ -321,10 +321,10 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; inv_wr.ex.invalidate_rkey = frmr->mr->lkey; } - ctxt->wr_op = read_wr.opcode; + ctxt->wr_op = read_wr.wr.opcode; /* Post the chain */ - ret = svc_rdma_send(xprt, &fastreg_wr); + ret = svc_rdma_send(xprt, &fastreg_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 1dfae83..969a1ab 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -217,7 +217,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, u32 xdr_off, int write_len, struct svc_rdma_req_map *vec) { - struct ib_send_wr write_wr; + struct ib_rdma_wr write_wr; struct ib_sge *sge; int xdr_sge_no; int sge_no; @@ -282,17 +282,17 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, /* Prepare WRITE WR */ memset(&write_wr, 0, sizeof write_wr); ctxt->wr_op = IB_WR_RDMA_WRITE; - write_wr.wr_id = (unsigned long)ctxt; - write_wr.sg_list = &sge[0]; - write_wr.num_sge = sge_no; - write_wr.opcode = IB_WR_RDMA_WRITE; - write_wr.send_flags = IB_SEND_SIGNALED; - write_wr.wr.rdma.rkey = rmr; - write_wr.wr.rdma.remote_addr = to; + write_wr.wr.wr_id = (unsigned long)ctxt; + write_wr.wr.sg_list = &sge[0]; + write_wr.wr.num_sge = sge_no; + write_wr.wr.opcode = IB_WR_RDMA_WRITE; + write_wr.wr.send_flags = IB_SEND_SIGNALED; + write_wr.rkey = rmr; + write_wr.remote_addr = to; /* Post It */ atomic_inc(&rdma_stat_write); - if (svc_rdma_send(xprt, &write_wr)) + if (svc_rdma_send(xprt, &write_wr.wr)) goto err; return write_len - bc; err: -- cgit v1.1 From fa20105e09e97e81aadf02f722c31195e4a75c84 Mon Sep 17 00:00:00 2001 From: Guy Shapiro Date: Thu, 22 Oct 2015 15:20:10 +0300 Subject: IB/cma: Add support for network namespaces Add support for network namespaces in the ib_cma module. This is accomplished by: 1. Adding network namespace parameter for rdma_create_id. This parameter is used to populate the network namespace field in rdma_id_private. rdma_create_id keeps a reference on the network namespace. 2. Using the network namespace from the rdma_id instead of init_net inside of ib_cma, when listening on an ID and when looking for an ID for an incoming request. 3. Decrementing the reference count for the appropriate network namespace when calling rdma_destroy_id. In order to preserve the current behavior init_net is passed when calling from other modules. Signed-off-by: Guy Shapiro Signed-off-by: Haggai Eran Signed-off-by: Yotam Kenneth Signed-off-by: Shachar Raindel Signed-off-by: Doug Ledford --- net/9p/trans_rdma.c | 4 ++-- net/rds/ib.c | 2 +- net/rds/ib_cm.c | 2 +- net/rds/iw.c | 2 +- net/rds/iw_cm.c | 2 +- net/rds/rdma_transport.c | 4 ++-- net/sunrpc/xprtrdma/svc_rdma_transport.c | 4 ++-- net/sunrpc/xprtrdma/verbs.c | 3 ++- 8 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index ba12102..52b4a2f 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -655,8 +655,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) return -ENOMEM; /* Create the RDMA CM ID */ - rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, - IB_QPT_RC); + rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(rdma->cm_id)) goto error; diff --git a/net/rds/ib.c b/net/rds/ib.c index 2d3f2ab..cd64ef9 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -317,7 +317,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); + cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 9043f5c..f5a9806 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -565,7 +565,7 @@ int rds_ib_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ - ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, + ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); diff --git a/net/rds/iw.c b/net/rds/iw.c index 3df0295..576f182 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -223,7 +223,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); + cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a6553a6..aea4c91 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ - ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, + ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index b9b40af..9c1fed8 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void) struct rdma_cm_id *cm_id; int ret; - cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, - IB_QPT_RC); + cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fcc3eb8..4a41122 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -692,8 +692,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, if (!cma_xprt) return ERR_PTR(-ENOMEM); - listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, - IB_QPT_RC); + listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(listen_id)) { ret = PTR_ERR(listen_id); dprintk("svcrdma: rdma_create_id failed = %d\n", ret); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 81e8d31..6c06ba0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -432,7 +432,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, init_completion(&ia->ri_done); - id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); + id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(id)) { rc = PTR_ERR(id); dprintk("RPC: %s: rdma_create_id() failed %i\n", -- cgit v1.1 From 4143f34e01e9cdf1882f98c54d9073e4de8c28fb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:35 +0300 Subject: xprtrdma: Port to new memory registration API Instead of maintaining a fastreg page list, keep an sg table and convert an array of pages to a sg list. Then call ib_map_mr_sg and construct ib_reg_wr. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Tested-by: Selvin Xavier Reviewed-by: Chuck Lever Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/frwr_ops.c | 118 +++++++++++++++++++++++----------------- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +- 2 files changed, 69 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 0d2f46f..a143444 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); if (IS_ERR(f->fr_mr)) goto out_mr_err; - f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); - if (IS_ERR(f->fr_pgl)) + + f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL); + if (!f->sg) goto out_list_err; + + sg_init_table(f->sg, depth); + return 0; out_mr_err: @@ -163,9 +167,9 @@ out_mr_err: return rc; out_list_err: - rc = PTR_ERR(f->fr_pgl); - dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", - __func__, rc); + rc = -ENOMEM; + dprintk("RPC: %s: sg allocation failure\n", + __func__); ib_dereg_mr(f->fr_mr); return rc; } @@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r) if (rc) dprintk("RPC: %s: ib_dereg_mr status %i\n", __func__, rc); - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); + kfree(r->r.frmr.sg); } static int @@ -312,14 +316,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct rpcrdma_mw *mw; struct rpcrdma_frmr *frmr; struct ib_mr *mr; - struct ib_fast_reg_wr fastreg_wr; + struct ib_reg_wr reg_wr; struct ib_send_wr *bad_wr; + int rc, i, n, dma_nents; u8 key; - int len, pageoff; - int i, rc; - int seg_len; - u64 pa; - int page_no; mw = seg1->rl_mw; seg1->rl_mw = NULL; @@ -332,64 +332,80 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); frmr = &mw->r.frmr; frmr->fr_state = FRMR_IS_VALID; + mr = frmr->fr_mr; - pageoff = offset_in_page(seg1->mr_offset); - seg1->mr_offset -= pageoff; /* start of page */ - seg1->mr_len += pageoff; - len = -pageoff; if (nsegs > ia->ri_max_frmr_depth) nsegs = ia->ri_max_frmr_depth; - for (page_no = i = 0; i < nsegs;) { - rpcrdma_map_one(device, seg, direction); - pa = seg->mr_dma; - for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { - frmr->fr_pgl->page_list[page_no++] = pa; - pa += PAGE_SIZE; - } - len += seg->mr_len; + for (i = 0; i < nsegs;) { + if (seg->mr_page) + sg_set_page(&frmr->sg[i], + seg->mr_page, + seg->mr_len, + offset_in_page(seg->mr_offset)); + else + sg_set_buf(&frmr->sg[i], seg->mr_offset, + seg->mr_len); + ++seg; ++i; + /* Check for holes */ if ((i < nsegs && offset_in_page(seg->mr_offset)) || offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } - dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", - __func__, mw, i, len); - - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr.wr_id = (unsigned long)(void *)mw; - fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.iova_start = seg1->mr_dma + pageoff; - fastreg_wr.page_list = frmr->fr_pgl; - fastreg_wr.page_shift = PAGE_SHIFT; - fastreg_wr.page_list_len = page_no; - fastreg_wr.length = len; - fastreg_wr.access_flags = writing ? - IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : - IB_ACCESS_REMOTE_READ; - mr = frmr->fr_mr; + frmr->sg_nents = i; + + dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction); + if (!dma_nents) { + pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n", + __func__, frmr->sg, frmr->sg_nents); + return -ENOMEM; + } + + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { + pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", + __func__, frmr->fr_mr, n, frmr->sg_nents); + rc = n < 0 ? n : -EINVAL; + goto out_senderr; + } + + dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", + __func__, mw, frmr->sg_nents, mr->length); + key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); - fastreg_wr.rkey = mr->rkey; + + reg_wr.wr.next = NULL; + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = (uintptr_t)mw; + reg_wr.wr.num_sge = 0; + reg_wr.wr.send_flags = 0; + reg_wr.mr = mr; + reg_wr.key = mr->rkey; + reg_wr.access = writing ? + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, &fastreg_wr.wr, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, ®_wr.wr, &bad_wr); if (rc) goto out_senderr; + seg1->mr_dir = direction; seg1->rl_mw = mw; seg1->mr_rkey = mr->rkey; - seg1->mr_base = seg1->mr_dma + pageoff; - seg1->mr_nsegs = i; - seg1->mr_len = len; - return i; + seg1->mr_base = mr->iova; + seg1->mr_nsegs = frmr->sg_nents; + seg1->mr_len = mr->length; + + return frmr->sg_nents; out_senderr: dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); - while (i--) - rpcrdma_unmap_one(device, --seg); + ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction); __frwr_queue_recovery(mw); return rc; } @@ -403,22 +419,22 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mw *mw = seg1->rl_mw; + struct rpcrdma_frmr *frmr = &mw->r.frmr; struct ib_send_wr invalidate_wr, *bad_wr; int rc, nsegs = seg->mr_nsegs; dprintk("RPC: %s: FRMR %p\n", __func__, mw); seg1->rl_mw = NULL; - mw->r.frmr.fr_state = FRMR_IS_INVALID; + frmr->fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof(invalidate_wr)); invalidate_wr.wr_id = (unsigned long)(void *)mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia->ri_device, seg++); + ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); read_lock(&ia->ri_qplock); rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); read_unlock(&ia->ri_qplock); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c09414e..c82abf4 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -193,7 +193,8 @@ enum rpcrdma_frmr_state { }; struct rpcrdma_frmr { - struct ib_fast_reg_page_list *fr_pgl; + struct scatterlist *sg; + int sg_nents; struct ib_mr *fr_mr; enum rpcrdma_frmr_state fr_state; struct work_struct fr_work; -- cgit v1.1 From 412a15c0fe537c59c794d4e8134580b9cb984a0c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:36 +0300 Subject: svcrdma: Port to new memory registration API Instead of maintaining a fastreg page list, keep an sg table and convert an array of pages to a sg list. Then call ib_map_mr_sg and construct ib_reg_wr. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Tested-by: Selvin Xavier Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 76 ++++++++++++++++++-------------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 34 +++++--------- 2 files changed, 53 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 7be42d0..cb09913 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -220,12 +220,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, { struct ib_rdma_wr read_wr; struct ib_send_wr inv_wr; - struct ib_fast_reg_wr fastreg_wr; + struct ib_reg_wr reg_wr; u8 key; - int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); - int ret, read, pno; + int ret, read, pno, dma_nents, n; u32 pg_off = *page_offset; u32 pg_no = *page_no; @@ -234,16 +234,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->direction = DMA_FROM_DEVICE; ctxt->frmr = frmr; - pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); - read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len); + read = min_t(int, nents << PAGE_SHIFT, rs_length); - frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); - frmr->map_len = pages_needed << PAGE_SHIFT; - frmr->page_list_len = pages_needed; + frmr->sg_nents = nents; - for (pno = 0; pno < pages_needed; pno++) { + for (pno = 0; pno < nents; pno++) { int len = min_t(int, rs_length, PAGE_SIZE - pg_off); head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; @@ -251,17 +249,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, head->arg.len += len; if (!pg_off) head->count++; + + sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no], + len, pg_off); + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; rqstp->rq_next_page = rqstp->rq_respages + 1; - frmr->page_list->page_list[pno] = - ib_dma_map_page(xprt->sc_cm_id->device, - head->arg.pages[pg_no], 0, - PAGE_SIZE, DMA_FROM_DEVICE); - ret = ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[pno]); - if (ret) - goto err; - atomic_inc(&xprt->sc_dma_used); /* adjust offset and wrap to next page if needed */ pg_off += len; @@ -277,28 +270,42 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, else clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device, + frmr->sg, frmr->sg_nents, + frmr->direction); + if (!dma_nents) { + pr_err("svcrdma: failed to dma map sg %p\n", + frmr->sg); + return -ENOMEM; + } + atomic_inc(&xprt->sc_dma_used); + + n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { + pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", + frmr->mr, n, frmr->sg_nents); + return n < 0 ? n : -EINVAL; + } + /* Bump the key */ key = (u8)(frmr->mr->lkey & 0x000000FF); ib_update_fast_reg_key(frmr->mr, ++key); - ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; + ctxt->sge[0].addr = frmr->mr->iova; ctxt->sge[0].lkey = frmr->mr->lkey; - ctxt->sge[0].length = read; + ctxt->sge[0].length = frmr->mr->length; ctxt->count = 1; ctxt->read_hdr = head; - /* Prepare FASTREG WR */ - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.iova_start = (unsigned long)frmr->kva; - fastreg_wr.page_list = frmr->page_list; - fastreg_wr.page_list_len = frmr->page_list_len; - fastreg_wr.page_shift = PAGE_SHIFT; - fastreg_wr.length = frmr->map_len; - fastreg_wr.access_flags = frmr->access_flags; - fastreg_wr.rkey = frmr->mr->lkey; - fastreg_wr.wr.next = &read_wr.wr; + /* Prepare REG WR */ + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = 0; + reg_wr.wr.send_flags = IB_SEND_SIGNALED; + reg_wr.wr.num_sge = 0; + reg_wr.mr = frmr->mr; + reg_wr.key = frmr->mr->lkey; + reg_wr.access = frmr->access_flags; + reg_wr.wr.next = &read_wr.wr; /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); @@ -324,7 +331,7 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->wr_op = read_wr.wr.opcode; /* Post the chain */ - ret = svc_rdma_send(xprt, &fastreg_wr.wr); + ret = svc_rdma_send(xprt, ®_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -338,7 +345,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, atomic_inc(&rdma_stat_read); return ret; err: - svc_rdma_unmap_dma(ctxt); + ib_dma_unmap_sg(xprt->sc_cm_id->device, + frmr->sg, frmr->sg_nents, frmr->direction); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4a41122..a266e87 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -732,7 +732,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) { struct ib_mr *mr; - struct ib_fast_reg_page_list *pl; + struct scatterlist *sg; struct svc_rdma_fastreg_mr *frmr; u32 num_sg; @@ -745,13 +745,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) if (IS_ERR(mr)) goto err_free_frmr; - pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, - num_sg); - if (IS_ERR(pl)) + sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL); + if (!sg) goto err_free_mr; + sg_init_table(sg, RPCSVC_MAXPAGES); + frmr->mr = mr; - frmr->page_list = pl; + frmr->sg = sg; INIT_LIST_HEAD(&frmr->frmr_list); return frmr; @@ -771,8 +772,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) frmr = list_entry(xprt->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); + kfree(frmr->sg); ib_dereg_mr(frmr->mr); - ib_free_fast_reg_page_list(frmr->page_list); kfree(frmr); } } @@ -786,8 +787,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) frmr = list_entry(rdma->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); - frmr->map_len = 0; - frmr->page_list_len = 0; + frmr->sg_nents = 0; } spin_unlock_bh(&rdma->sc_frmr_q_lock); if (frmr) @@ -796,25 +796,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) return rdma_alloc_frmr(rdma); } -static void frmr_unmap_dma(struct svcxprt_rdma *xprt, - struct svc_rdma_fastreg_mr *frmr) -{ - int page_no; - for (page_no = 0; page_no < frmr->page_list_len; page_no++) { - dma_addr_t addr = frmr->page_list->page_list[page_no]; - if (ib_dma_mapping_error(frmr->mr->device, addr)) - continue; - atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE, - frmr->direction); - } -} - void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, struct svc_rdma_fastreg_mr *frmr) { if (frmr) { - frmr_unmap_dma(rdma, frmr); + ib_dma_unmap_sg(rdma->sc_cm_id->device, + frmr->sg, frmr->sg_nents, frmr->direction); + atomic_dec(&rdma->sc_dma_used); spin_lock_bh(&rdma->sc_frmr_q_lock); WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); -- cgit v1.1 From 9ddc87374af90ee694bcc8d3412c6f2b51df1fea Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:37 +0300 Subject: RDS/IW: Convert to new memory registration API Get rid of fast_reg page list and its construction. Instead, just pass the RDS sg list to ib_map_mr_sg and post the new ib_reg_wr. This is done both for server IW RDMA_READ registration and the client remote key registration. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Acked-by: Santosh Shilimkar Signed-off-by: Doug Ledford --- net/rds/iw.h | 5 +-- net/rds/iw_rdma.c | 128 +++++++++++++++++++----------------------------------- net/rds/iw_send.c | 57 ++++++++++++------------ 3 files changed, 75 insertions(+), 115 deletions(-) (limited to 'net') diff --git a/net/rds/iw.h b/net/rds/iw.h index fe858e5..5af01d1 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -74,13 +74,12 @@ struct rds_iw_send_work { struct rm_rdma_op *s_op; struct rds_iw_mapping *s_mapping; struct ib_mr *s_mr; - struct ib_fast_reg_page_list *s_page_list; unsigned char s_remap_count; union { struct ib_send_wr s_send_wr; struct ib_rdma_wr s_rdma_wr; - struct ib_fast_reg_wr s_fast_reg_wr; + struct ib_reg_wr s_reg_wr; }; struct ib_sge s_sge[RDS_IW_MAX_SGE]; unsigned long s_queued; @@ -199,7 +198,7 @@ struct rds_iw_device { /* Magic WR_ID for ACKs */ #define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL) -#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL) +#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL) #define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL) struct rds_iw_statistics { diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index f8a612c..47bd684 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -47,7 +47,6 @@ struct rds_iw_mr { struct rdma_cm_id *cm_id; struct ib_mr *mr; - struct ib_fast_reg_page_list *page_list; struct rds_iw_mapping mapping; unsigned char remap_count; @@ -77,8 +76,8 @@ struct rds_iw_mr_pool { static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all); static void rds_iw_mr_pool_flush_worker(struct work_struct *work); -static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); -static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, +static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); +static int rds_iw_map_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr, struct scatterlist *sg, unsigned int nents); static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); @@ -258,19 +257,18 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg, sg->bytes = 0; } -static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, - struct rds_iw_scatterlist *sg) +static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, + struct rds_iw_scatterlist *sg) { struct ib_device *dev = rds_iwdev->dev; - u64 *dma_pages = NULL; - int i, j, ret; + int i, ret; WARN_ON(sg->dma_len); sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL); if (unlikely(!sg->dma_len)) { printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n"); - return ERR_PTR(-EBUSY); + return -EBUSY; } sg->bytes = 0; @@ -303,31 +301,14 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, if (sg->dma_npages > fastreg_message_size) goto out_unmap; - dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC); - if (!dma_pages) { - ret = -ENOMEM; - goto out_unmap; - } - for (i = j = 0; i < sg->dma_len; ++i) { - unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]); - u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]); - u64 end_addr; - end_addr = dma_addr + dma_len; - dma_addr &= ~PAGE_MASK; - for (; dma_addr < end_addr; dma_addr += PAGE_SIZE) - dma_pages[j++] = dma_addr; - BUG_ON(j > sg->dma_npages); - } - - return dma_pages; + return 0; out_unmap: ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL); sg->dma_len = 0; - kfree(dma_pages); - return ERR_PTR(ret); + return ret; } @@ -440,7 +421,7 @@ static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev) INIT_LIST_HEAD(&ibmr->mapping.m_list); ibmr->mapping.m_mr = ibmr; - err = rds_iw_init_fastreg(pool, ibmr); + err = rds_iw_init_reg(pool, ibmr); if (err) goto out_no_cigar; @@ -622,7 +603,7 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, ibmr->cm_id = cm_id; ibmr->device = rds_iwdev; - ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents); + ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents); if (ret == 0) *key_ret = ibmr->mr->rkey; else @@ -638,7 +619,7 @@ out: } /* - * iWARP fastreg handling + * iWARP reg handling * * The life cycle of a fastreg registration is a bit different from * FMRs. @@ -650,7 +631,7 @@ out: * This creates a bit of a problem for us, as we do not have the destination * IP in GET_MR, so the connection must be setup prior to the GET_MR call for * RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit - * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request + * will try to queue a LOCAL_INV (if needed) and a REG_MR work request * before queuing the SEND. When completions for these arrive, they are * dispatched to the MR has a bit set showing that RDMa can be performed. * @@ -659,11 +640,10 @@ out: * The expectation there is that this invalidation step includes ALL * PREVIOUSLY FREED MRs. */ -static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, - struct rds_iw_mr *ibmr) +static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr) { struct rds_iw_device *rds_iwdev = pool->device; - struct ib_fast_reg_page_list *page_list = NULL; struct ib_mr *mr; int err; @@ -676,56 +656,44 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, return err; } - /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages - * is not filled in. - */ - page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size); - if (IS_ERR(page_list)) { - err = PTR_ERR(page_list); - - printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err); - ib_dereg_mr(mr); - return err; - } - - ibmr->page_list = page_list; ibmr->mr = mr; return 0; } -static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) +static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping) { struct rds_iw_mr *ibmr = mapping->m_mr; - struct ib_fast_reg_wr f_wr; + struct rds_iw_scatterlist *m_sg = &mapping->m_sg; + struct ib_reg_wr reg_wr; struct ib_send_wr *failed_wr; - int ret; + int ret, n; + + n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE); + if (unlikely(n != m_sg->len)) + return n < 0 ? n : -EINVAL; + + reg_wr.wr.next = NULL; + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = RDS_IW_REG_WR_ID; + reg_wr.wr.num_sge = 0; + reg_wr.mr = ibmr->mr; + reg_wr.key = mapping->m_rkey; + reg_wr.access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; /* - * Perform a WR for the fast_reg_mr. Each individual page + * Perform a WR for the reg_mr. Each individual page * in the sg list is added to the fast reg page list and placed - * inside the fast_reg_mr WR. The key used is a rolling 8bit + * inside the reg_mr WR. The key used is a rolling 8bit * counter, which should guarantee uniqueness. */ ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++); mapping->m_rkey = ibmr->mr->rkey; - memset(&f_wr, 0, sizeof(f_wr)); - f_wr.wr.wr_id = RDS_IW_FAST_REG_WR_ID; - f_wr.wr.opcode = IB_WR_FAST_REG_MR; - f_wr.length = mapping->m_sg.bytes; - f_wr.rkey = mapping->m_rkey; - f_wr.page_list = ibmr->page_list; - f_wr.page_list_len = mapping->m_sg.dma_len; - f_wr.page_shift = PAGE_SHIFT; - f_wr.access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; - f_wr.iova_start = 0; - f_wr.wr.send_flags = IB_SEND_SIGNALED; - - failed_wr = &f_wr.wr; - ret = ib_post_send(ibmr->cm_id->qp, &f_wr.wr, &failed_wr); - BUG_ON(failed_wr != &f_wr.wr); + failed_wr = ®_wr.wr; + ret = ib_post_send(ibmr->cm_id->qp, ®_wr.wr, &failed_wr); + BUG_ON(failed_wr != ®_wr.wr); if (ret) printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); @@ -757,21 +725,20 @@ out: return ret; } -static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, - struct rds_iw_mr *ibmr, - struct scatterlist *sg, - unsigned int sg_len) +static int rds_iw_map_reg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr, + struct scatterlist *sg, + unsigned int sg_len) { struct rds_iw_device *rds_iwdev = pool->device; struct rds_iw_mapping *mapping = &ibmr->mapping; u64 *dma_pages; - int i, ret = 0; + int ret = 0; rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len); - dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg); - if (IS_ERR(dma_pages)) { - ret = PTR_ERR(dma_pages); + ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg); + if (ret) { dma_pages = NULL; goto out; } @@ -781,10 +748,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, goto out; } - for (i = 0; i < mapping->m_sg.dma_npages; ++i) - ibmr->page_list->page_list[i] = dma_pages[i]; - - ret = rds_iw_rdma_build_fastreg(mapping); + ret = rds_iw_rdma_reg_mr(mapping); if (ret) goto out; @@ -870,8 +834,6 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr) { - if (ibmr->page_list) - ib_free_fast_reg_page_list(ibmr->page_list); if (ibmr->mr) ib_dereg_mr(ibmr->mr); } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index f6e23c5..e20bd50 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -159,13 +159,6 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic) printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n"); break; } - - send->s_page_list = ib_alloc_fast_reg_page_list( - ic->i_cm_id->device, fastreg_message_size); - if (IS_ERR(send->s_page_list)) { - printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n"); - break; - } } } @@ -177,8 +170,6 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic) for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { BUG_ON(!send->s_mr); ib_dereg_mr(send->s_mr); - BUG_ON(!send->s_page_list); - ib_free_fast_reg_page_list(send->s_page_list); if (send->s_send_wr.opcode == 0xdead) continue; if (send->s_rm) @@ -227,7 +218,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) continue; } - if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) { + if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) { ic->i_fastreg_posted = 1; continue; } @@ -252,7 +243,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) if (send->s_rm) rds_iw_send_unmap_rm(ic, send, wc.status); break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: @@ -770,24 +761,26 @@ out: return ret; } -static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr) +static int rds_iw_build_send_reg(struct rds_iw_send_work *send, + struct scatterlist *sg, + int sg_nents) { - BUG_ON(nent > send->s_page_list->max_page_list_len); - /* - * Perform a WR for the fast_reg_mr. Each individual page - * in the sg list is added to the fast reg page list and placed - * inside the fast_reg_mr WR. - */ - send->s_fast_reg_wr.wr.opcode = IB_WR_FAST_REG_MR; - send->s_fast_reg_wr.length = len; - send->s_fast_reg_wr.rkey = send->s_mr->rkey; - send->s_fast_reg_wr.page_list = send->s_page_list; - send->s_fast_reg_wr.page_list_len = nent; - send->s_fast_reg_wr.page_shift = PAGE_SHIFT; - send->s_fast_reg_wr.access_flags = IB_ACCESS_REMOTE_WRITE; - send->s_fast_reg_wr.iova_start = sg_addr; + int n; + + n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE); + if (unlikely(n != sg_nents)) + return n < 0 ? n : -EINVAL; + + send->s_reg_wr.wr.opcode = IB_WR_REG_MR; + send->s_reg_wr.wr.wr_id = 0; + send->s_reg_wr.wr.num_sge = 0; + send->s_reg_wr.mr = send->s_mr; + send->s_reg_wr.key = send->s_mr->rkey; + send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE; ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); + + return 0; } int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) @@ -808,6 +801,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) int sent; int ret; int num_sge; + int sg_nents; rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); @@ -861,6 +855,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) scat = &op->op_sg[0]; sent = 0; num_sge = op->op_count; + sg_nents = 0; for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) { send->s_rdma_wr.wr.send_flags = 0; @@ -904,7 +899,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) len = ib_sg_dma_len(ic->i_cm_id->device, scat); if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) - send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat); + sg_nents++; else { send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); send->s_sge[j].length = len; @@ -951,8 +946,12 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) * fastreg_mr (or possibly a dma_mr) */ if (!op->op_write) { - rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], - op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); + ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos], + &op->op_sg[0], sg_nents); + if (ret) { + printk(KERN_WARNING "RDS/IW: failed to reg send mem\n"); + goto out; + } work_alloc++; } -- cgit v1.1