From e622f2f4ad2142d2a613a57fb85f8cf737935ef5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Oct 2015 09:16:33 +0100 Subject: IB: split struct ib_send_wr This patch splits up struct ib_send_wr so that all non-trivial verbs use their own structure which embeds struct ib_send_wr. This dramatically shrinks the size of a WR for most common operations: sizeof(struct ib_send_wr) (old): 96 sizeof(struct ib_send_wr): 48 sizeof(struct ib_rdma_wr): 64 sizeof(struct ib_atomic_wr): 96 sizeof(struct ib_ud_wr): 88 sizeof(struct ib_fast_reg_wr): 88 sizeof(struct ib_bind_mw_wr): 96 sizeof(struct ib_sig_handover_wr): 80 And with Sagi's pending MR rework the fast registration WR will also be down to a reasonable size: sizeof(struct ib_fastreg_wr): 64 Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche [srp, srpt] Reviewed-by: Chuck Lever [sunrpc] Tested-by: Haggai Eran Tested-by: Sagi Grimberg Tested-by: Steve Wise --- drivers/infiniband/core/agent.c | 2 +- drivers/infiniband/core/mad.c | 40 +++++----- drivers/infiniband/core/mad_priv.h | 2 +- drivers/infiniband/core/uverbs_cmd.c | 143 ++++++++++++++++++++--------------- 4 files changed, 104 insertions(+), 83 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 0429040..4fa524d 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -126,7 +126,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); - mad_send_wr->send_wr.wr.ud.port_num = port_num; + mad_send_wr->send_wr.port_num = port_num; } if (ib_post_send_mad(send_buf, NULL)) { diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 4b5c723..844d9bb 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -752,7 +752,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private 
*mad_agent_priv, struct ib_device *device = mad_agent_priv->agent.device; u8 port_num; struct ib_wc mad_wc; - struct ib_send_wr *send_wr = &mad_send_wr->send_wr; + struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); u16 out_mad_pkey_index = 0; u16 drslid; @@ -761,7 +761,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, if (rdma_cap_ib_switch(device) && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - port_num = send_wr->wr.ud.port_num; + port_num = send_wr->port_num; else port_num = mad_agent_priv->agent.port_num; @@ -832,9 +832,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } build_smp_wc(mad_agent_priv->agent.qp, - send_wr->wr_id, drslid, - send_wr->wr.ud.pkey_index, - send_wr->wr.ud.port_num, &mad_wc); + send_wr->wr.wr_id, drslid, + send_wr->pkey_index, + send_wr->port_num, &mad_wc); if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { mad_wc.byte_len = mad_send_wr->send_buf.hdr_len @@ -894,7 +894,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, local->mad_send_wr = mad_send_wr; if (opa) { - local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index; + local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index; local->return_wc_byte_len = mad_size; } /* Reference MAD agent until send side of local completion handled */ @@ -1039,14 +1039,14 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; - mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; - mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - mad_send_wr->send_wr.num_sge = 2; - mad_send_wr->send_wr.opcode = IB_WR_SEND; - mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; - mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; - mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - 
mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; + mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr; + mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; + mad_send_wr->send_wr.wr.num_sge = 2; + mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; + mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED; + mad_send_wr->send_wr.remote_qpn = remote_qpn; + mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY; + mad_send_wr->send_wr.pkey_index = pkey_index; if (rmpp_active) { ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); @@ -1151,7 +1151,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) /* Set WR ID to find mad_send_wr upon completion */ qp_info = mad_send_wr->mad_agent_priv->qp_info; - mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; + mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; mad_agent = mad_send_wr->send_buf.mad_agent; @@ -1179,7 +1179,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { - ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr, + ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, &bad_send_wr); list = &qp_info->send_queue.list; } else { @@ -1244,7 +1244,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, * request associated with the completion */ next_send_buf = send_buf->next; - mad_send_wr->send_wr.wr.ud.ah = send_buf->ah; + mad_send_wr->send_wr.ah = send_buf->ah; if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { @@ -2457,7 +2457,7 @@ retry: ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { - ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, + ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, &bad_send_wr); if (ret) { dev_err(&port_priv->device->dev, @@ -2515,7 +2515,7 @@ 
static void mad_error_handler(struct ib_mad_port_private *port_priv, struct ib_send_wr *bad_send_wr; mad_send_wr->retry = 0; - ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr, + ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, &bad_send_wr); if (ret) ib_mad_send_done_handler(port_priv, wc); @@ -2713,7 +2713,7 @@ static void local_completions(struct work_struct *work) build_smp_wc(recv_mad_agent->agent.qp, (unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), - local->mad_send_wr->send_wr.wr.ud.pkey_index, + local->mad_send_wr->send_wr.pkey_index, recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 4a4f7aa..990698a 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -123,7 +123,7 @@ struct ib_mad_send_wr_private { struct ib_mad_send_buf send_buf; u64 header_mapping; u64 payload_mapping; - struct ib_send_wr send_wr; + struct ib_ud_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; unsigned long timeout; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index be4cb9f..8adb71f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2303,6 +2303,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, return in_len; } +static void *alloc_wr(size_t wr_size, __u32 num_sge) +{ + return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) + + num_sge * sizeof (struct ib_sge), GFP_KERNEL); +}; + ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, @@ -2351,14 +2357,83 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + - user_wr->num_sge * sizeof (struct ib_sge), - GFP_KERNEL); - if (!next) { - ret = -ENOMEM; + if (is_ud) { + struct ib_ud_wr 
*ud; + + if (user_wr->opcode != IB_WR_SEND && + user_wr->opcode != IB_WR_SEND_WITH_IMM) { + ret = -EINVAL; + goto out_put; + } + + ud = alloc_wr(sizeof(*ud), user_wr->num_sge); + if (!ud) { + ret = -ENOMEM; + goto out_put; + } + + ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); + if (!ud->ah) { + kfree(ud); + ret = -EINVAL; + goto out_put; + } + ud->remote_qpn = user_wr->wr.ud.remote_qpn; + ud->remote_qkey = user_wr->wr.ud.remote_qkey; + + next = &ud->wr; + } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + user_wr->opcode == IB_WR_RDMA_WRITE || + user_wr->opcode == IB_WR_RDMA_READ) { + struct ib_rdma_wr *rdma; + + rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge); + if (!rdma) { + ret = -ENOMEM; + goto out_put; + } + + rdma->remote_addr = user_wr->wr.rdma.remote_addr; + rdma->rkey = user_wr->wr.rdma.rkey; + + next = &rdma->wr; + } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + struct ib_atomic_wr *atomic; + + atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge); + if (!atomic) { + ret = -ENOMEM; + goto out_put; + } + + atomic->remote_addr = user_wr->wr.atomic.remote_addr; + atomic->compare_add = user_wr->wr.atomic.compare_add; + atomic->swap = user_wr->wr.atomic.swap; + atomic->rkey = user_wr->wr.atomic.rkey; + + next = &atomic->wr; + } else if (user_wr->opcode == IB_WR_SEND || + user_wr->opcode == IB_WR_SEND_WITH_IMM || + user_wr->opcode == IB_WR_SEND_WITH_INV) { + next = alloc_wr(sizeof(*next), user_wr->num_sge); + if (!next) { + ret = -ENOMEM; + goto out_put; + } + } else { + ret = -EINVAL; goto out_put; } + if (user_wr->opcode == IB_WR_SEND_WITH_IMM || + user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + next->ex.imm_data = + (__be32 __force) user_wr->ex.imm_data; + } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) { + next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey; + } + if (!last) wr = next; else @@ -2371,60 +2446,6 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file 
*file, next->opcode = user_wr->opcode; next->send_flags = user_wr->send_flags; - if (is_ud) { - if (next->opcode != IB_WR_SEND && - next->opcode != IB_WR_SEND_WITH_IMM) { - ret = -EINVAL; - goto out_put; - } - - next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, - file->ucontext); - if (!next->wr.ud.ah) { - ret = -EINVAL; - goto out_put; - } - next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; - next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; - if (next->opcode == IB_WR_SEND_WITH_IMM) - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - } else { - switch (next->opcode) { - case IB_WR_RDMA_WRITE_WITH_IMM: - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_READ: - next->wr.rdma.remote_addr = - user_wr->wr.rdma.remote_addr; - next->wr.rdma.rkey = - user_wr->wr.rdma.rkey; - break; - case IB_WR_SEND_WITH_IMM: - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - break; - case IB_WR_SEND_WITH_INV: - next->ex.invalidate_rkey = - user_wr->ex.invalidate_rkey; - break; - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - next->wr.atomic.remote_addr = - user_wr->wr.atomic.remote_addr; - next->wr.atomic.compare_add = - user_wr->wr.atomic.compare_add; - next->wr.atomic.swap = user_wr->wr.atomic.swap; - next->wr.atomic.rkey = user_wr->wr.atomic.rkey; - case IB_WR_SEND: - break; - default: - ret = -EINVAL; - goto out_put; - } - } - if (next->num_sge) { next->sg_list = (void *) next + ALIGN(sizeof *next, sizeof (struct ib_sge)); @@ -2458,8 +2479,8 @@ out_put: put_qp_read(qp); while (wr) { - if (is_ud && wr->wr.ud.ah) - put_ah_read(wr->wr.ud.ah); + if (is_ud && ud_wr(wr)->ah) + put_ah_read(ud_wr(wr)->ah); next = wr->next; kfree(wr); wr = next; -- cgit v1.1