diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx4')
-rw-r--r-- | drivers/infiniband/hw/mlx4/cq.c | 319 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/mad.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/main.c | 25 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/mlx4_ib.h | 15 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 117 |
5 files changed, 411 insertions, 67 deletions
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 7360bba..3557e7e 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -85,6 +85,82 @@ static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq) return get_sw_cqe(cq, cq->mcq.cons_index); } +int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +{ + struct mlx4_ib_cq *mcq = to_mcq(cq); + struct mlx4_ib_dev *dev = to_mdev(cq->device); + + return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period); +} + +static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent) +{ + int err; + + err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe), + PAGE_SIZE * 2, &buf->buf); + + if (err) + goto out; + + err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift, + &buf->mtt); + if (err) + goto err_buf; + + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); + if (err) + goto err_mtt; + + return 0; + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &buf->mtt); + +err_buf: + mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe), + &buf->buf); + +out: + return err; +} + +static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe) +{ + mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf); +} + +static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context, + struct mlx4_ib_cq_buf *buf, struct ib_umem **umem, + u64 buf_addr, int cqe) +{ + int err; + + *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe), + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(*umem)) + return PTR_ERR(*umem); + + err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem), + ilog2((*umem)->page_size), &buf->mtt); + if (err) + goto err_buf; + + err = mlx4_ib_umem_write_mtt(dev, &buf->mtt, *umem); + if (err) + goto err_mtt; + + return 0; + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &buf->mtt); + +err_buf: + ib_umem_release(*umem); + + return err; +} + struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, struct ib_ucontext *context, struct ib_udata *udata) @@ -92,7 +168,6 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_cq *cq; struct mlx4_uar *uar; - int buf_size; int err; if (entries < 1 || entries > dev->dev->caps.max_cqes) @@ -104,8 +179,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector entries = roundup_pow_of_two(entries + 1); cq->ibcq.cqe = entries - 1; - buf_size = entries * sizeof (struct mlx4_cqe); + mutex_init(&cq->resize_mutex); spin_lock_init(&cq->lock); + cq->resize_buf = NULL; + cq->resize_umem = NULL; if (context) { struct mlx4_ib_create_cq ucmd; @@ -115,21 +192,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector goto err_cq; } - cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(cq->umem)) { - err = PTR_ERR(cq->umem); - goto err_cq; - } - - err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem), - ilog2(cq->umem->page_size), &cq->buf.mtt); - if (err) - goto err_buf; - - err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem); + err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem, + ucmd.buf_addr, entries); if (err) - goto err_mtt; + goto err_cq; err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr, &cq->db); @@ -147,19 +213,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector *cq->mcq.set_ci_db = 0; *cq->mcq.arm_db = 0; - if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) { - err = -ENOMEM; - goto err_db; - } - - err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift, - &cq->buf.mtt); + err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries); if (err) - goto err_buf; - - err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf); - if (err) - goto err_mtt; + goto err_db; uar = &dev->priv_uar; } @@ -187,12 +243,10 @@ err_dbmap: err_mtt: mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt); -err_buf: if (context) ib_umem_release(cq->umem); else - mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe), - &cq->buf.buf); + mlx4_ib_free_cq_buf(dev, &cq->buf, entries); err_db: if (!context) @@ -204,6 +258,170 @@ err_cq: return ERR_PTR(err); } +static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, + int entries) +{ + int err; + + if (cq->resize_buf) + return -EBUSY; + + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); + if (!cq->resize_buf) + return -ENOMEM; + + err = mlx4_ib_alloc_cq_buf(dev, &cq->resize_buf->buf, entries); + if (err) { + kfree(cq->resize_buf); + cq->resize_buf = NULL; + return err; + } + + cq->resize_buf->cqe = entries - 1; + + return 0; +} + +static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, + int entries, struct ib_udata *udata) +{ + struct mlx4_ib_resize_cq ucmd; + int err; + + if (cq->resize_umem) + return -EBUSY; + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return -EFAULT; + + cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC); + if (!cq->resize_buf) + return -ENOMEM; + + err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf, + &cq->resize_umem, ucmd.buf_addr, entries); + if (err) { + kfree(cq->resize_buf); + cq->resize_buf = NULL; + return err; + } + + cq->resize_buf->cqe = entries - 1; + + return 0; +} + +static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq) +{ + u32 i; + + i = cq->mcq.cons_index; + while (get_sw_cqe(cq, i & cq->ibcq.cqe)) + ++i; + + return i - cq->mcq.cons_index; +} + +static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) +{ + struct mlx4_cqe *cqe; + int i; + + i = cq->mcq.cons_index; + cqe = get_cqe(cq, i & cq->ibcq.cqe); + while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { + memcpy(get_cqe_from_buf(&cq->resize_buf->buf, + (i + 1) & cq->resize_buf->cqe), + get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); + cqe = get_cqe(cq, ++i & cq->ibcq.cqe); + } + ++cq->mcq.cons_index; +} + +int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(ibcq->device); + struct mlx4_ib_cq *cq = to_mcq(ibcq); + int outst_cqe; + int err; + + mutex_lock(&cq->resize_mutex); + + if (entries < 1 || entries > dev->dev->caps.max_cqes) { + err = -EINVAL; + goto out; + } + + entries = roundup_pow_of_two(entries + 1); + if (entries == ibcq->cqe + 1) { + err = 0; + goto out; + } + + if (ibcq->uobject) { + err = mlx4_alloc_resize_umem(dev, cq, entries, udata); + if (err) + goto out; + } else { + /* Can't be smaller then the number of outstanding CQEs */ + outst_cqe = mlx4_ib_get_outstanding_cqes(cq); + if (entries < outst_cqe + 1) { + err = 0; + goto out; + } + + err = mlx4_alloc_resize_buf(dev, cq, entries); + if (err) + goto out; + } + + err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt); + if (err) + goto err_buf; + + if (ibcq->uobject) { + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + ib_umem_release(cq->umem); + cq->umem = cq->resize_umem; + + kfree(cq->resize_buf); + cq->resize_buf = NULL; + cq->resize_umem = NULL; + } else { + spin_lock_irq(&cq->lock); + if (cq->resize_buf) { + mlx4_ib_cq_resize_copy_cqes(cq); + mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + + kfree(cq->resize_buf); + cq->resize_buf = NULL; + } + spin_unlock_irq(&cq->lock); + } + + goto out; + +err_buf: + if (!ibcq->uobject) + mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf, + cq->resize_buf->cqe); + + kfree(cq->resize_buf); + cq->resize_buf = NULL; + + if (cq->resize_umem) { + ib_umem_release(cq->resize_umem); + cq->resize_umem = NULL; + } + +out: + mutex_unlock(&cq->resize_mutex); + return err; +} + int mlx4_ib_destroy_cq(struct ib_cq *cq) { struct mlx4_ib_dev *dev = to_mdev(cq->device); @@ -216,8 +434,7 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq) mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); ib_umem_release(mcq->umem); } else { - mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe), - &mcq->buf.buf); + mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1); mlx4_ib_db_free(dev, &mcq->db); } @@ -297,6 +514,20 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, wc->vendor_err = cqe->vendor_err_syndrome; } +static int mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum) +{ + return ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 | + MLX4_CQE_IPOIB_STATUS_IPV4F | + MLX4_CQE_IPOIB_STATUS_IPV4OPT | + MLX4_CQE_IPOIB_STATUS_IPV6 | + MLX4_CQE_IPOIB_STATUS_IPOK)) == + cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 | + MLX4_CQE_IPOIB_STATUS_IPOK)) && + (status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_UDP | + MLX4_CQE_IPOIB_STATUS_TCP)) && + checksum == cpu_to_be16(0xffff); +} + static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_ib_qp **cur_qp, struct ib_wc *wc) @@ -310,6 +541,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, u32 g_mlpath_rqpn; u16 wqe_ctr; +repoll: cqe = next_cqe_sw(cq); if (!cqe) return -EAGAIN; @@ -332,6 +564,22 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, return -EINVAL; } + /* Resize CQ in progress */ + if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) { + if (cq->resize_buf) { + struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device); + + mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); + cq->buf = cq->resize_buf->buf; + cq->ibcq.cqe = cq->resize_buf->cqe; + + kfree(cq->resize_buf); + cq->resize_buf = NULL; + } + + goto repoll; + } + if (!*cur_qp || (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) { /* @@ -406,6 +654,9 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, case MLX4_OPCODE_BIND_MW: wc->opcode = IB_WC_BIND_MW; break; + case MLX4_OPCODE_LSO: + wc->opcode = IB_WC_LSO; + break; } } else { wc->byte_len = be32_to_cpu(cqe->byte_cnt); @@ -434,6 +685,8 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; + wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status, + cqe->checksum); } return 0; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 0ed02b7..4c1e72f 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -165,7 +165,7 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad) event.device = ibdev; event.element.port_num = port_num; - if(pinfo->clientrereg_resv_subnetto & 0x80) + if (pinfo->clientrereg_resv_subnetto & 0x80) event.event = IB_EVENT_CLIENT_REREGISTER; else event.event = IB_EVENT_LID_CHANGE; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 96a39b5..136c76c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -44,8 +44,8 @@ #include "user.h" #define DRV_NAME "mlx4_ib" -#define DRV_VERSION "0.01" -#define DRV_RELDATE "May 1, 2006" +#define DRV_VERSION "1.0" +#define DRV_RELDATE "April 4, 2008" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); @@ -99,6 +99,10 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) + props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; + if (dev->dev->caps.max_gso_sz) + props->device_cap_flags |= IB_DEVICE_UD_TSO; props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; @@ -567,6 +571,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | @@ -605,6 +610,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.post_send = mlx4_ib_post_send; ibdev->ib_dev.post_recv = mlx4_ib_post_recv; ibdev->ib_dev.create_cq = mlx4_ib_create_cq; + ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; + ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; @@ -675,18 +682,20 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) } static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, - enum mlx4_dev_event event, int subtype, - int port) + enum mlx4_dev_event event, int port) { struct ib_event ibev; switch (event) { - case MLX4_EVENT_TYPE_PORT_CHANGE: - ibev.event = subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ? - IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + case MLX4_DEV_EVENT_PORT_UP: + ibev.event = IB_EVENT_PORT_ACTIVE; break; - case MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR: + case MLX4_DEV_EVENT_PORT_DOWN: + ibev.event = IB_EVENT_PORT_ERR; + break; + + case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: ibev.event = IB_EVENT_DEVICE_FATAL; break; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 3726e45..9e63732 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -78,13 +78,21 @@ struct mlx4_ib_cq_buf { struct mlx4_mtt mtt; }; +struct mlx4_ib_cq_resize { + struct mlx4_ib_cq_buf buf; + int cqe; +}; + struct mlx4_ib_cq { struct ib_cq ibcq; struct mlx4_cq mcq; struct mlx4_ib_cq_buf buf; + struct mlx4_ib_cq_resize *resize_buf; struct mlx4_ib_db db; spinlock_t lock; + struct mutex resize_mutex; struct ib_umem *umem; + struct ib_umem *resize_umem; }; struct mlx4_ib_mr { @@ -110,6 +118,10 @@ struct mlx4_ib_wq { unsigned tail; }; +enum mlx4_ib_qp_flags { + MLX4_IB_QP_LSO = 1 << 0 +}; + struct mlx4_ib_qp { struct ib_qp ibqp; struct mlx4_qp mqp; @@ -129,6 +141,7 @@ struct mlx4_ib_qp { struct mlx4_mtt mtt; int buf_size; struct mutex mutex; + u32 flags; u8 port; u8 alt_port; u8 atomic_rd_en; @@ -249,6 +262,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); +int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, struct ib_ucontext *context, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 958e205..b75efae 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -71,6 +71,7 @@ enum { static const __be32 mlx4_ib_opcode[] = { [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND), + [IB_WR_LSO] = __constant_cpu_to_be32(MLX4_OPCODE_LSO), [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM), [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), @@ -122,7 +123,7 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) */ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) { - u32 *wqe; + __be32 *wqe; int i; int s; int ind; @@ -143,7 +144,7 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); for (i = 64; i < s; i += 64) { wqe = buf + i; - *wqe = 0xffffffff; + *wqe = cpu_to_be32(0xffffffff); } } } @@ -242,7 +243,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) } } -static int send_wqe_overhead(enum ib_qp_type type) +static int send_wqe_overhead(enum ib_qp_type type, u32 flags) { /* * UD WQEs must have a datagram segment. @@ -253,7 +254,8 @@ static int send_wqe_overhead(enum ib_qp_type type) switch (type) { case IB_QPT_UD: return sizeof (struct mlx4_wqe_ctrl_seg) + - sizeof (struct mlx4_wqe_datagram_seg); + sizeof (struct mlx4_wqe_datagram_seg) + + ((flags & MLX4_IB_QP_LSO) ? 64 : 0); case IB_QPT_UC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_raddr_seg); @@ -315,7 +317,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, /* Sanity check SQ size before proceeding */ if (cap->max_send_wr > dev->dev->caps.max_wqes || cap->max_send_sge > dev->dev->caps.max_sq_sg || - cap->max_inline_data + send_wqe_overhead(type) + + cap->max_inline_data + send_wqe_overhead(type, qp->flags) + sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) return -EINVAL; @@ -329,7 +331,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg), cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) + - send_wqe_overhead(type); + send_wqe_overhead(type, qp->flags); /* * Hermon supports shrinking WQEs, such that a single work @@ -394,7 +396,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, } qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) - - send_wqe_overhead(type)) / sizeof (struct mlx4_wqe_data_seg); + send_wqe_overhead(type, qp->flags)) / + sizeof (struct mlx4_wqe_data_seg); qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << qp->sq.wqe_shift); @@ -503,6 +506,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } else { qp->sq_no_prefetch = 0; + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) + qp->flags |= MLX4_IB_QP_LSO; + err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); if (err) goto err; @@ -673,6 +679,13 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp; int err; + /* We only support LSO, and only for kernel UD QPs. */ + if (init_attr->create_flags & ~IB_QP_CREATE_IPOIB_UD_LSO) + return ERR_PTR(-EINVAL); + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO && + (pd->uobject || init_attr->qp_type != IB_QPT_UD)) + return ERR_PTR(-EINVAL); + switch (init_attr->qp_type) { case IB_QPT_RC: case IB_QPT_UC: @@ -876,10 +889,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || - ibqp->qp_type == IB_QPT_UD) + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; - else if (attr_mask & IB_QP_PATH_MTU) { + else if (ibqp->qp_type == IB_QPT_UD) { + if (qp->flags & MLX4_IB_QP_LSO) + context->mtu_msgmax = (IB_MTU_4096 << 5) | + ilog2(dev->dev->caps.max_gso_sz); + else + context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; + } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { printk(KERN_ERR "path MTU (%u) is invalid\n", attr->path_mtu); @@ -1182,7 +1200,7 @@ out: } static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, - void *wqe) + void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev; struct mlx4_wqe_mlx_seg *mlx = wqe; @@ -1231,7 +1249,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; - sqp->ud_header.immediate_data = wr->imm_data; + sqp->ud_header.immediate_data = wr->ex.imm_data; break; default: return -EINVAL; @@ -1303,7 +1321,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, i = 2; } - return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + *mlx_seg_len = + ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + return 0; } static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) @@ -1396,6 +1416,34 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } +static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, + struct mlx4_ib_qp *qp, unsigned *lso_seg_len) +{ + unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); + + /* + * This is a temporary limitation and will be removed in + * a forthcoming FW release: + */ + if (unlikely(halign > 64)) + return -EINVAL; + + if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && + wr->num_sge > qp->sq.max_gs - (halign >> 4))) + return -EINVAL; + + memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); + + /* make sure LSO header is written before overwriting stamping */ + wmb(); + + wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | + wr->wr.ud.hlen); + + *lso_seg_len = halign; + return 0; +} + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1409,6 +1457,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned ind; int uninitialized_var(stamp); int uninitialized_var(size); + unsigned seglen; int i; spin_lock_irqsave(&qp->sq.lock, flags); @@ -1436,11 +1485,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | (wr->send_flags & IB_SEND_SOLICITED ? cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) | + ((wr->send_flags & IB_SEND_IP_CSUM) ? + cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | + MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | qp->sq_signal_bits; if (wr->opcode == IB_WR_SEND_WITH_IMM || wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) - ctrl->imm = wr->imm_data; + ctrl->imm = wr->ex.imm_data; else ctrl->imm = 0; @@ -1484,19 +1536,27 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, set_datagram_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; + + if (wr->opcode == IB_WR_LSO) { + err = build_lso_seg(wqe, wr, qp, &seglen); + if (unlikely(err)) { + *bad_wr = wr; + goto out; + } + wqe += seglen; + size += seglen / 16; + } break; case IB_QPT_SMI: case IB_QPT_GSI: - err = build_mlx_header(to_msqp(qp), wr, ctrl); - if (err < 0) { + err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); + if (unlikely(err)) { *bad_wr = wr; goto out; } - wqe += err; - size += err / 16; - - err = 0; + wqe += seglen; + size += seglen / 16; break; default: @@ -1725,7 +1785,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_qp_context context; int mlx4_state; - int err; + int err = 0; + + mutex_lock(&qp->mutex); if (qp->state == IB_QPS_RESET) { qp_attr->qp_state = IB_QPS_RESET; @@ -1733,12 +1795,15 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr } err = mlx4_qp_query(dev->dev, &qp->mqp, &context); - if (err) - return -EINVAL; + if (err) { + err = -EINVAL; + goto out; + } mlx4_state = be32_to_cpu(context.flags) >> 28; - qp_attr->qp_state = to_ib_qp_state(mlx4_state); + qp->state = to_ib_qp_state(mlx4_state); + qp_attr->qp_state = qp->state; qp_attr->path_mtu = context.mtu_msgmax >> 5; qp_attr->path_mig_state = to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3); @@ -1797,6 +1862,8 @@ done: qp_init_attr->cap = qp_attr->cap; - return 0; +out: + mutex_unlock(&qp->mutex); + return err; } |