diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-06 17:35:43 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-06 17:35:43 -0700 |
commit | 19fd08b85bc7e0502b55cd726f466df82ee7e777 (patch) | |
tree | b042de4b9a8a9478c528ea950b14d34487375695 /drivers/infiniband/hw/mlx5/qp.c | |
parent | 28da7be5ebc096ada5e6bc526c623bdd8c47800a (diff) | |
parent | efc365e7290d040fbd43f60b0e97653489a739d4 (diff) | |
download | op-kernel-dev-19fd08b85bc7e0502b55cd726f466df82ee7e777.zip op-kernel-dev-19fd08b85bc7e0502b55cd726f466df82ee7e777.tar.gz |
Merge tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"Doug and I are at a conference next week so if another PR is sent I
expect it to only be bug fixes. Parav noted yesterday that there are
some fringe case behavior changes in his work that he would like to
fix, and I see that Intel has a number of rc looking patches for HFI1
they posted yesterday.
Parav is again the biggest contributor by patch count with his ongoing
work to enable container support in the RDMA stack, followed by Leon
doing syzkaller inspired cleanups, though most of the actual fixing
went to RC.
There is one uncomfortable series here fixing the user ABI to actually
work as intended in 32 bit mode. There are lots of notes in the commit
messages, but the basic summary is we don't think there is an actual
32 bit kernel user of drivers/infiniband for several good reasons.
However we are seeing people want to use a 32 bit user space with 64
bit kernel, which didn't completely work today. So in fixing it we
required a 32 bit rxe user to upgrade their userspace. rxe users are
still already quite rare and we think a 32 bit one is non-existing.
- Fix RDMA uapi headers to actually compile in userspace and be more
complete
- Three shared with netdev pull requests from Mellanox:
* 7 patches, mostly to net with 1 IB related one at the back).
This series addresses an IRQ performance issue (patch 1),
cleanups related to the fix for the IRQ performance problem
(patches 2-6), and then extends the fragmented completion queue
support that already exists in the net side of the driver to the
ib side of the driver (patch 7).
* Mostly IB, with 5 patches to net that are needed to support the
remaining 10 patches to the IB subsystem. This series extends
the current 'representor' framework when the mlx5 driver is in
switchdev mode from being a netdev only construct to being a
netdev/IB dev construct. The IB dev is limited to raw Eth queue
pairs only, but by having an IB dev of this type attached to the
representor for a switchdev port, it enables DPDK to work on the
switchdev device.
* All net related, but needed as infrastructure for the rdma
driver
- Updates for the hns, i40iw, bnxt_re, cxgb3, cxgb4, hns drivers
- SRP performance updates
- IB uverbs write path cleanup patch series from Leon
- Add RDMA_CM support to ib_srpt. This is disabled by default. Users
need to set the port for ib_srpt to listen on in configfs in order
for it to be enabled
(/sys/kernel/config/target/srpt/discovery_auth/rdma_cm_port)
- TSO and Scatter FCS support in mlx4
- Refactor of modify_qp routine to resolve problems seen while
working on new code that is forthcoming
- More refactoring and updates of RDMA CM for containers support from
Parav
- mlx5 'fine grained packet pacing', 'ipsec offload' and 'device
memory' user API features
- Infrastructure updates for the new IOCTL interface, based on
increased usage
- ABI compatibility bug fixes to fully support 32 bit userspace on 64
bit kernel as was originally intended. See the commit messages for
extensive details
- Syzkaller bugs and code cleanups motivated by them"
* tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (199 commits)
IB/rxe: Fix for oops in rxe_register_device on ppc64le arch
IB/mlx5: Device memory mr registration support
net/mlx5: Mkey creation command adjustments
IB/mlx5: Device memory support in mlx5_ib
net/mlx5: Query device memory capabilities
IB/uverbs: Add device memory registration ioctl support
IB/uverbs: Add alloc/free dm uverbs ioctl support
IB/uverbs: Add device memory capabilities reporting
IB/uverbs: Expose device memory capabilities to user
RDMA/qedr: Fix wmb usage in qedr
IB/rxe: Removed GID add/del dummy routines
RDMA/qedr: Zero stack memory before copying to user space
IB/mlx5: Add ability to hash by IPSEC_SPI when creating a TIR
IB/mlx5: Add information for querying IPsec capabilities
IB/mlx5: Add IPsec support for egress and ingress
{net,IB}/mlx5: Add ipsec helper
IB/mlx5: Add modify_flow_action_esp verb
IB/mlx5: Add implementation for create and destroy action_xfrm
IB/uverbs: Introduce ESP steering match filter
IB/uverbs: Add modify ESP flow_action
...
Diffstat (limited to 'drivers/infiniband/hw/mlx5/qp.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 156 |
1 files changed, 115 insertions, 41 deletions
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0d0b0b8..7ed4b70 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -86,7 +86,9 @@ struct mlx5_modify_raw_qp_param { u16 operation; u32 set_mask; /* raw_qp_set_mask_map */ - u32 rate_limit; + + struct mlx5_rate_limit rl; + u8 rq_q_ctr_id; }; @@ -878,7 +880,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_free; } - err = ib_copy_to_udata(udata, resp, sizeof(*resp)); + err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp))); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); goto err_unmap; @@ -1411,6 +1413,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, void *tirc; void *hfso; u32 selected_fields = 0; + u32 outer_l4; size_t min_resp_len; u32 tdn = mucontext->tdn; struct mlx5_ib_create_qp_rss ucmd = {}; @@ -1466,7 +1469,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EOPNOTSUPP; } - err = ib_copy_to_udata(udata, &resp, min_resp_len); + err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (err) { mlx5_ib_dbg(dev, "copy failed\n"); return -EINVAL; @@ -1541,10 +1544,14 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); - if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) && - ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) { + outer_l4 = ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) << 0 | + ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) << 1 | + (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2; + + /* Check that only one l4 protocol is set */ + if (outer_l4 & (outer_l4 - 1)) { err = -EINVAL; goto err; } @@ -1575,6 +1582,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT; + if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) + selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI; + MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); create_tir: @@ -2774,8 +2784,9 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, const struct mlx5_modify_raw_qp_param *raw_qp_param) { struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; - u32 old_rate = ibqp->rate_limit; - u32 new_rate = old_rate; + struct mlx5_rate_limit old_rl = ibqp->rl; + struct mlx5_rate_limit new_rl = old_rl; + bool new_rate_added = false; u16 rl_index = 0; void *in; void *sqc; @@ -2797,39 +2808,43 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n", __func__); else - new_rate = raw_qp_param->rate_limit; + new_rl = raw_qp_param->rl; } - if (old_rate != new_rate) { - if (new_rate) { - err = mlx5_rl_add_rate(dev, new_rate, &rl_index); + if (!mlx5_rl_are_equal(&old_rl, &new_rl)) { + if (new_rl.rate) { + err = mlx5_rl_add_rate(dev, &rl_index, &new_rl); if (err) { - pr_err("Failed configuring rate %u: %d\n", - new_rate, err); + pr_err("Failed configuring rate limit(err %d): \ + rate %u, max_burst_sz %u, typical_pkt_sz %u\n", + err, new_rl.rate, new_rl.max_burst_sz, + new_rl.typical_pkt_sz); + goto out; } + new_rate_added = true; } MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); + /* index 0 means no limit */ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); } err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen); if (err) { /* Remove new rate from table if failed */ - if (new_rate && - old_rate != new_rate) - mlx5_rl_remove_rate(dev, new_rate); + if (new_rate_added) + mlx5_rl_remove_rate(dev, &new_rl); goto out; } /* Only remove the old rate after new rate was set */ - if ((old_rate && - (old_rate != new_rate)) || + if ((old_rl.rate && + !mlx5_rl_are_equal(&old_rl, &new_rl)) || (new_state != MLX5_SQC_STATE_RDY)) - mlx5_rl_remove_rate(dev, old_rate); + mlx5_rl_remove_rate(dev, &old_rl); - ibqp->rate_limit = new_rate; + ibqp->rl = new_rl; sq->state = new_state; out: @@ -2906,7 +2921,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, - enum ib_qp_state cur_state, enum ib_qp_state new_state) + enum ib_qp_state cur_state, enum ib_qp_state new_state, + const struct mlx5_ib_modify_qp *ucmd) { static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { [MLX5_QP_STATE_RST] = { @@ -2959,18 +2975,16 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, u16 op; u8 tx_affinity = 0; + mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? + qp->qp_sub_type : ibqp->qp_type); + if (mlx5_st < 0) + return -EINVAL; + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; - err = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? - qp->qp_sub_type : ibqp->qp_type); - if (err < 0) { - mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); - goto out; - } - - context->flags = cpu_to_be32(err << 16); + context->flags = cpu_to_be32(mlx5_st << 16); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); @@ -3124,10 +3138,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); - mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? - qp->qp_sub_type : ibqp->qp_type); - if (mlx5_st < 0) - goto out; if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE || !optab[mlx5_cur][mlx5_new]) { @@ -3150,7 +3160,30 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_RATE_LIMIT) { - raw_qp_param.rate_limit = attr->rate_limit; + raw_qp_param.rl.rate = attr->rate_limit; + + if (ucmd->burst_info.max_burst_sz) { + if (attr->rate_limit && + MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) { + raw_qp_param.rl.max_burst_sz = + ucmd->burst_info.max_burst_sz; + } else { + err = -EINVAL; + goto out; + } + } + + if (ucmd->burst_info.typical_pkt_sz) { + if (attr->rate_limit && + MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) { + raw_qp_param.rl.typical_pkt_sz = + ucmd->burst_info.typical_pkt_sz; + } else { + err = -EINVAL; + goto out; + } + } + raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT; } @@ -3178,7 +3211,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ - if (new_state == IB_QPS_RESET && !ibqp->uobject) { + if (new_state == IB_QPS_RESET && + !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) { mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq) : NULL); if (send_cq != recv_cq) @@ -3337,8 +3371,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_modify_qp ucmd = {}; enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; + size_t required_cmd_sz; int err = -EINVAL; int port; enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; @@ -3346,6 +3382,28 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (ibqp->rwq_ind_tbl) return -ENOSYS; + if (udata && udata->inlen) { + required_cmd_sz = offsetof(typeof(ucmd), reserved) + + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) + return -EINVAL; + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EOPNOTSUPP; + + if (ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd)))) + return -EFAULT; + + if (ucmd.comp_mask || + memchr_inv(&ucmd.reserved, 0, sizeof(ucmd.reserved)) || + memchr_inv(&ucmd.burst_info.reserved, 0, + sizeof(ucmd.burst_info.reserved))) + return -EOPNOTSUPP; + } + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); @@ -3426,7 +3484,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, + new_state, &ucmd); out: mutex_unlock(&qp->mutex); @@ -3646,8 +3705,19 @@ static __be64 get_umr_update_pd_mask(void) return cpu_to_be64(result); } -static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, int atomic) +static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) +{ + if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || + (mask & MLX5_MKEY_MASK_A && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) + return -EPERM; + return 0; +} + +static int set_reg_umr_segment(struct mlx5_ib_dev *dev, + struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, int atomic) { struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -3679,6 +3749,8 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, if (!wr->num_sge) umr->flags |= MLX5_UMR_INLINE; + + return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); } static u8 get_umr_flags(int acc) @@ -4501,7 +4573,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); - set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); + err = set_reg_umr_segment(dev, seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); + if (unlikely(err)) + goto out; seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((seg == qend))) |