diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 16:49:49 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 16:49:49 -0700 |
commit | e0456717e483bb8a9431b80a5bdc99a928b9b003 (patch) | |
tree | 5eb5add2bafd1f20326d70f5cb3b711d00a40b10 /drivers/infiniband/hw/mlx5/main.c | |
parent | 98ec21a01896751b673b6c731ca8881daa8b2c6d (diff) | |
parent | 1ea2d020ba477cb7011a7174e8501a9e04a325d4 (diff) | |
download | op-kernel-dev-e0456717e483bb8a9431b80a5bdc99a928b9b003.zip op-kernel-dev-e0456717e483bb8a9431b80a5bdc99a928b9b003.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Add TX fast path in mac80211, from Johannes Berg.
2) Add TSO/GRO support to ibmveth, from Thomas Falcon.
3) Move away from cached routes in ipv6, just like ipv4, from Martin
KaFai Lau.
4) Lots of new rhashtable tests, from Thomas Graf.
5) Run ingress qdisc lockless, from Alexei Starovoitov.
6) Allow servers to fetch TCP packet headers for SYN packets of new
connections, for fingerprinting. From Eric Dumazet.
7) Add mode parameter to pktgen, for testing receive. From Alexei
Starovoitov.
8) Cache access optimizations via simplifications of build_skb(), from
Alexander Duyck.
9) Move page frag allocator under mm/, also from Alexander.
10) Add xmit_more support to hv_netvsc, from KY Srinivasan.
11) Add a counter guard in case we try to perform endless reclassify
loops in the packet scheduler.
12) Extend flow dissector to be programmable and use it in new "Flower"
classifier. From Jiri Pirko.
13) AF_PACKET fanout rollover fixes, performance improvements, and new
statistics. From Willem de Bruijn.
14) Add netdev driver for GENEVE tunnels, from John W Linville.
15) Add ingress netfilter hooks and filtering, from Pablo Neira Ayuso.
16) Fix handling of epoll edge triggers in TCP, from Eric Dumazet.
17) Add an ECN retry fallback for the initial TCP handshake, from Daniel
Borkmann.
18) Add tail call support to BPF, from Alexei Starovoitov.
19) Add several pktgen helper scripts, from Jesper Dangaard Brouer.
20) Add zerocopy support to AF_UNIX, from Hannes Frederic Sowa.
21) Favor even port numbers for allocation to connect() requests, and
odd port numbers for bind(0), in an effort to help avoid
ip_local_port_range exhaustion. From Eric Dumazet.
22) Add Cavium ThunderX driver, from Sunil Goutham.
23) Allow bpf programs to access skb_iif and dev->ifindex SKB metadata,
from Alexei Starovoitov.
24) Add support for T6 chips in cxgb4vf driver, from Hariprasad Shenai.
25) Double TCP Small Queues default to 256K to accommodate situations
like the XEN driver and wireless aggregation. From Wei Liu.
26) Add more entropy inputs to flow dissector, from Tom Herbert.
27) Add CDG congestion control algorithm to TCP, from Kenneth Klette
Jonassen.
28) Convert ipset over to RCU locking, from Jozsef Kadlecsik.
29) Track and act upon link status of ipv4 route nexthops, from Andy
Gospodarek.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1670 commits)
bridge: vlan: flush the dynamically learned entries on port vlan delete
bridge: multicast: add a comment to br_port_state_selection about blocking state
net: inet_diag: export IPV6_V6ONLY sockopt
stmmac: troubleshoot unexpected bits in des0 & des1
net: ipv4 sysctl option to ignore routes when nexthop link is down
net: track link-status of ipv4 nexthops
net: switchdev: ignore unsupported bridge flags
net: Cavium: Fix MAC address setting in shutdown state
drivers: net: xgene: fix for ACPI support without ACPI
ip: report the original address of ICMP messages
net/mlx5e: Prefetch skb data on RX
net/mlx5e: Pop cq outside mlx5e_get_cqe
net/mlx5e: Remove mlx5e_cq.sqrq back-pointer
net/mlx5e: Remove extra spaces
net/mlx5e: Avoid TX CQE generation if more xmit packets expected
net/mlx5e: Avoid redundant dev_kfree_skb() upon NOP completion
net/mlx5e: Remove re-assignment of wq type in mlx5e_enable_rq()
net/mlx5e: Use skb_shinfo(skb)->gso_segs rather than counting them
net/mlx5e: Static mapping of netdev priv resources to/from netdev TX queues
net/mlx4_en: Use HW counters for rx/tx bytes/packets in PF device
...
Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 646 |
1 files changed, 417 insertions, 229 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c6cb26e..085c24b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -40,6 +40,7 @@ #include <linux/io-mapping.h> #include <linux/sched.h> #include <rdma/ib_user_verbs.h> +#include <linux/mlx5/vport.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> #include "user.h" @@ -62,36 +63,172 @@ static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; +static enum rdma_link_layer +mlx5_ib_port_link_layer(struct ib_device *device) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + + switch (MLX5_CAP_GEN(dev->mdev, port_type)) { + case MLX5_CAP_PORT_TYPE_IB: + return IB_LINK_LAYER_INFINIBAND; + case MLX5_CAP_PORT_TYPE_ETH: + return IB_LINK_LAYER_ETHERNET; + default: + return IB_LINK_LAYER_UNSPECIFIED; + } +} + +static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) +{ + return !dev->mdev->issi; +} + +enum { + MLX5_VPORT_ACCESS_METHOD_MAD, + MLX5_VPORT_ACCESS_METHOD_HCA, + MLX5_VPORT_ACCESS_METHOD_NIC, +}; + +static int mlx5_get_vport_access_method(struct ib_device *ibdev) +{ + if (mlx5_use_mad_ifc(to_mdev(ibdev))) + return MLX5_VPORT_ACCESS_METHOD_MAD; + + if (mlx5_ib_port_link_layer(ibdev) == + IB_LINK_LAYER_ETHERNET) + return MLX5_VPORT_ACCESS_METHOD_NIC; + + return MLX5_VPORT_ACCESS_METHOD_HCA; +} + +static int mlx5_query_system_image_guid(struct ib_device *ibdev, + __be64 *sys_image_guid) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + u64 tmp; + int err; + + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_system_image_guid(ibdev, + sys_image_guid); + + case MLX5_VPORT_ACCESS_METHOD_HCA: + err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); + if (!err) + *sys_image_guid = cpu_to_be64(tmp); + return err; + + default: + return -EINVAL; + } +} + +static int 
mlx5_query_max_pkeys(struct ib_device *ibdev, + u16 *max_pkeys) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys); + + case MLX5_VPORT_ACCESS_METHOD_HCA: + case MLX5_VPORT_ACCESS_METHOD_NIC: + *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, + pkey_table_size)); + return 0; + + default: + return -EINVAL; + } +} + +static int mlx5_query_vendor_id(struct ib_device *ibdev, + u32 *vendor_id) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id); + + case MLX5_VPORT_ACCESS_METHOD_HCA: + case MLX5_VPORT_ACCESS_METHOD_NIC: + return mlx5_core_query_vendor_id(dev->mdev, vendor_id); + + default: + return -EINVAL; + } +} + +static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, + __be64 *node_guid) +{ + u64 tmp; + int err; + + switch (mlx5_get_vport_access_method(&dev->ib_dev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_node_guid(dev, node_guid); + + case MLX5_VPORT_ACCESS_METHOD_HCA: + err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); + if (!err) + *node_guid = cpu_to_be64(tmp); + return err; + + default: + return -EINVAL; + } +} + +struct mlx5_reg_node_desc { + u8 desc[64]; +}; + +static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) +{ + struct mlx5_reg_node_desc in; + + if (mlx5_use_mad_ifc(dev)) + return mlx5_query_mad_ifc_node_desc(dev, node_desc); + + memset(&in, 0, sizeof(in)); + + return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc, + sizeof(struct mlx5_reg_node_desc), + MLX5_REG_NODE_DESC, 0, 0); +} + static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct ib_smp *in_mad = NULL; - 
struct ib_smp *out_mad = NULL; - struct mlx5_general_caps *gen; + struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; int max_rq_sg; int max_sq_sg; - u64 flags; if (uhw->inlen || uhw->outlen) return -EINVAL; - gen = &dev->mdev->caps.gen; - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); - out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; - - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + memset(props, 0, sizeof(*props)); + err = mlx5_query_system_image_guid(ibdev, + &props->sys_image_guid); + if (err) + return err; - err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); + err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); if (err) - goto out; + return err; - memset(props, 0, sizeof(*props)); + err = mlx5_query_vendor_id(ibdev, &props->vendor_id); + if (err) + return err; props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) | (fw_rev_min(dev->mdev) << 16) | @@ -100,18 +237,18 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN; - flags = gen->flags; - if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) + + if (MLX5_CAP_GEN(mdev, pkv)) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; - if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR) + if (MLX5_CAP_GEN(mdev, qkv)) props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; - if (flags & MLX5_DEV_CAP_FLAG_APM) + if (MLX5_CAP_GEN(mdev, apm)) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; - if (flags & MLX5_DEV_CAP_FLAG_XRC) + if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; - if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) { + if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | @@ -120,221 
+257,270 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->sig_guard_cap = IB_GUARD_T10DIF_CRC | IB_GUARD_T10DIF_CSUM; } - if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST) + if (MLX5_CAP_GEN(mdev, block_lb_mc)) props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; - props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & - 0xffffff; - props->vendor_part_id = be16_to_cpup((__be16 *)(out_mad->data + 30)); - props->hw_ver = be32_to_cpup((__be32 *)(out_mad->data + 32)); - memcpy(&props->sys_image_guid, out_mad->data + 4, 8); + props->vendor_part_id = mdev->pdev->device; + props->hw_ver = mdev->pdev->revision; props->max_mr_size = ~0ull; - props->page_size_cap = gen->min_page_sz; - props->max_qp = 1 << gen->log_max_qp; - props->max_qp_wr = gen->max_wqes; - max_rq_sg = gen->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (gen->max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / - sizeof(struct mlx5_wqe_data_seg); + props->page_size_cap = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); + props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); + props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / + sizeof(struct mlx5_wqe_data_seg); + max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) - + sizeof(struct mlx5_wqe_ctrl_seg)) / + sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); - props->max_cq = 1 << gen->log_max_cq; - props->max_cqe = gen->max_cqes - 1; - props->max_mr = 1 << gen->log_max_mkey; - props->max_pd = 1 << gen->log_max_pd; - props->max_qp_rd_atom = 1 << gen->log_max_ra_req_qp; - props->max_qp_init_rd_atom = 1 << gen->log_max_ra_res_qp; - props->max_srq = 1 << gen->log_max_srq; - props->max_srq_wr = gen->max_srq_wqes - 1; - props->local_ca_ack_delay = gen->local_ca_ack_delay; + props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); + props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1; + props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); + 
props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); + props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); + props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp); + props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq); + props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1; + props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; - props->local_ca_ack_delay = gen->local_ca_ack_delay; props->atomic_cap = IB_ATOMIC_NONE; props->masked_atomic_cap = IB_ATOMIC_NONE; - props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); - props->max_mcast_grp = 1 << gen->log_max_mcg; - props->max_mcast_qp_attach = gen->max_qp_mcg; + props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); + props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG) + if (MLX5_CAP_GEN(mdev, pg)) props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; #endif -out: - kfree(in_mad); - kfree(out_mad); - - return err; + return 0; } -int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) +enum mlx5_ib_width { + MLX5_IB_WIDTH_1X = 1 << 0, + MLX5_IB_WIDTH_2X = 1 << 1, + MLX5_IB_WIDTH_4X = 1 << 2, + MLX5_IB_WIDTH_8X = 1 << 3, + MLX5_IB_WIDTH_12X = 1 << 4 +}; + +static int translate_active_width(struct ib_device *ibdev, u8 active_width, + u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - struct mlx5_general_caps *gen; - int ext_active_speed; - int err = -ENOMEM; - - gen = &dev->mdev->caps.gen; - if 
(port < 1 || port > gen->num_ports) { - mlx5_ib_warn(dev, "invalid port number %d\n", port); - return -EINVAL; + int err = 0; + + if (active_width & MLX5_IB_WIDTH_1X) { + *ib_width = IB_WIDTH_1X; + } else if (active_width & MLX5_IB_WIDTH_2X) { + mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n", + (int)active_width); + err = -EINVAL; + } else if (active_width & MLX5_IB_WIDTH_4X) { + *ib_width = IB_WIDTH_4X; + } else if (active_width & MLX5_IB_WIDTH_8X) { + *ib_width = IB_WIDTH_8X; + } else if (active_width & MLX5_IB_WIDTH_12X) { + *ib_width = IB_WIDTH_12X; + } else { + mlx5_ib_dbg(dev, "Invalid active_width %d\n", + (int)active_width); + err = -EINVAL; } - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); - out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; - - memset(props, 0, sizeof(*props)); - - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); + return err; +} - err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad); - if (err) { - mlx5_ib_warn(dev, "err %d\n", err); - goto out; +static int mlx5_mtu_to_ib_mtu(int mtu) +{ + switch (mtu) { + case 256: return 1; + case 512: return 2; + case 1024: return 3; + case 2048: return 4; + case 4096: return 5; + default: + pr_warn("invalid mtu\n"); + return -1; } +} +enum ib_max_vl_num { + __IB_MAX_VL_0 = 1, + __IB_MAX_VL_0_1 = 2, + __IB_MAX_VL_0_3 = 3, + __IB_MAX_VL_0_7 = 4, + __IB_MAX_VL_0_14 = 5, +}; - props->lid = be16_to_cpup((__be16 *)(out_mad->data + 16)); - props->lmc = out_mad->data[34] & 0x7; - props->sm_lid = be16_to_cpup((__be16 *)(out_mad->data + 18)); - props->sm_sl = out_mad->data[36] & 0xf; - props->state = out_mad->data[32] & 0xf; - props->phys_state = out_mad->data[33] >> 4; - props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); - props->gid_tbl_len = out_mad->data[50]; - props->max_msg_sz = 1 << gen->log_max_msg; - props->pkey_tbl_len = gen->port[port - 1].pkey_table_len; - 
props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); - props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); - props->active_width = out_mad->data[31] & 0xf; - props->active_speed = out_mad->data[35] >> 4; - props->max_mtu = out_mad->data[41] & 0xf; - props->active_mtu = out_mad->data[36] >> 4; - props->subnet_timeout = out_mad->data[51] & 0x1f; - props->max_vl_num = out_mad->data[37] >> 4; - props->init_type_reply = out_mad->data[41] >> 4; - - /* Check if extended speeds (EDR/FDR/...) are supported */ - if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { - ext_active_speed = out_mad->data[62] >> 4; - - switch (ext_active_speed) { - case 1: - props->active_speed = 16; /* FDR */ - break; - case 2: - props->active_speed = 32; /* EDR */ - break; - } - } +enum mlx5_vl_hw_cap { + MLX5_VL_HW_0 = 1, + MLX5_VL_HW_0_1 = 2, + MLX5_VL_HW_0_2 = 3, + MLX5_VL_HW_0_3 = 4, + MLX5_VL_HW_0_4 = 5, + MLX5_VL_HW_0_5 = 6, + MLX5_VL_HW_0_6 = 7, + MLX5_VL_HW_0_7 = 8, + MLX5_VL_HW_0_14 = 15 +}; - /* If reported active speed is QDR, check if is FDR-10 */ - if (props->active_speed == 4) { - if (gen->ext_port_cap[port - 1] & - MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { - init_query_mad(in_mad); - in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); - - err = mlx5_MAD_IFC(dev, 1, 1, port, - NULL, NULL, in_mad, out_mad); - if (err) - goto out; - - /* Checking LinkSpeedActive for FDR-10 */ - if (out_mad->data[15] & 0x1) - props->active_speed = 8; - } - } +static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap, + u8 *max_vl_num) +{ + switch (vl_hw_cap) { + case MLX5_VL_HW_0: + *max_vl_num = __IB_MAX_VL_0; + break; + case MLX5_VL_HW_0_1: + *max_vl_num = __IB_MAX_VL_0_1; + break; + case MLX5_VL_HW_0_3: + *max_vl_num = __IB_MAX_VL_0_3; + break; + case MLX5_VL_HW_0_7: + *max_vl_num = __IB_MAX_VL_0_7; + break; + case MLX5_VL_HW_0_14: + *max_vl_num = __IB_MAX_VL_0_14; + break; -out: - kfree(in_mad); - 
kfree(out_mad); + default: + return -EINVAL; + } - return err; + return 0; } -static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid) +static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *rep; + int max_mtu; + int oper_mtu; + int err; + u8 ib_link_width_oper; + u8 vl_hw_cap; - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); - out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); - if (!in_mad || !out_mad) + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) { + err = -ENOMEM; goto out; + } - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); + memset(props, 0, sizeof(*props)); - err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep); if (err) goto out; - memcpy(gid->raw, out_mad->data + 8, 8); - - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; - in_mad->attr_mod = cpu_to_be32(index / 8); + props->lid = rep->lid; + props->lmc = rep->lmc; + props->sm_lid = rep->sm_lid; + props->sm_sl = rep->sm_sl; + props->state = rep->vport_state; + props->phys_state = rep->port_physical_state; + props->port_cap_flags = rep->cap_mask1; + props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size)); + props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); + props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size)); + props->bad_pkey_cntr = rep->pkey_violation_counter; + props->qkey_viol_cntr = rep->qkey_violation_counter; + props->subnet_timeout = rep->subnet_timeout; + props->init_type_reply = rep->init_type_reply; + + err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); + if (err) + goto 
out; - err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + err = translate_active_width(ibdev, ib_link_width_oper, + &props->active_width); + if (err) + goto out; + err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB, + port); if (err) goto out; - memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); + mlx5_query_port_max_mtu(mdev, &max_mtu, port); + + props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); + + mlx5_query_port_oper_mtu(mdev, &oper_mtu, port); + + props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu); + err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port); + if (err) + goto out; + + err = translate_max_vl_num(ibdev, vl_hw_cap, + &props->max_vl_num); out: - kfree(in_mad); - kfree(out_mad); + kfree(rep); return err; } -static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) +int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_port(ibdev, port, props); - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); - out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; + case MLX5_VPORT_ACCESS_METHOD_HCA: + return mlx5_query_hca_port(ibdev, port, props); + + default: + return -EINVAL; + } +} - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; - in_mad->attr_mod = cpu_to_be32(index / 32); +static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; - err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); - if (err) - goto out; + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_gids(ibdev, port, index, gid); 
- *pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]); + case MLX5_VPORT_ACCESS_METHOD_HCA: + return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid); + + default: + return -EINVAL; + } -out: - kfree(in_mad); - kfree(out_mad); - return err; } -struct mlx5_reg_node_desc { - u8 desc[64]; -}; +static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_core_dev *mdev = dev->mdev; + + switch (mlx5_get_vport_access_method(ibdev)) { + case MLX5_VPORT_ACCESS_METHOD_MAD: + return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey); + + case MLX5_VPORT_ACCESS_METHOD_HCA: + case MLX5_VPORT_ACCESS_METHOD_NIC: + return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index, + pkey); + default: + return -EINVAL; + } +} static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) @@ -396,7 +582,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct mlx5_ib_alloc_ucontext_req_v2 req; struct mlx5_ib_alloc_ucontext_resp resp; struct mlx5_ib_ucontext *context; - struct mlx5_general_caps *gen; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; int gross_uuars; @@ -407,7 +592,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int i; size_t reqlen; - gen = &dev->mdev->caps.gen; if (!dev->ib_active) return ERR_PTR(-EAGAIN); @@ -440,14 +624,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; - resp.qp_tab_size = 1 << gen->log_max_qp; - resp.bf_reg_size = gen->bf_reg_size; - resp.cache_line_size = L1_CACHE_BYTES; - resp.max_sq_desc_sz = gen->max_sq_desc_sz; - resp.max_rq_desc_sz = gen->max_rq_desc_sz; - resp.max_send_wqebb = gen->max_wqes; - resp.max_recv_wr = gen->max_wqes; - resp.max_srq_recv_wr = gen->max_srq_wqes; + resp.qp_tab_size = 1 << 
MLX5_CAP_GEN(dev->mdev, log_max_qp); + resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); + resp.cache_line_size = L1_CACHE_BYTES; + resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); + resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); + resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); + resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -497,7 +681,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, mutex_init(&context->db_page_mutex); resp.tot_uuars = req.total_num_uuars; - resp.num_ports = gen->num_ports; + resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); err = ib_copy_to_udata(udata, &resp, sizeof(resp) - sizeof(resp.reserved)); if (err) @@ -735,37 +919,15 @@ static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) static int init_node_data(struct mlx5_ib_dev *dev) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; - - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); - out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; - - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; - - err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); - if (err) - goto out; - - memcpy(dev->ib_dev.node_desc, out_mad->data, 64); - - in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + int err; - err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc); if (err) - goto out; + return err; - dev->mdev->rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32)); - memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); + dev->mdev->rev_id = dev->mdev->pdev->revision; -out: - kfree(in_mad); - kfree(out_mad); - return err; + return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); } static 
ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, @@ -899,11 +1061,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, static void get_ext_port_caps(struct mlx5_ib_dev *dev) { - struct mlx5_general_caps *gen; int port; - gen = &dev->mdev->caps.gen; - for (port = 1; port <= gen->num_ports; port++) + for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) mlx5_query_ext_port_caps(dev, port); } @@ -911,12 +1071,10 @@ static int get_port_caps(struct mlx5_ib_dev *dev) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; - struct mlx5_general_caps *gen; int err = -ENOMEM; int port; struct ib_udata uhw = {.inlen = 0, .outlen = 0}; - gen = &dev->mdev->caps.gen; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) goto out; @@ -931,14 +1089,17 @@ static int get_port_caps(struct mlx5_ib_dev *dev) goto out; } - for (port = 1; port <= gen->num_ports; port++) { + for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); + mlx5_ib_warn(dev, "query_port %d failed %d\n", + port, err); break; } - gen->port[port - 1].pkey_table_len = dprops->max_pkeys; - gen->port[port - 1].gid_table_len = pprops->gid_tbl_len; + dev->mdev->port_caps[port - 1].pkey_table_len = + dprops->max_pkeys; + dev->mdev->port_caps[port - 1].gid_table_len = + pprops->gid_tbl_len; mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", dprops->max_pkeys, pprops->gid_tbl_len); } @@ -1167,8 +1328,29 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); + memset(&attr, 0, sizeof(attr)); + attr.attr.max_sge = 1; + attr.attr.max_wr = 1; + attr.srq_type = IB_SRQT_BASIC; + devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL); + if (IS_ERR(devr->s1)) { + ret = PTR_ERR(devr->s1); + goto error5; + } + devr->s1->device = 
&dev->ib_dev; + devr->s1->pd = devr->p0; + devr->s1->uobject = NULL; + devr->s1->event_handler = NULL; + devr->s1->srq_context = NULL; + devr->s1->srq_type = IB_SRQT_BASIC; + devr->s1->ext.xrc.cq = devr->c0; + atomic_inc(&devr->p0->usecnt); + atomic_set(&devr->s0->usecnt, 0); + return 0; +error5: + mlx5_ib_destroy_srq(devr->s0); error4: mlx5_ib_dealloc_xrcd(devr->x1); error3: @@ -1183,6 +1365,7 @@ error0: static void destroy_dev_resources(struct mlx5_ib_resources *devr) { + mlx5_ib_destroy_srq(devr->s1); mlx5_ib_destroy_srq(devr->s0); mlx5_ib_dealloc_xrcd(devr->x0); mlx5_ib_dealloc_xrcd(devr->x1); @@ -1214,6 +1397,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) int err; int i; + /* don't create IB instance over Eth ports, no RoCE yet! */ + if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) + return NULL; + printk_once(KERN_INFO "%s", mlx5_version); dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); @@ -1226,15 +1413,16 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_dealloc; - get_ext_port_caps(dev); + if (mlx5_use_mad_ifc(dev)) + get_ext_port_caps(dev); MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = mdev->caps.gen.reserved_lkey; - dev->num_ports = mdev->caps.gen.num_ports; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; + dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = dev->mdev->priv.eq_table.num_comp_vectors; @@ -1313,9 +1501,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; - mlx5_ib_internal_query_odp_caps(dev); + mlx5_ib_internal_fill_odp_caps(dev); - if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) { + if (MLX5_CAP_GEN(mdev, xrc)) { 
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= |