diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-13 12:57:21 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-13 12:57:21 -0700 |
commit | c55244137306b626bc64023fd7160985443205a7 (patch) | |
tree | 459acfb5c9b41e3e1616fb36aafda68a07ddbf54 | |
parent | 858655116bfc722837e3aec0909b8e9d08f96996 (diff) | |
parent | e04abfa2436e3ab016b23eb1afb2c5578b8dc2cf (diff) | |
download | op-kernel-dev-c55244137306b626bc64023fd7160985443205a7.zip op-kernel-dev-c55244137306b626bc64023fd7160985443205a7.tar.gz |
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull InfiniBand/RDMA changes from Roland Dreier:
- AF_IB (native IB addressing) for CMA from Sean Hefty
- new mlx5 driver for Mellanox Connect-IB adapters (including post
merge request fixes)
- SRP fixes from Bart Van Assche (including fix to first merge request)
- qib HW driver updates
- resurrection of ocrdma HW driver development
- uverbs conversion to create fds with O_CLOEXEC set
- other small changes and fixes
* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (66 commits)
mlx5: Return -EFAULT instead of -EPERM
IB/qib: Log all SDMA errors unconditionally
IB/qib: Fix module-level leak
mlx5_core: Adjust hca_cap.uar_page_sz to conform to Connect-IB spec
IB/srp: Let srp_abort() return FAST_IO_FAIL if TL offline
IB/uverbs: Use get_unused_fd_flags(O_CLOEXEC) instead of get_unused_fd()
mlx5_core: Fixes for sparse warnings
IB/mlx5: Make profile[] static in main.c
mlx5: Fix parameter type of health_handler_t
mlx5: Add driver for Mellanox Connect-IB adapters
IB/core: Add reserved values to enums for low-level driver use
IB/srp: Bump driver version and release date
IB/srp: Make HCA completion vector configurable
IB/srp: Maintain a single connection per I_T nexus
IB/srp: Fail I/O fast if target offline
IB/srp: Skip host settle delay
IB/srp: Avoid skipping srp_reset_host() after a transport error
IB/srp: Fix remove_one crash due to resource exhaustion
IB/qib: New transmitter tunning settings for Dell 1.1 backplane
IB/core: Fix error return code in add_port()
...
85 files changed, 18436 insertions, 803 deletions
diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp index 481aae9..5c53d28 100644 --- a/Documentation/ABI/stable/sysfs-driver-ib_srp +++ b/Documentation/ABI/stable/sysfs-driver-ib_srp @@ -54,6 +54,13 @@ Description: Interface for making ib_srp connect to a new target. ib_srp. Specifying a value that exceeds cmd_sg_entries is only safe with partial memory descriptor list support enabled (allow_ext_sg=1). + * comp_vector, a number in the range 0..n-1 specifying the + MSI-X completion vector. Some HCA's allocate multiple (n) + MSI-X vectors per HCA port. If the IRQ affinity masks of + these interrupts have been configured such that each MSI-X + interrupt is handled by a different CPU then the comp_vector + parameter can be used to spread the SRP completion workload + over multiple CPU's. What: /sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev Date: January 2, 2006 diff --git a/MAINTAINERS b/MAINTAINERS index b41a9fc..705681e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5430,6 +5430,28 @@ W: http://linuxtv.org S: Odd Fixes F: drivers/media/radio/radio-miropcm20* +Mellanox MLX5 core VPI driver +M: Eli Cohen <eli@mellanox.com> +L: netdev@vger.kernel.org +L: linux-rdma@vger.kernel.org +W: http://www.mellanox.com +Q: http://patchwork.ozlabs.org/project/netdev/list/ +Q: http://patchwork.kernel.org/project/linux-rdma/list/ +T: git://openfabrics.org/~eli/connect-ib.git +S: Supported +F: drivers/net/ethernet/mellanox/mlx5/core/ +F: include/linux/mlx5/ + +Mellanox MLX5 IB driver +M: Eli Cohen <eli@mellanox.com> +L: linux-rdma@vger.kernel.org +W: http://www.mellanox.com +Q: http://patchwork.kernel.org/project/linux-rdma/list/ +T: git://openfabrics.org/~eli/connect-ib.git +S: Supported +F: include/linux/mlx5/ +F: drivers/infiniband/hw/mlx5/ + MODULE SUPPORT M: Rusty Russell <rusty@rustcorp.com.au> S: Maintained diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index c85b56c..5ceda71 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -50,6 +50,7 @@ source "drivers/infiniband/hw/amso1100/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" +source "drivers/infiniband/hw/mlx5/Kconfig" source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/hw/ocrdma/Kconfig" diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index b126fef..1fe6988 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += hw/cxgb4/ obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/ +obj-$(CONFIG_MLX5_INFINIBAND) += hw/mlx5/ obj-$(CONFIG_INFINIBAND_NES) += hw/nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index eaec8d7..e90f2b2 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -45,6 +45,7 @@ #include <net/addrconf.h> #include <net/ip6_route.h> #include <rdma/ib_addr.h> +#include <rdma/ib.h> MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("IB Address Translation"); @@ -70,6 +71,21 @@ static LIST_HEAD(req_list); static DECLARE_DELAYED_WORK(work, process_req); static struct workqueue_struct *addr_wq; +int rdma_addr_size(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: + return sizeof(struct sockaddr_in); + case AF_INET6: + return sizeof(struct sockaddr_in6); + case AF_IB: + return sizeof(struct sockaddr_ib); + default: + return 0; + } +} +EXPORT_SYMBOL(rdma_addr_size); + void rdma_addr_register_client(struct rdma_addr_client *client) { atomic_set(&client->refcount, 1); @@ -369,12 +385,12 @@ int rdma_resolve_ip(struct rdma_addr_client *client, goto err; } - memcpy(src_in, src_addr, ip_addr_size(src_addr)); + memcpy(src_in, src_addr, rdma_addr_size(src_addr)); } else { src_in->sa_family = dst_addr->sa_family; } - memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); + memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr)); req->addr = addr; req->callback = callback; req->context = context; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 34fbc2f..f1c279f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -50,6 +50,7 @@ #include <rdma/rdma_cm.h> #include <rdma/rdma_cm_ib.h> #include <rdma/rdma_netlink.h> +#include <rdma/ib.h> #include <rdma/ib_cache.h> #include <rdma/ib_cm.h> #include <rdma/ib_sa.h> @@ -79,7 +80,6 @@ static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; -static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); static DEFINE_IDR(ipoib_ps); @@ -195,24 +195,7 @@ struct cma_hdr { union cma_ip_addr dst_addr; }; -struct sdp_hh { - u8 bsdh[16]; - u8 sdp_version; /* Major version: 7:4 */ - u8 ip_version; /* IP version: 7:4 */ - u8 sdp_specific1[10]; - __be16 port; - __be16 sdp_specific2; - union cma_ip_addr src_addr; - union cma_ip_addr dst_addr; -}; - -struct sdp_hah { - u8 bsdh[16]; - u8 sdp_version; -}; - #define CMA_VERSION 0x00 -#define SDP_MAJ_VERSION 0x2 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) { @@ -261,21 +244,6 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); } -static inline u8 sdp_get_majv(u8 sdp_version) -{ - return sdp_version >> 4; -} - -static inline u8 sdp_get_ip_ver(struct sdp_hh *hh) -{ - return hh->ip_version >> 4; -} - -static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) -{ - hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); -} - static void cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { @@ -310,16 +278,40 @@ static void cma_release_dev(struct rdma_id_private *id_priv) mutex_unlock(&lock); } -static int cma_set_qkey(struct rdma_id_private *id_priv) +static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) +{ + return (struct sockaddr *) &id_priv->id.route.addr.src_addr; +} + +static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) +{ + return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; +} + +static inline unsigned short cma_family(struct rdma_id_private *id_priv) +{ + return id_priv->id.route.addr.src_addr.ss_family; +} + +static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) { struct ib_sa_mcmember_rec rec; int ret = 0; - if (id_priv->qkey) + if (id_priv->qkey) { + if (qkey && id_priv->qkey != qkey) + return -EINVAL; + return 0; + } + + if (qkey) { + id_priv->qkey = qkey; return 0; + } switch (id_priv->id.ps) { case RDMA_PS_UDP: + case RDMA_PS_IB: id_priv->qkey = RDMA_UDP_QKEY; break; case RDMA_PS_IPOIB: @@ -358,6 +350,27 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu return -EADDRNOTAVAIL; } +static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) +{ + dev_addr->dev_type = ARPHRD_INFINIBAND; + rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); + ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); +} + +static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +{ + int ret; + + if (addr->sa_family != AF_IB) { + ret = rdma_translate_ip(addr, dev_addr); + } else { + cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); + ret = 0; + } + + return ret; +} + static int cma_acquire_dev(struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; @@ -401,6 +414,61 @@ out: return ret; } +/* + * Select the source IB device and address to reach the destination IB address. + */ +static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev, *cur_dev; + struct sockaddr_ib *addr; + union ib_gid gid, sgid, *dgid; + u16 pkey, index; + u8 port, p; + int i; + + cma_dev = NULL; + addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); + dgid = (union ib_gid *) &addr->sib_addr; + pkey = ntohs(addr->sib_pkey); + + list_for_each_entry(cur_dev, &dev_list, list) { + if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) + continue; + + for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) + continue; + + for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) { + if (!memcmp(&gid, dgid, sizeof(gid))) { + cma_dev = cur_dev; + sgid = gid; + port = p; + goto found; + } + + if (!cma_dev && (gid.global.subnet_prefix == + dgid->global.subnet_prefix)) { + cma_dev = cur_dev; + sgid = gid; + port = p; + } + } + } + } + + if (!cma_dev) + return -ENODEV; + +found: + cma_attach_to_dev(id_priv, cma_dev); + id_priv->id.port_num = port; + addr = (struct sockaddr_ib *) cma_src_addr(id_priv); + memcpy(&addr->sib_addr, &sgid, sizeof sgid); + cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); + return 0; +} + static void cma_deref_id(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->refcount)) @@ -630,7 +698,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; if (id_priv->id.qp_type == IB_QPT_UD) { - ret = cma_set_qkey(id_priv); + ret = cma_set_qkey(id_priv, 0); if (ret) return ret; @@ -679,26 +747,30 @@ EXPORT_SYMBOL(rdma_init_qp_attr); static inline int cma_zero_addr(struct sockaddr *addr) { - struct in6_addr *ip6; - - if (addr->sa_family == AF_INET) - return ipv4_is_zeronet( - ((struct sockaddr_in *)addr)->sin_addr.s_addr); - else { - ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; - return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | - ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; + switch (addr->sa_family) { + case AF_INET: + return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); + case AF_INET6: + return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); + case AF_IB: + return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); + default: + return 0; } } static inline int cma_loopback_addr(struct sockaddr *addr) { - if (addr->sa_family == AF_INET) - return ipv4_is_loopback( - ((struct sockaddr_in *) addr)->sin_addr.s_addr); - else - return ipv6_addr_loopback( - &((struct sockaddr_in6 *) addr)->sin6_addr); + switch (addr->sa_family) { + case AF_INET: + return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); + case AF_INET6: + return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); + case AF_IB: + return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); + default: + return 0; + } } static inline int cma_any_addr(struct sockaddr *addr) @@ -715,18 +787,31 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) case AF_INET: return ((struct sockaddr_in *) src)->sin_addr.s_addr != ((struct sockaddr_in *) dst)->sin_addr.s_addr; - default: + case AF_INET6: return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, &((struct sockaddr_in6 *) dst)->sin6_addr); + default: + return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, + &((struct sockaddr_ib *) dst)->sib_addr); } } -static inline __be16 cma_port(struct sockaddr *addr) +static __be16 cma_port(struct sockaddr *addr) { - if (addr->sa_family == AF_INET) + struct sockaddr_ib *sib; + + switch (addr->sa_family) { + case AF_INET: return ((struct sockaddr_in *) addr)->sin_port; - else + case AF_INET6: return ((struct sockaddr_in6 *) addr)->sin6_port; + case AF_IB: + sib = (struct sockaddr_ib *) addr; + return htons((u16) (be64_to_cpu(sib->sib_sid) & + be64_to_cpu(sib->sib_sid_mask))); + default: + return 0; + } } static inline int cma_any_port(struct sockaddr *addr) @@ -734,83 +819,92 @@ static inline int cma_any_port(struct sockaddr *addr) return !cma_port(addr); } -static int cma_get_net_info(void *hdr, enum rdma_port_space ps, - u8 *ip_ver, __be16 *port, - union cma_ip_addr **src, union cma_ip_addr **dst) +static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, + struct ib_sa_path_rec *path) { - switch (ps) { - case RDMA_PS_SDP: - if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) != - SDP_MAJ_VERSION) - return -EINVAL; + struct sockaddr_ib *listen_ib, *ib; - *ip_ver = sdp_get_ip_ver(hdr); - *port = ((struct sdp_hh *) hdr)->port; - *src = &((struct sdp_hh *) hdr)->src_addr; - *dst = &((struct sdp_hh *) hdr)->dst_addr; - break; - default: - if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION) - return -EINVAL; + listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; + ib = (struct sockaddr_ib *) &id->route.addr.src_addr; + ib->sib_family = listen_ib->sib_family; + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->sgid, 16); + ib->sib_sid = listen_ib->sib_sid; + ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); + ib->sib_scope_id = listen_ib->sib_scope_id; - *ip_ver = cma_get_ip_ver(hdr); - *port = ((struct cma_hdr *) hdr)->port; - *src = &((struct cma_hdr *) hdr)->src_addr; - *dst = &((struct cma_hdr *) hdr)->dst_addr; - break; - } - - if (*ip_ver != 4 && *ip_ver != 6) - return -EINVAL; - return 0; + ib = (struct sockaddr_ib *) &id->route.addr.dst_addr; + ib->sib_family = listen_ib->sib_family; + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->dgid, 16); } -static void cma_save_net_info(struct rdma_addr *addr, - struct rdma_addr *listen_addr, - u8 ip_ver, __be16 port, - union cma_ip_addr *src, union cma_ip_addr *dst) +static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, + struct cma_hdr *hdr) { struct sockaddr_in *listen4, *ip4; + + listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr; + ip4 = (struct sockaddr_in *) &id->route.addr.src_addr; + ip4->sin_family = listen4->sin_family; + ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr; + ip4->sin_port = listen4->sin_port; + + ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr; + ip4->sin_family = listen4->sin_family; + ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr; + ip4->sin_port = hdr->port; +} + +static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, + struct cma_hdr *hdr) +{ struct sockaddr_in6 *listen6, *ip6; - switch (ip_ver) { + listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr; + ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr; + ip6->sin6_family = listen6->sin6_family; + ip6->sin6_addr = hdr->dst_addr.ip6; + ip6->sin6_port = listen6->sin6_port; + + ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr; + ip6->sin6_family = listen6->sin6_family; + ip6->sin6_addr = hdr->src_addr.ip6; + ip6->sin6_port = hdr->port; +} + +static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event) +{ + struct cma_hdr *hdr; + + if (listen_id->route.addr.src_addr.ss_family == AF_IB) { + cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path); + return 0; + } + + hdr = ib_event->private_data; + if (hdr->cma_version != CMA_VERSION) + return -EINVAL; + + switch (cma_get_ip_ver(hdr)) { case 4: - listen4 = (struct sockaddr_in *) &listen_addr->src_addr; - ip4 = (struct sockaddr_in *) &addr->src_addr; - ip4->sin_family = listen4->sin_family; - ip4->sin_addr.s_addr = dst->ip4.addr; - ip4->sin_port = listen4->sin_port; - - ip4 = (struct sockaddr_in *) &addr->dst_addr; - ip4->sin_family = listen4->sin_family; - ip4->sin_addr.s_addr = src->ip4.addr; - ip4->sin_port = port; + cma_save_ip4_info(id, listen_id, hdr); break; case 6: - listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr; - ip6 = (struct sockaddr_in6 *) &addr->src_addr; - ip6->sin6_family = listen6->sin6_family; - ip6->sin6_addr = dst->ip6; - ip6->sin6_port = listen6->sin6_port; - - ip6 = (struct sockaddr_in6 *) &addr->dst_addr; - ip6->sin6_family = listen6->sin6_family; - ip6->sin6_addr = src->ip6; - ip6->sin6_port = port; + cma_save_ip6_info(id, listen_id, hdr); break; default: - break; + return -EINVAL; } + return 0; } -static inline int cma_user_data_offset(enum rdma_port_space ps) +static inline int cma_user_data_offset(struct rdma_id_private *id_priv) { - switch (ps) { - case RDMA_PS_SDP: - return 0; - default: - return sizeof(struct cma_hdr); - } + return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); } static void cma_cancel_route(struct rdma_id_private *id_priv) @@ -861,8 +955,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, cma_cancel_route(id_priv); break; case RDMA_CM_LISTEN: - if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr) - && !id_priv->cma_dev) + if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) cma_cancel_listens(id_priv); break; default: @@ -977,16 +1070,6 @@ reject: return ret; } -static int cma_verify_rep(struct rdma_id_private *id_priv, void *data) -{ - if (id_priv->id.ps == RDMA_PS_SDP && - sdp_get_majv(((struct sdp_hah *) data)->sdp_version) != - SDP_MAJ_VERSION) - return -EINVAL; - - return 0; -} - static void cma_set_rep_event_data(struct rdma_cm_event *event, struct ib_cm_rep_event_param *rep_data, void *private_data) @@ -1021,15 +1104,13 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.status = -ETIMEDOUT; break; case IB_CM_REP_RECEIVED: - event.status = cma_verify_rep(id_priv, ib_event->private_data); - if (event.status) - event.event = RDMA_CM_EVENT_CONNECT_ERROR; - else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) { + if (id_priv->id.qp) { event.status = cma_rep_recv(id_priv); event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : RDMA_CM_EVENT_ESTABLISHED; - } else + } else { event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; + } cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, ib_event->private_data); break; @@ -1085,22 +1166,16 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct rdma_id_private *id_priv; struct rdma_cm_id *id; struct rdma_route *rt; - union cma_ip_addr *src, *dst; - __be16 port; - u8 ip_ver; int ret; - if (cma_get_net_info(ib_event->private_data, listen_id->ps, - &ip_ver, &port, &src, &dst)) - return NULL; - id = rdma_create_id(listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) return NULL; - cma_save_net_info(&id->route.addr, &listen_id->route.addr, - ip_ver, port, src, dst); + id_priv = container_of(id, struct rdma_id_private, id); + if (cma_save_net_info(id, listen_id, ib_event)) + goto err; rt = &id->route; rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; @@ -1113,19 +1188,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; - if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { + if (cma_any_addr(cma_src_addr(id_priv))) { rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); } else { - ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, - &rt->addr.dev_addr); + ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); if (ret) goto err; } rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); - id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = RDMA_CM_CONNECT; return id_priv; @@ -1139,9 +1212,6 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, { struct rdma_id_private *id_priv; struct rdma_cm_id *id; - union cma_ip_addr *src, *dst; - __be16 port; - u8 ip_ver; int ret; id = rdma_create_id(listen_id->event_handler, listen_id->context, @@ -1149,22 +1219,16 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, if (IS_ERR(id)) return NULL; - - if (cma_get_net_info(ib_event->private_data, listen_id->ps, - &ip_ver, &port, &src, &dst)) + id_priv = container_of(id, struct rdma_id_private, id); + if (cma_save_net_info(id, listen_id, ib_event)) goto err; - cma_save_net_info(&id->route.addr, &listen_id->route.addr, - ip_ver, port, src, dst); - if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { - ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); + ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr); if (ret) goto err; } - id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = RDMA_CM_CONNECT; return id_priv; err: @@ -1210,7 +1274,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) return -ECONNABORTED; memset(&event, 0, sizeof event); - offset = cma_user_data_offset(listen_id->id.ps); + offset = cma_user_data_offset(listen_id); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { conn_id = cma_new_udp_id(&listen_id->id, ib_event); @@ -1272,58 +1336,44 @@ err1: return ret; } -static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr) +__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) { - return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr))); + if (addr->sa_family == AF_IB) + return ((struct sockaddr_ib *) addr)->sib_sid; + + return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); } +EXPORT_SYMBOL(rdma_get_service_id); static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, struct ib_cm_compare_data *compare) { struct cma_hdr *cma_data, *cma_mask; - struct sdp_hh *sdp_data, *sdp_mask; __be32 ip4_addr; struct in6_addr ip6_addr; memset(compare, 0, sizeof *compare); cma_data = (void *) compare->data; cma_mask = (void *) compare->mask; - sdp_data = (void *) compare->data; - sdp_mask = (void *) compare->mask; switch (addr->sa_family) { case AF_INET: ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr; - if (ps == RDMA_PS_SDP) { - sdp_set_ip_ver(sdp_data, 4); - sdp_set_ip_ver(sdp_mask, 0xF); - sdp_data->dst_addr.ip4.addr = ip4_addr; - sdp_mask->dst_addr.ip4.addr = htonl(~0); - } else { - cma_set_ip_ver(cma_data, 4); - cma_set_ip_ver(cma_mask, 0xF); - if (!cma_any_addr(addr)) { - cma_data->dst_addr.ip4.addr = ip4_addr; - cma_mask->dst_addr.ip4.addr = htonl(~0); - } + cma_set_ip_ver(cma_data, 4); + cma_set_ip_ver(cma_mask, 0xF); + if (!cma_any_addr(addr)) { + cma_data->dst_addr.ip4.addr = ip4_addr; + cma_mask->dst_addr.ip4.addr = htonl(~0); } break; case AF_INET6: ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; - if (ps == RDMA_PS_SDP) { - sdp_set_ip_ver(sdp_data, 6); - sdp_set_ip_ver(sdp_mask, 0xF); - sdp_data->dst_addr.ip6 = ip6_addr; - memset(&sdp_mask->dst_addr.ip6, 0xFF, - sizeof sdp_mask->dst_addr.ip6); - } else { - cma_set_ip_ver(cma_data, 6); - cma_set_ip_ver(cma_mask, 0xF); - if (!cma_any_addr(addr)) { - cma_data->dst_addr.ip6 = ip6_addr; - memset(&cma_mask->dst_addr.ip6, 0xFF, - sizeof cma_mask->dst_addr.ip6); - } + cma_set_ip_ver(cma_data, 6); + cma_set_ip_ver(cma_mask, 0xF); + if (!cma_any_addr(addr)) { + cma_data->dst_addr.ip6 = ip6_addr; + memset(&cma_mask->dst_addr.ip6, 0xFF, + sizeof cma_mask->dst_addr.ip6); } break; default: @@ -1347,9 +1397,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IW_CM_EVENT_CONNECT_REPLY: - sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + sin = (struct sockaddr_in *) cma_src_addr(id_priv); *sin = iw_event->local_addr; - sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; + sin = (struct sockaddr_in *) cma_dst_addr(id_priv); *sin = iw_event->remote_addr; switch (iw_event->status) { case 0: @@ -1447,9 +1497,9 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, cm_id->context = conn_id; cm_id->cm_handler = cma_iw_handler; - sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr; + sin = (struct sockaddr_in *) cma_src_addr(conn_id); *sin = iw_event->local_addr; - sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; + sin = (struct sockaddr_in *) cma_dst_addr(conn_id); *sin = iw_event->remote_addr; ret = ib_query_device(conn_id->id.device, &attr); @@ -1506,8 +1556,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) id_priv->cm_id.ib = id; - addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; - svc_id = cma_get_service_id(id_priv->id.ps, addr); + addr = cma_src_addr(id_priv); + svc_id = rdma_get_service_id(&id_priv->id, addr); if (cma_any_addr(addr) && !id_priv->afonly) ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); else { @@ -1537,7 +1587,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) id_priv->cm_id.iw = id; - sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + sin = (struct sockaddr_in *) cma_src_addr(id_priv); id_priv->cm_id.iw->local_addr = *sin; ret = iw_cm_listen(id_priv->cm_id.iw, backlog); @@ -1567,6 +1617,10 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct rdma_cm_id *id; int ret; + if (cma_family(id_priv) == AF_IB && + rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB) + return; + id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type); if (IS_ERR(id)) @@ -1575,8 +1629,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, dev_id_priv = container_of(id, struct rdma_id_private, id); dev_id_priv->state = RDMA_CM_ADDR_BOUND; - memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, - ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); + memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), + rdma_addr_size(cma_src_addr(id_priv))); cma_attach_to_dev(dev_id_priv, cma_dev); list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); @@ -1634,31 +1688,39 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct cma_work *work) { - struct rdma_addr *addr = &id_priv->id.route.addr; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct ib_sa_path_rec path_rec; ib_sa_comp_mask comp_mask; struct sockaddr_in6 *sin6; + struct sockaddr_ib *sib; memset(&path_rec, 0, sizeof path_rec); - rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); - rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); - path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); + rdma_addr_get_sgid(dev_addr, &path_rec.sgid); + rdma_addr_get_dgid(dev_addr, &path_rec.dgid); + path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; - path_rec.service_id = cma_get_service_id(id_priv->id.ps, - (struct sockaddr *) &addr->dst_addr); + path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; - if (addr->src_addr.ss_family == AF_INET) { + switch (cma_family(id_priv)) { + case AF_INET: path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); comp_mask |= IB_SA_PATH_REC_QOS_CLASS; - } else { - sin6 = (struct sockaddr_in6 *) &addr->src_addr; + break; + case AF_INET6: + sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; + break; + case AF_IB: + sib = (struct sockaddr_ib *) cma_src_addr(id_priv); + path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); + comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; + break; } id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, @@ -1800,14 +1862,9 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) struct rdma_addr *addr = &route->addr; struct cma_work *work; int ret; - struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr; - struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr; struct net_device *ndev = NULL; u16 vid; - if (src_addr->sin_family != dst_addr->sin_family) - return -EINVAL; - work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; @@ -1913,28 +1970,57 @@ err: } EXPORT_SYMBOL(rdma_resolve_route); +static void cma_set_loopback(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + break; + case AF_INET6: + ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, + 0, 0, 0, htonl(1)); + break; + default: + ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, + 0, 0, 0, htonl(1)); + break; + } +} + static int cma_bind_loopback(struct rdma_id_private *id_priv) { - struct cma_device *cma_dev; + struct cma_device *cma_dev, *cur_dev; struct ib_port_attr port_attr; union ib_gid gid; u16 pkey; int ret; u8 p; + cma_dev = NULL; mutex_lock(&lock); - if (list_empty(&dev_list)) { + list_for_each_entry(cur_dev, &dev_list, list) { + if (cma_family(id_priv) == AF_IB && + rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) + continue; + + if (!cma_dev) + cma_dev = cur_dev; + + for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + if (!ib_query_port(cur_dev->device, p, &port_attr) && + port_attr.state == IB_PORT_ACTIVE) { + cma_dev = cur_dev; + goto port_found; + } + } + } + + if (!cma_dev) { ret = -ENODEV; goto out; } - list_for_each_entry(cma_dev, &dev_list, list) - for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p) - if (!ib_query_port(cma_dev->device, p, &port_attr) && - port_attr.state == IB_PORT_ACTIVE) - goto port_found; p = 1; - cma_dev = list_entry(dev_list.next, struct cma_device, list); port_found: ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); @@ -1953,6 +2039,7 @@ port_found: ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); + cma_set_loopback(cma_src_addr(id_priv)); out: mutex_unlock(&lock); return ret; @@ -1980,8 +2067,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = status; } else { - memcpy(&id_priv->id.route.addr.src_addr, src_addr, - ip_addr_size(src_addr)); + memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); event.event = RDMA_CM_EVENT_ADDR_RESOLVED; } @@ -2000,7 +2086,6 @@ out: static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; - struct sockaddr *src, *dst; union ib_gid gid; int ret; @@ -2017,18 +2102,36 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); - src = (struct sockaddr *) &id_priv->id.route.addr.src_addr; - if (cma_zero_addr(src)) { - dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; - if ((src->sa_family = dst->sa_family) == AF_INET) { - ((struct sockaddr_in *)src)->sin_addr = - ((struct sockaddr_in *)dst)->sin_addr; - } else { - ((struct sockaddr_in6 *)src)->sin6_addr = - ((struct sockaddr_in6 *)dst)->sin6_addr; - } + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + work->old_state = RDMA_CM_ADDR_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +err: + kfree(work); + return ret; +} + +static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) +{ + struct cma_work *work; + int ret; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + if (!id_priv->cma_dev) { + ret = cma_resolve_ib_dev(id_priv); + if (ret) + goto err; } + rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) + &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); + work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; @@ -2046,9 +2149,13 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, { if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; - if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) { + src_addr->sa_family = dst_addr->sa_family; + if (dst_addr->sa_family == AF_INET6) { ((struct sockaddr_in6 *) src_addr)->sin6_scope_id = ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *) src_addr)->sib_pkey = + ((struct sockaddr_ib *) dst_addr)->sib_pkey; } } return rdma_bind_addr(id, src_addr); @@ -2067,17 +2174,25 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, return ret; } + if (cma_family(id_priv) != dst_addr->sa_family) + return -EINVAL; + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); - memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr)); - if (cma_any_addr(dst_addr)) + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); + if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); - else - ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr, - dst_addr, &id->route.addr.dev_addr, - timeout_ms, addr_handler, id_priv); + } else { + if (dst_addr->sa_family == AF_IB) { + ret = cma_resolve_ib_addr(id_priv); + } else { + ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), + dst_addr, &id->route.addr.dev_addr, + timeout_ms, addr_handler, id_priv); + } + } if (ret) goto err; @@ -2097,7 +2212,7 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); - if (id_priv->state == RDMA_CM_IDLE) { + if (reuse || id_priv->state == RDMA_CM_IDLE) { id_priv->reuseaddr = reuse; ret = 0; } else { @@ -2131,10 +2246,29 @@ EXPORT_SYMBOL(rdma_set_afonly); static void cma_bind_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv) { - struct sockaddr_in *sin; + struct sockaddr *addr; + struct sockaddr_ib *sib; + u64 sid, mask; + __be16 port; - sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; - sin->sin_port = htons(bind_list->port); + addr = cma_src_addr(id_priv); + port = htons(bind_list->port); + + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *) addr)->sin_port = port; + break; + case AF_INET6: + ((struct sockaddr_in6 *) addr)->sin6_port = port; + break; + case AF_IB: + sib = (struct sockaddr_ib *) addr; + sid = be64_to_cpu(sib->sib_sid); + mask = be64_to_cpu(sib->sib_sid_mask); + sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); + sib->sib_sid_mask = cpu_to_be64(~0ULL); + break; + } id_priv->bind_list = bind_list; hlist_add_head(&id_priv->node, &bind_list->owners); } @@ -2205,7 +2339,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list, struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; - addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; + addr = cma_src_addr(id_priv); hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) continue; @@ -2214,7 +2348,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list, cur_id->reuseaddr) continue; - cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; + cur_addr = cma_src_addr(cur_id); if (id_priv->afonly && cur_id->afonly && (addr->sa_family != cur_addr->sa_family)) continue; @@ -2234,7 +2368,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) unsigned short snum; int ret; - snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)); + snum = ntohs(cma_port(cma_src_addr(id_priv))); if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; @@ -2261,33 +2395,67 @@ static int cma_bind_listen(struct rdma_id_private *id_priv) return ret; } -static int cma_get_port(struct rdma_id_private *id_priv) +static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv) { - struct idr *ps; - int ret; - switch (id_priv->id.ps) { - case RDMA_PS_SDP: - ps = &sdp_ps; - break; case RDMA_PS_TCP: - ps = &tcp_ps; - break; + return &tcp_ps; case RDMA_PS_UDP: - ps = &udp_ps; - break; + return &udp_ps; case RDMA_PS_IPOIB: - ps = &ipoib_ps; - break; + return &ipoib_ps; case RDMA_PS_IB: - ps = &ib_ps; - break; + return &ib_ps; default: - return -EPROTONOSUPPORT; + return NULL; + } +} + +static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv) +{ + struct idr *ps = NULL; + struct sockaddr_ib *sib; + u64 sid_ps, mask, sid; + + sib = (struct sockaddr_ib *) cma_src_addr(id_priv); + mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; + sid = be64_to_cpu(sib->sib_sid) & mask; + + if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { + sid_ps = RDMA_IB_IP_PS_IB; + ps = &ib_ps; + } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && + (sid == (RDMA_IB_IP_PS_TCP & mask))) { + sid_ps = RDMA_IB_IP_PS_TCP; + ps = &tcp_ps; + } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && + (sid == (RDMA_IB_IP_PS_UDP & mask))) { + sid_ps = RDMA_IB_IP_PS_UDP; + ps = &udp_ps; } + if (ps) { + sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); + sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | + be64_to_cpu(sib->sib_sid_mask)); + } + return ps; +} + +static int cma_get_port(struct rdma_id_private *id_priv) +{ + struct idr *ps; + int ret; + + if (cma_family(id_priv) != AF_IB) + ps = cma_select_inet_ps(id_priv); + else + ps = cma_select_ib_ps(id_priv); + if (!ps) + return -EPROTONOSUPPORT; + mutex_lock(&lock); - if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)) + if (cma_any_port(cma_src_addr(id_priv))) ret = cma_alloc_any_port(ps, id_priv); else ret = cma_use_port(ps, id_priv); @@ -2322,8 +2490,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == RDMA_CM_IDLE) { - ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; - ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); + id->route.addr.src_addr.ss_family = AF_INET; + ret = rdma_bind_addr(id, cma_src_addr(id_priv)); if (ret) return ret; } @@ -2370,7 +2538,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) struct rdma_id_private *id_priv; int ret; - if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && + addr->sa_family != AF_IB) return -EAFNOSUPPORT; id_priv = container_of(id, struct rdma_id_private, id); @@ -2382,7 +2551,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) goto err1; if (!cma_any_addr(addr)) { - ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); + ret = cma_translate_addr(addr, &id->route.addr.dev_addr); if (ret) goto err1; @@ -2391,7 +2560,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) goto err1; } - memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); + memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { if (addr->sa_family == AF_INET) id_priv->afonly = 1; @@ -2414,62 +2583,32 @@ err1: } EXPORT_SYMBOL(rdma_bind_addr); -static int cma_format_hdr(void *hdr, enum rdma_port_space ps, - struct rdma_route *route) +static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) { struct cma_hdr *cma_hdr; - struct sdp_hh *sdp_hdr; - if (route->addr.src_addr.ss_family == AF_INET) { + cma_hdr = hdr; + cma_hdr->cma_version = CMA_VERSION; + if (cma_family(id_priv) == AF_INET) { struct sockaddr_in *src4, *dst4; - src4 = (struct sockaddr_in *) &route->addr.src_addr; - dst4 = (struct sockaddr_in *) &route->addr.dst_addr; - - switch (ps) { - case RDMA_PS_SDP: - sdp_hdr = hdr; - if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) - return -EINVAL; - sdp_set_ip_ver(sdp_hdr, 4); - sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; - sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; - sdp_hdr->port = src4->sin_port; - break; - default: - cma_hdr = hdr; - cma_hdr->cma_version = CMA_VERSION; - cma_set_ip_ver(cma_hdr, 4); - cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; - cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; - cma_hdr->port = src4->sin_port; - break; - } - } else { + src4 = (struct sockaddr_in *) cma_src_addr(id_priv); + dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); + + cma_set_ip_ver(cma_hdr, 4); + cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + cma_hdr->port = src4->sin_port; + } else if (cma_family(id_priv) == AF_INET6) { struct sockaddr_in6 *src6, *dst6; - src6 = (struct sockaddr_in6 *) &route->addr.src_addr; - dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr; - - switch (ps) { - case RDMA_PS_SDP: - sdp_hdr = hdr; - if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) - return -EINVAL; - sdp_set_ip_ver(sdp_hdr, 6); - sdp_hdr->src_addr.ip6 = src6->sin6_addr; - sdp_hdr->dst_addr.ip6 = dst6->sin6_addr; - sdp_hdr->port = src6->sin6_port; - break; - default: - cma_hdr = hdr; - cma_hdr->cma_version = CMA_VERSION; - cma_set_ip_ver(cma_hdr, 6); - cma_hdr->src_addr.ip6 = src6->sin6_addr; - cma_hdr->dst_addr.ip6 = dst6->sin6_addr; - cma_hdr->port = src6->sin6_port; - break; - } + src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); + dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); + + cma_set_ip_ver(cma_hdr, 6); + cma_hdr->src_addr.ip6 = src6->sin6_addr; + cma_hdr->dst_addr.ip6 = dst6->sin6_addr; + cma_hdr->port = src6->sin6_port; } return 0; } @@ -2499,15 +2638,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, event.status = ib_event->param.sidr_rep_rcvd.status; break; } - ret = cma_set_qkey(id_priv); + ret = cma_set_qkey(id_priv, rep->qkey); if (ret) { event.event = RDMA_CM_EVENT_ADDR_ERROR; - event.status = -EINVAL; - break; - } - if (id_priv->qkey != rep->qkey) { - event.event = RDMA_CM_EVENT_UNREACHABLE; - event.status = -EINVAL; + event.status = ret; break; } ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, @@ -2542,27 +2676,31 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_sidr_req_param req; - struct rdma_route *route; struct ib_cm_id *id; - int ret; + int offset, ret; - req.private_data_len = sizeof(struct cma_hdr) + - conn_param->private_data_len; + offset = cma_user_data_offset(id_priv); + req.private_data_len = offset + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; - req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); - if (!req.private_data) - return -ENOMEM; + if (req.private_data_len) { + req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); + if (!req.private_data) + return -ENOMEM; + } else { + req.private_data = NULL; + } if (conn_param->private_data && conn_param->private_data_len) - memcpy((void *) req.private_data + sizeof(struct cma_hdr), + memcpy((void *) req.private_data + offset, conn_param->private_data, conn_param->private_data_len); - route = &id_priv->id.route; - ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route); - if (ret) - goto out; + if (req.private_data) { + ret = cma_format_hdr((void *) req.private_data, id_priv); + if (ret) + goto out; + } id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, id_priv); @@ -2572,9 +2710,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, } id_priv->cm_id.ib = id; - req.path = route->path_rec; - req.service_id = cma_get_service_id(id_priv->id.ps, - (struct sockaddr *) &route->addr.dst_addr); + req.path = id_priv->id.route.path_rec; + req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; @@ -2598,14 +2735,18 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, int offset, ret; memset(&req, 0, sizeof req); - offset = cma_user_data_offset(id_priv->id.ps); + offset = cma_user_data_offset(id_priv); req.private_data_len = offset + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; - private_data = kzalloc(req.private_data_len, GFP_ATOMIC); - if (!private_data) - return -ENOMEM; + if (req.private_data_len) { + private_data = kzalloc(req.private_data_len, GFP_ATOMIC); + if (!private_data) + return -ENOMEM; + } else { + private_data = NULL; + } if (conn_param->private_data && conn_param->private_data_len) memcpy(private_data + offset, conn_param->private_data, @@ -2619,17 +2760,18 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, id_priv->cm_id.ib = id; route = &id_priv->id.route; - ret = cma_format_hdr(private_data, id_priv->id.ps, route); - if (ret) - goto out; - req.private_data = private_data; + if (private_data) { + ret = cma_format_hdr(private_data, id_priv); + if (ret) + goto out; + req.private_data = private_data; + } req.primary_path = &route->path_rec[0]; if (route->num_paths == 2) req.alternate_path = &route->path_rec[1]; - req.service_id = cma_get_service_id(id_priv->id.ps, - (struct sockaddr *) &route->addr.dst_addr); + req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.qp_num = id_priv->qp_num; req.qp_type = id_priv->id.qp_type; req.starting_psn = id_priv->seq_num; @@ -2668,10 +2810,10 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, id_priv->cm_id.iw = cm_id; - sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; + sin = (struct sockaddr_in *) cma_src_addr(id_priv); cm_id->local_addr = *sin; - sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; + sin = (struct sockaddr_in *) cma_dst_addr(id_priv); cm_id->remote_addr = *sin; ret = cma_modify_qp_rtr(id_priv, conn_param); @@ -2789,7 +2931,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv, } static int cma_send_sidr_rep(struct rdma_id_private *id_priv, - enum ib_cm_sidr_status status, + enum ib_cm_sidr_status status, u32 qkey, const void *private_data, int private_data_len) { struct ib_cm_sidr_rep_param rep; @@ -2798,7 +2940,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, memset(&rep, 0, sizeof rep); rep.status = status; if (status == IB_SIDR_SUCCESS) { - ret = cma_set_qkey(id_priv); + ret = cma_set_qkey(id_priv, qkey); if (ret) return ret; rep.qp_num = id_priv->qp_num; @@ -2832,11 +2974,12 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) if (id->qp_type == IB_QPT_UD) { if (conn_param) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + conn_param->qkey, conn_param->private_data, conn_param->private_data_len); else ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, - NULL, 0); + 0, NULL, 0); } else { if (conn_param) ret = cma_accept_ib(id_priv, conn_param); @@ -2897,7 +3040,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: if (id->qp_type == IB_QPT_UD) - ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, + ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, private_data, private_data_len); else ret = ib_send_cm_rej(id_priv->cm_id.ib, @@ -2958,6 +3101,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) return 0; + if (!status) + status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); mutex_lock(&id_priv->qp_mutex); if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, @@ -3004,6 +3149,8 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, 0xFF10A01B)) { /* IPv6 address is an SA assigned MGID. */ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else if (addr->sa_family == AF_IB) { + memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); } else if ((addr->sa_family == AF_INET6)) { ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) @@ -3031,9 +3178,12 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, if (ret) return ret; + ret = cma_set_qkey(id_priv, 0); + if (ret) + return ret; + cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); - if (id_priv->id.ps == RDMA_PS_UDP) - rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + rec.qkey = cpu_to_be32(id_priv->qkey); rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; @@ -3170,7 +3320,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, if (!mc) return -ENOMEM; - memcpy(&mc->addr, addr, ip_addr_size(addr)); + memcpy(&mc->addr, addr, rdma_addr_size(addr)); mc->context = context; mc->id_priv = id_priv; @@ -3215,7 +3365,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irq(&id_priv->lock); list_for_each_entry(mc, &id_priv->mc_list, list) { - if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) { + if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { list_del(&mc->list); spin_unlock_irq(&id_priv->lock); @@ -3436,33 +3586,16 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb) id_stats->bound_dev_if = id->route.addr.dev_addr.bound_dev_if; - if (id->route.addr.src_addr.ss_family == AF_INET) { - if (ibnl_put_attr(skb, nlh, - sizeof(struct sockaddr_in), - &id->route.addr.src_addr, - RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) { - goto out; - } - if (ibnl_put_attr(skb, nlh, - sizeof(struct sockaddr_in), - &id->route.addr.dst_addr, - RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) { - goto out; - } - } else if (id->route.addr.src_addr.ss_family == AF_INET6) { - if (ibnl_put_attr(skb, nlh, - sizeof(struct sockaddr_in6), - &id->route.addr.src_addr, - RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) { - goto out; - } - if (ibnl_put_attr(skb, nlh, - sizeof(struct sockaddr_in6), - &id->route.addr.dst_addr, - RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) { - goto out; - } - } + if (ibnl_put_attr(skb, nlh, + rdma_addr_size(cma_src_addr(id_priv)), + cma_src_addr(id_priv), + RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) + goto out; + if (ibnl_put_attr(skb, nlh, + rdma_addr_size(cma_src_addr(id_priv)), + cma_dst_addr(id_priv), + RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) + goto out; id_stats->pid = id_priv->owner; id_stats->port_space = id->ps; @@ -3527,7 +3660,6 @@ static void __exit cma_cleanup(void) rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); - idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); idr_destroy(&udp_ps); idr_destroy(&ipoib_ps); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 934f45e..9838ca4 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -652,6 +652,12 @@ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec) } EXPORT_SYMBOL(ib_sa_unpack_path); +void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute) +{ + ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute); +} +EXPORT_SYMBOL(ib_sa_pack_path); + static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 99904f7..cde1e7b 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -545,8 +545,10 @@ static int add_port(struct ib_device *device, int port_num, p->gid_group.name = "gids"; p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); - if (!p->gid_group.attrs) + if (!p->gid_group.attrs) { + ret = -ENOMEM; goto err_remove_pma; + } ret = sysfs_create_group(&p->kobj, &p->gid_group); if (ret) @@ -555,8 +557,10 @@ static int add_port(struct ib_device *device, int port_num, p->pkey_group.name = "pkeys"; p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, attr.pkey_tbl_len); - if (!p->pkey_group.attrs) + if (!p->pkey_group.attrs) { + ret = -ENOMEM; goto err_remove_gid; + } ret = sysfs_create_group(&p->kobj, &p->pkey_group); if (ret) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 5ca44cd..b0f189b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -47,6 +47,8 @@ #include <rdma/ib_marshall.h> #include <rdma/rdma_cm.h> #include <rdma/rdma_cm_ib.h> +#include <rdma/ib_addr.h> +#include <rdma/ib.h> MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); @@ -510,10 +512,10 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, return ret; } -static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf, +static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { - struct rdma_ucm_bind_addr cmd; + struct rdma_ucm_bind_ip cmd; struct ucma_context *ctx; int ret; @@ -529,24 +531,75 @@ static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf, return ret; } +static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_bind cmd; + struct sockaddr *addr; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + addr = (struct sockaddr *) &cmd.addr; + if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr))) + return -EINVAL; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_bind_addr(ctx->cm_id, addr); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_resolve_ip(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_resolve_ip cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, + (struct sockaddr *) &cmd.dst_addr, + cmd.timeout_ms); + ucma_put_ctx(ctx); + return ret; +} + static ssize_t ucma_resolve_addr(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_addr cmd; + struct sockaddr *src, *dst; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + src = (struct sockaddr *) &cmd.src_addr; + dst = (struct sockaddr *) &cmd.dst_addr; + if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) || + !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst))) + return -EINVAL; + ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, - (struct sockaddr *) &cmd.dst_addr, - cmd.timeout_ms); + ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } @@ -649,7 +702,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { - struct rdma_ucm_query_route cmd; + struct rdma_ucm_query cmd; struct rdma_ucm_query_route_resp resp; struct ucma_context *ctx; struct sockaddr *addr; @@ -709,7 +762,162 @@ out: return ret; } -static void ucma_copy_conn_param(struct rdma_conn_param *dst, +static void ucma_query_device_addr(struct rdma_cm_id *cm_id, + struct rdma_ucm_query_addr_resp *resp) +{ + if (!cm_id->device) + return; + + resp->node_guid = (__force __u64) cm_id->device->node_guid; + resp->port_num = cm_id->port_num; + resp->pkey = (__force __u16) cpu_to_be16( + ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); +} + +static ssize_t ucma_query_addr(struct ucma_context *ctx, + void __user *response, int out_len) +{ + struct rdma_ucm_query_addr_resp resp; + struct sockaddr *addr; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + memset(&resp, 0, sizeof resp); + + addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; + resp.src_size = rdma_addr_size(addr); + memcpy(&resp.src_addr, addr, resp.src_size); + + addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; + resp.dst_size = rdma_addr_size(addr); + memcpy(&resp.dst_addr, addr, resp.dst_size); + + ucma_query_device_addr(ctx->cm_id, &resp); + + if (copy_to_user(response, &resp, sizeof(resp))) + ret = -EFAULT; + + return ret; +} + +static ssize_t ucma_query_path(struct ucma_context *ctx, + void __user *response, int out_len) +{ + struct rdma_ucm_query_path_resp *resp; + int i, ret = 0; + + if (out_len < sizeof(*resp)) + return -ENOSPC; + + resp = kzalloc(out_len, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + resp->num_paths = ctx->cm_id->route.num_paths; + for (i = 0, out_len -= sizeof(*resp); + i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); + i++, out_len -= sizeof(struct ib_path_rec_data)) { + + resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | + IB_PATH_BIDIRECTIONAL; + ib_sa_pack_path(&ctx->cm_id->route.path_rec[i], + &resp->path_data[i].path_rec); + } + + if (copy_to_user(response, resp, + sizeof(*resp) + (i * sizeof(struct ib_path_rec_data)))) + ret = -EFAULT; + + kfree(resp); + return ret; +} + +static ssize_t ucma_query_gid(struct ucma_context *ctx, + void __user *response, int out_len) +{ + struct rdma_ucm_query_addr_resp resp; + struct sockaddr_ib *addr; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + memset(&resp, 0, sizeof resp); + + ucma_query_device_addr(ctx->cm_id, &resp); + + addr = (struct sockaddr_ib *) &resp.src_addr; + resp.src_size = sizeof(*addr); + if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) { + memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size); + } else { + addr->sib_family = AF_IB; + addr->sib_pkey = (__force __be16) resp.pkey; + rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr, + (union ib_gid *) &addr->sib_addr); + addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) + &ctx->cm_id->route.addr.src_addr); + } + + addr = (struct sockaddr_ib *) &resp.dst_addr; + resp.dst_size = sizeof(*addr); + if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) { + memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size); + } else { + addr->sib_family = AF_IB; + addr->sib_pkey = (__force __be16) resp.pkey; + rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr, + (union ib_gid *) &addr->sib_addr); + addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) + &ctx->cm_id->route.addr.dst_addr); + } + + if (copy_to_user(response, &resp, sizeof(resp))) + ret = -EFAULT; + + return ret; +} + +static ssize_t ucma_query(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_query cmd; + struct ucma_context *ctx; + void __user *response; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + response = (void __user *)(unsigned long) cmd.response; + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + switch (cmd.option) { + case RDMA_USER_CM_QUERY_ADDR: + ret = ucma_query_addr(ctx, response, out_len); + break; + case RDMA_USER_CM_QUERY_PATH: + ret = ucma_query_path(ctx, response, out_len); + break; + case RDMA_USER_CM_QUERY_GID: + ret = ucma_query_gid(ctx, response, out_len); + break; + default: + ret = -ENOSYS; + break; + } + + ucma_put_ctx(ctx); + return ret; +} + +static void ucma_copy_conn_param(struct rdma_cm_id *id, + struct rdma_conn_param *dst, struct rdma_ucm_conn_param *src) { dst->private_data = src->private_data; @@ -721,6 +929,7 @@ static void ucma_copy_conn_param(struct rdma_conn_param *dst, dst->rnr_retry_count = src->rnr_retry_count; dst->srq = src->srq; dst->qp_num = src->qp_num; + dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0; } static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, @@ -741,7 +950,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - ucma_copy_conn_param(&conn_param, &cmd.conn_param); + ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); ret = rdma_connect(ctx->cm_id, &conn_param); ucma_put_ctx(ctx); return ret; @@ -784,7 +993,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); if (cmd.conn_param.valid) { - ucma_copy_conn_param(&conn_param, &cmd.conn_param); + ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); ret = rdma_accept(ctx->cm_id, &conn_param); if (!ret) @@ -1020,23 +1229,23 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, return ret; } -static ssize_t ucma_join_multicast(struct ucma_file *file, - const char __user *inbuf, - int in_len, int out_len) +static ssize_t ucma_process_join(struct ucma_file *file, + struct rdma_ucm_join_mcast *cmd, int out_len) { - struct rdma_ucm_join_mcast cmd; struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; struct ucma_multicast *mc; + struct sockaddr *addr; int ret; if (out_len < sizeof(resp)) return -ENOSPC; - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; + addr = (struct sockaddr *) &cmd->addr; + if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) + return -EINVAL; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx(file, cmd->id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1047,14 +1256,14 @@ static ssize_t ucma_join_multicast(struct ucma_file *file, goto err1; } - mc->uid = cmd.uid; - memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr); + mc->uid = cmd->uid; + memcpy(&mc->addr, addr, cmd->addr_size); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc); if (ret) goto err2; resp.id = mc->id; - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user((void __user *)(unsigned long) cmd->response, &resp, sizeof(resp))) { ret = -EFAULT; goto err3; @@ -1079,6 +1288,38 @@ err1: return ret; } +static ssize_t ucma_join_ip_multicast(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_join_ip_mcast cmd; + struct rdma_ucm_join_mcast join_cmd; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + join_cmd.response = cmd.response; + join_cmd.uid = cmd.uid; + join_cmd.id = cmd.id; + join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); + join_cmd.reserved = 0; + memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); + + return ucma_process_join(file, &join_cmd, out_len); +} + +static ssize_t ucma_join_multicast(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_join_mcast cmd; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + return ucma_process_join(file, &cmd, out_len); +} + static ssize_t ucma_leave_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) @@ -1221,25 +1462,29 @@ file_put: static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { - [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, - [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, - [RDMA_USER_CM_CMD_BIND_ADDR] = ucma_bind_addr, - [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, - [RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route, - [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, - [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, - [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, - [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, - [RDMA_USER_CM_CMD_REJECT] = ucma_reject, - [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, - [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, - [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, - [RDMA_USER_CM_CMD_GET_OPTION] = NULL, - [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, - [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, - [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, - [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, - [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id + [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, + [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, + [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, + [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, + [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, + [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, + [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, + [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, + [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, + [RDMA_USER_CM_CMD_REJECT] = ucma_reject, + [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, + [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, + [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, + [RDMA_USER_CM_CMD_GET_OPTION] = NULL, + [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, + [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, + [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, + [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, + [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, + [RDMA_USER_CM_CMD_QUERY] = ucma_query, + [RDMA_USER_CM_CMD_BIND] = ucma_bind, + [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, + [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast }; static ssize_t ucma_write(struct file *filp, const char __user *buf, diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a7d00f6..b3c07b0 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -334,7 +334,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, resp.num_comp_vectors = file->device->num_comp_vectors; - ret = get_unused_fd(); + ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) goto err_free; resp.async_fd = ret; @@ -1184,7 +1184,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - ret = get_unused_fd(); + ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) return ret; resp.fd = ret; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index e5649e8..b57c0be 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -883,7 +883,8 @@ u16 iwch_rqes_posted(struct iwch_qp *qhp) { union t3_wr *wqe = qhp->wq.queue; u16 count = 0; - while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { + + while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { count++; wqe++; } diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 982e3ef..cd8d290 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -211,6 +211,7 @@ static int ehca_create_slab_caches(void) if (!ctblk_cache) { ehca_gen_err("Cannot create ctblk SLAB cache."); ehca_cleanup_small_qp_cache(); + ret = -ENOMEM; goto create_slab_caches6; } #endif diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig new file mode 100644 index 0000000..8e6aebf --- /dev/null +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -0,0 +1,10 @@ +config MLX5_INFINIBAND + tristate "Mellanox Connect-IB HCA support" + depends on NETDEVICES && ETHERNET && PCI && X86 + select NET_VENDOR_MELLANOX + select MLX5_CORE + ---help--- + This driver provides low-level InfiniBand support for + Mellanox Connect-IB PCI Express host channel adapters (HCAs). + This is required to use InfiniBand protocols such as + IP-over-IB or SRP with these devices. diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile new file mode 100644 index 0000000..4ea0135 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o + +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c new file mode 100644 index 0000000..39ab0ca --- /dev/null +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "mlx5_ib.h" + +struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, + struct mlx5_ib_ah *ah) +{ + if (ah_attr->ah_flags & IB_AH_GRH) { + memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16); + ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label | + (1 << 30) | + ah_attr->grh.sgid_index << 20); + ah->av.hop_limit = ah_attr->grh.hop_limit; + ah->av.tclass = ah_attr->grh.traffic_class; + } + + ah->av.rlid = cpu_to_be16(ah_attr->dlid); + ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f; + ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf); + + return &ah->ibah; +} + +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct mlx5_ib_ah *ah; + + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + return create_ib_ah(ah_attr, ah); /* never fails */ +} + +int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct mlx5_ib_ah *ah = to_mah(ibah); + u32 tmp; + + memset(ah_attr, 0, sizeof(*ah_attr)); + + tmp = be32_to_cpu(ah->av.grh_gid_fl); + if (tmp & (1 << 30)) { + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.sgid_index = (tmp >> 20) & 0xff; + ah_attr->grh.flow_label = tmp & 0xfffff; + memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16); + ah_attr->grh.hop_limit = ah->av.hop_limit; + ah_attr->grh.traffic_class = ah->av.tclass; + } + ah_attr->dlid = be16_to_cpu(ah->av.rlid); + ah_attr->static_rate = ah->av.stat_rate_sl >> 4; + ah_attr->sl = ah->av.stat_rate_sl & 0xf; + + return 0; +} + +int mlx5_ib_destroy_ah(struct ib_ah *ah) +{ + kfree(to_mah(ah)); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c new file mode 100644 index 0000000..344ab03 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -0,0 +1,843 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kref.h> +#include <rdma/ib_umem.h> +#include "mlx5_ib.h" +#include "user.h" + +static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) +{ + struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; + + ibcq->comp_handler(ibcq, ibcq->cq_context); +} + +static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type) +{ + struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq); + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct ib_cq *ibcq = &cq->ibcq; + struct ib_event event; + + if (type != MLX5_EVENT_TYPE_CQ_ERROR) { + mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n", + type, mcq->cqn); + return; + } + + if (ibcq->event_handler) { + event.device = &dev->ib_dev; + event.event = IB_EVENT_CQ_ERR; + event.element.cq = ibcq; + ibcq->event_handler(&event, ibcq->cq_context); + } +} + +static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size) +{ + return mlx5_buf_offset(&buf->buf, n * size); +} + +static void *get_cqe(struct mlx5_ib_cq *cq, int n) +{ + return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz); +} + +static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) +{ + void *cqe = get_cqe(cq, n & cq->ibcq.cqe); + struct mlx5_cqe64 *cqe64; + + cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; + return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ + !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; +} + +static void *next_cqe_sw(struct mlx5_ib_cq *cq) +{ + return get_sw_cqe(cq, cq->mcq.cons_index); +} + +static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx) +{ + switch (wq->wr_data[idx]) { + case MLX5_IB_WR_UMR: + return 0; + + case IB_WR_LOCAL_INV: + return IB_WC_LOCAL_INV; + + case IB_WR_FAST_REG_MR: + return IB_WC_FAST_REG_MR; + + default: + pr_warn("unknown completion status\n"); + return 0; + } +} + +static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe, + struct mlx5_ib_wq *wq, int idx) +{ + wc->wc_flags = 0; + switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) { + case MLX5_OPCODE_RDMA_WRITE_IMM: + wc->wc_flags |= IB_WC_WITH_IMM; + case MLX5_OPCODE_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case MLX5_OPCODE_SEND_IMM: + wc->wc_flags |= IB_WC_WITH_IMM; + case MLX5_OPCODE_SEND: + case MLX5_OPCODE_SEND_INVAL: + wc->opcode = IB_WC_SEND; + break; + case MLX5_OPCODE_RDMA_READ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = be32_to_cpu(cqe->byte_cnt); + break; + case MLX5_OPCODE_ATOMIC_CS: + wc->opcode = IB_WC_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX5_OPCODE_ATOMIC_FA: + wc->opcode = IB_WC_FETCH_ADD; + wc->byte_len = 8; + break; + case MLX5_OPCODE_ATOMIC_MASKED_CS: + wc->opcode = IB_WC_MASKED_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX5_OPCODE_ATOMIC_MASKED_FA: + wc->opcode = IB_WC_MASKED_FETCH_ADD; + wc->byte_len = 8; + break; + case MLX5_OPCODE_BIND_MW: + wc->opcode = IB_WC_BIND_MW; + break; + case MLX5_OPCODE_UMR: + wc->opcode = get_umr_comp(wq, idx); + break; + } +} + +enum { + MLX5_GRH_IN_BUFFER = 1, + MLX5_GRH_IN_CQE = 2, +}; + +static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, + struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); + struct mlx5_ib_srq *srq; + struct mlx5_ib_wq *wq; + u16 wqe_ctr; + u8 g; + + if (qp->ibqp.srq || qp->ibqp.xrcd) { + struct mlx5_core_srq *msrq = NULL; + + if (qp->ibqp.xrcd) { + msrq = mlx5_core_get_srq(&dev->mdev, + be32_to_cpu(cqe->srqn)); + srq = to_mibsrq(msrq); + } else { + srq = to_msrq(qp->ibqp.srq); + } + if (srq) { + wqe_ctr = be16_to_cpu(cqe->wqe_counter); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx5_ib_free_srq_wqe(srq, wqe_ctr); + if (msrq && atomic_dec_and_test(&msrq->refcount)) + complete(&msrq->free); + } + } else { + wq = &qp->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + wc->byte_len = be32_to_cpu(cqe->byte_cnt); + + switch (cqe->op_own >> 4) { + case MLX5_CQE_RESP_WR_IMM: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->imm_inval_pkey; + break; + case MLX5_CQE_RESP_SEND: + wc->opcode = IB_WC_RECV; + wc->wc_flags = 0; + break; + case MLX5_CQE_RESP_SEND_IMM: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->imm_inval_pkey; + break; + case MLX5_CQE_RESP_SEND_INV: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey); + break; + } + wc->slid = be16_to_cpu(cqe->slid); + wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf; + wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff; + wc->dlid_path_bits = cqe->ml_path; + g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; + wc->wc_flags |= g ? IB_WC_GRH : 0; + wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff; +} + +static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) +{ + __be32 *p = (__be32 *)cqe; + int i; + + mlx5_ib_warn(dev, "dump error cqe\n"); + for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4) + pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]), + be32_to_cpu(p[1]), be32_to_cpu(p[2]), + be32_to_cpu(p[3])); +} + +static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, + struct mlx5_err_cqe *cqe, + struct ib_wc *wc) +{ + int dump = 1; + + switch (cqe->syndrome) { + case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR: + wc->status = IB_WC_LOC_QP_OP_ERR; + break; + case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case MLX5_CQE_SYNDROME_WR_FLUSH_ERR: + dump = 0; + wc->status = IB_WC_WR_FLUSH_ERR; + break; + case MLX5_CQE_SYNDROME_MW_BIND_ERR: + wc->status = IB_WC_MW_BIND_ERR; + break; + case MLX5_CQE_SYNDROME_BAD_RESP_ERR: + wc->status = IB_WC_BAD_RESP_ERR; + break; + case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: + wc->status = IB_WC_REM_INV_REQ_ERR; + break; + case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR: + wc->status = IB_WC_REM_ACCESS_ERR; + break; + case MLX5_CQE_SYNDROME_REMOTE_OP_ERR: + wc->status = IB_WC_REM_OP_ERR; + break; + case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + wc->status = IB_WC_RETRY_EXC_ERR; + dump = 0; + break; + case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + wc->status = IB_WC_RNR_RETRY_EXC_ERR; + dump = 0; + break; + case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR: + wc->status = IB_WC_REM_ABORT_ERR; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + wc->vendor_err = cqe->vendor_err_synd; + if (dump) + dump_cqe(dev, cqe); +} + +static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx) +{ + /* TBD: waiting decision + */ + return 0; +} + +static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx) +{ + struct mlx5_wqe_data_seg *dpseg; + void *addr; + + dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_atomic_seg); + addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr); + return addr; +} + +static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, + uint16_t idx) +{ + void *addr; + int byte_count; + int i; + + if (!is_atomic_response(qp, idx)) + return; + + byte_count = be32_to_cpu(cqe64->byte_cnt); + addr = mlx5_get_atomic_laddr(qp, idx); + + if (byte_count == 4) { + *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr)); + } else { + for (i = 0; i < byte_count; i += 8) { + *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr)); + addr += 8; + } + } + + return; +} + +static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, + u16 tail, u16 head) +{ + int idx; + + do { + idx = tail & (qp->sq.wqe_cnt - 1); + handle_atomic(qp, cqe64, idx); + if (idx == head) + break; + + tail = qp->sq.w_list[idx].next; + } while (1); + tail = qp->sq.w_list[idx].next; + qp->sq.last_poll = tail; +} + +static int mlx5_poll_one(struct mlx5_ib_cq *cq, + struct mlx5_ib_qp **cur_qp, + struct ib_wc *wc) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_err_cqe *err_cqe; + struct mlx5_cqe64 *cqe64; + struct mlx5_core_qp *mqp; + struct mlx5_ib_wq *wq; + uint8_t opcode; + uint32_t qpn; + u16 wqe_ctr; + void *cqe; + int idx; + + cqe = next_cqe_sw(cq); + if (!cqe) + return -EAGAIN; + + cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; + + ++cq->mcq.cons_index; + + /* Make sure we read CQ entry contents after we've checked the + * ownership bit. + */ + rmb(); + + /* TBD: resize CQ */ + + qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; + if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { + /* We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. + */ + mqp = __mlx5_qp_lookup(&dev->mdev, qpn); + if (unlikely(!mqp)) { + mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n", + cq->mcq.cqn, qpn); + return -EINVAL; + } + + *cur_qp = to_mibqp(mqp); + } + + wc->qp = &(*cur_qp)->ibqp; + opcode = cqe64->op_own >> 4; + switch (opcode) { + case MLX5_CQE_REQ: + wq = &(*cur_qp)->sq; + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); + idx = wqe_ctr & (wq->wqe_cnt - 1); + handle_good_req(wc, cqe64, wq, idx); + handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); + wc->wr_id = wq->wrid[idx]; + wq->tail = wq->wqe_head[idx] + 1; + wc->status = IB_WC_SUCCESS; + break; + case MLX5_CQE_RESP_WR_IMM: + case MLX5_CQE_RESP_SEND: + case MLX5_CQE_RESP_SEND_IMM: + case MLX5_CQE_RESP_SEND_INV: + handle_responder(wc, cqe64, *cur_qp); + wc->status = IB_WC_SUCCESS; + break; + case MLX5_CQE_RESIZE_CQ: + break; + case MLX5_CQE_REQ_ERR: + case MLX5_CQE_RESP_ERR: + err_cqe = (struct mlx5_err_cqe *)cqe64; + mlx5_handle_error_cqe(dev, err_cqe, wc); + mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n", + opcode == MLX5_CQE_REQ_ERR ? + "Requestor" : "Responder", cq->mcq.cqn); + mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n", + err_cqe->syndrome, err_cqe->vendor_err_synd); + if (opcode == MLX5_CQE_REQ_ERR) { + wq = &(*cur_qp)->sq; + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); + idx = wqe_ctr & (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; + wq->tail = wq->wqe_head[idx] + 1; + } else { + struct mlx5_ib_srq *srq; + + if ((*cur_qp)->ibqp.srq) { + srq = to_msrq((*cur_qp)->ibqp.srq); + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx5_ib_free_srq_wqe(srq, wqe_ctr); + } else { + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + } + break; + } + + return 0; +} + +int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct mlx5_ib_cq *cq = to_mcq(ibcq); + struct mlx5_ib_qp *cur_qp = NULL; + unsigned long flags; + int npolled; + int err = 0; + + spin_lock_irqsave(&cq->lock, flags); + + for (npolled = 0; npolled < num_entries; npolled++) { + err = mlx5_poll_one(cq, &cur_qp, wc + npolled); + if (err) + break; + } + + if (npolled) + mlx5_cq_set_ci(&cq->mcq); + + spin_unlock_irqrestore(&cq->lock, flags); + + if (err == 0 || err == -EAGAIN) + return npolled; + else + return err; +} + +int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + mlx5_cq_arm(&to_mcq(ibcq)->mcq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? + MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, + to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map, + MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock)); + + return 0; +} + +static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, + int nent, int cqe_size) +{ + int err; + + err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size, + PAGE_SIZE * 2, &buf->buf); + if (err) + return err; + + buf->cqe_size = cqe_size; + + return 0; +} + +static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) +{ + mlx5_buf_free(&dev->mdev, &buf->buf); +} + +static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, + struct ib_ucontext *context, struct mlx5_ib_cq *cq, + int entries, struct mlx5_create_cq_mbox_in **cqb, + int *cqe_size, int *index, int *inlen) +{ + struct mlx5_ib_create_cq ucmd; + int page_shift; + int npages; + int ncont; + int err; + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; + + if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) + return -EINVAL; + + *cqe_size = ucmd.cqe_size; + + cq->buf.umem = ib_umem_get(context, ucmd.buf_addr, + entries * ucmd.cqe_size, + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(cq->buf.umem)) { + err = PTR_ERR(cq->buf.umem); + return err; + } + + err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr, + &cq->db); + if (err) + goto err_umem; + + mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift, + &ncont, NULL); + mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", + ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); + + *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont; + *cqb = mlx5_vzalloc(*inlen); + if (!*cqb) { + err = -ENOMEM; + goto err_db; + } + mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0); + (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; + + *index = to_mucontext(context)->uuari.uars[0].index; + + return 0; + +err_db: + mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + +err_umem: + ib_umem_release(cq->buf.umem); + return err; +} + +static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) +{ + mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + ib_umem_release(cq->buf.umem); +} + +static void init_cq_buf(struct mlx5_ib_cq *cq, int nent) +{ + int i; + void *cqe; + struct mlx5_cqe64 *cqe64; + + for (i = 0; i < nent; i++) { + cqe = get_cqe(cq, i); + cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64; + cqe64->op_own = 0xf1; + } +} + +static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, int cqe_size, + struct mlx5_create_cq_mbox_in **cqb, + int *index, int *inlen) +{ + int err; + + err = mlx5_db_alloc(&dev->mdev, &cq->db); + if (err) + return err; + + cq->mcq.set_ci_db = cq->db.db; + cq->mcq.arm_db = cq->db.db + 1; + *cq->mcq.set_ci_db = 0; + *cq->mcq.arm_db = 0; + cq->mcq.cqe_sz = cqe_size; + + err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size); + if (err) + goto err_db; + + init_cq_buf(cq, entries); + + *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages; + *cqb = mlx5_vzalloc(*inlen); + if (!*cqb) { + err = -ENOMEM; + goto err_buf; + } + mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas); + + (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT; + *index = dev->mdev.priv.uuari.uars[0].index; + + return 0; + +err_buf: + free_cq_buf(dev, &cq->buf); + +err_db: + mlx5_db_free(&dev->mdev, &cq->db); + return err; +} + +static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) +{ + free_cq_buf(dev, &cq->buf); + mlx5_db_free(&dev->mdev, &cq->db); +} + +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx5_create_cq_mbox_in *cqb = NULL; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_cq *cq; + int uninitialized_var(index); + int uninitialized_var(inlen); + int cqe_size; + int irqn; + int eqn; + int err; + + entries = roundup_pow_of_two(entries + 1); + if (entries < 1 || entries > dev->mdev.caps.max_cqes) + return ERR_PTR(-EINVAL); + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return ERR_PTR(-ENOMEM); + + cq->ibcq.cqe = entries - 1; + mutex_init(&cq->resize_mutex); + spin_lock_init(&cq->lock); + cq->resize_buf = NULL; + cq->resize_umem = NULL; + + if (context) { + err = create_cq_user(dev, udata, context, cq, entries, + &cqb, &cqe_size, &index, &inlen); + if (err) + goto err_create; + } else { + /* for now choose 64 bytes till we have a proper interface */ + cqe_size = 64; + err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, + &index, &inlen); + if (err) + goto err_create; + } + + cq->cqe_size = cqe_size; + cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; + cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); + err = mlx5_vector2eqn(dev, vector, &eqn, &irqn); + if (err) + goto err_cqb; + + cqb->ctx.c_eqn = cpu_to_be16(eqn); + cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma); + + err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen); + if (err) + goto err_cqb; + + mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); + cq->mcq.irqn = irqn; + cq->mcq.comp = mlx5_ib_cq_comp; + cq->mcq.event = mlx5_ib_cq_event; + + if (context) + if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { + err = -EFAULT; + goto err_cmd; + } + + + mlx5_vfree(cqb); + return &cq->ibcq; + +err_cmd: + mlx5_core_destroy_cq(&dev->mdev, &cq->mcq); + +err_cqb: + mlx5_vfree(cqb); + if (context) + destroy_cq_user(cq, context); + else + destroy_cq_kernel(dev, cq); + +err_create: + kfree(cq); + + return ERR_PTR(err); +} + + +int mlx5_ib_destroy_cq(struct ib_cq *cq) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->device); + struct mlx5_ib_cq *mcq = to_mcq(cq); + struct ib_ucontext *context = NULL; + + if (cq->uobject) + context = cq->uobject->context; + + mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq); + if (context) + destroy_cq_user(mcq, context); + else + destroy_cq_kernel(dev, mcq); + + kfree(mcq); + + return 0; +} + +static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq, + u32 rsn) +{ + u32 lrsn; + + if (srq) + lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff; + else + lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff; + + return rsn == lrsn; +} + +void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq) +{ + struct mlx5_cqe64 *cqe64, *dest64; + void *cqe, *dest; + u32 prod_index; + int nfreed = 0; + u8 owner_bit; + + if (!cq) + return; + + /* First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. + */ + for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++) + if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe) + break; + + /* Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. + */ + while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; + if (is_equal_rsn(cqe64, srq, rsn)) { + if (srq) + mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter)); + ++nfreed; + } else if (nfreed) { + dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); + dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64; + owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK; + memcpy(dest, cqe, cq->mcq.cqe_sz); + dest64->op_own = owner_bit | + (dest64->op_own & ~MLX5_CQE_OWNER_MASK); + } + } + + if (nfreed) { + cq->mcq.cons_index += nfreed; + /* Make sure update of buffer contents is done before + * updating consumer index. + */ + wmb(); + mlx5_cq_set_ci(&cq->mcq); + } +} + +void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) +{ + if (!cq) + return; + + spin_lock_irq(&cq->lock); + __mlx5_ib_cq_clean(cq, qpn, srq); + spin_unlock_irq(&cq->lock); +} + +int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +{ + return -ENOSYS; +} + +int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) +{ + return -ENOSYS; +} + +int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) +{ + struct mlx5_ib_cq *cq; + + if (!ibcq) + return 128; + + cq = to_mcq(ibcq); + return cq->cqe_size; +} diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c new file mode 100644 index 0000000..256a233 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kref.h> +#include <linux/slab.h> +#include <rdma/ib_umem.h> + +#include "mlx5_ib.h" + +struct mlx5_ib_user_db_page { + struct list_head list; + struct ib_umem *umem; + unsigned long user_virt; + int refcnt; +}; + +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, + struct mlx5_db *db) +{ + struct mlx5_ib_user_db_page *page; + struct ib_umem_chunk *chunk; + int err = 0; + + mutex_lock(&context->db_page_mutex); + + list_for_each_entry(page, &context->db_page_list, list) + if (page->user_virt == (virt & PAGE_MASK)) + goto found; + + page = kmalloc(sizeof(*page), GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out; + } + + page->user_virt = (virt & PAGE_MASK); + page->refcnt = 0; + page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, + PAGE_SIZE, 0, 0); + if (IS_ERR(page->umem)) { + err = PTR_ERR(page->umem); + kfree(page); + goto out; + } + + list_add(&page->list, &context->db_page_list); + +found: + chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); + db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->u.user_page = page; + ++page->refcnt; + +out: + mutex_unlock(&context->db_page_mutex); + + return err; +} + +void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) +{ + mutex_lock(&context->db_page_mutex); + + if (!--db->u.user_page->refcnt) { + list_del(&db->u.user_page->list); + ib_umem_release(db->u.user_page->umem); + kfree(db->u.user_page); + } + + mutex_unlock(&context->db_page_mutex); +} diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c new file mode 100644 index 0000000..5c8938b --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/cmd.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_smi.h> +#include "mlx5_ib.h" + +enum { + MLX5_IB_VENDOR_CLASS1 = 0x9, + MLX5_IB_VENDOR_CLASS2 = 0xa +}; + +int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad) +{ + u8 op_modifier = 0; + + /* Key check traps can't be generated unless we have in_wc to + * tell us where to send the trap. + */ + if (ignore_mkey || !in_wc) + op_modifier |= 0x1; + if (ignore_bkey || !in_wc) + op_modifier |= 0x2; + + return mlx5_core_mad_ifc(&dev->mdev, in_mad, response_mad, op_modifier, port); +} + +int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + u16 slid; + int err; + + slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* Don't process SMInfo queries -- the SMA can't handle them. + */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + return IB_MAD_RESULT_SUCCESS; + } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 || + in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } else { + return IB_MAD_RESULT_SUCCESS; + } + + err = mlx5_MAD_IFC(to_mdev(ibdev), + mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, + port_num, in_wc, in_grh, in_mad, out_mad); + if (err) + return IB_MAD_RESULT_FAILURE; + + /* set return bit in status of directed route responses */ + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u16 packet_error; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + + packet_error = be16_to_cpu(out_mad->status); + + dev->mdev.caps.ext_port_cap[port - 1] = (!err && !packet_error) ? + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c new file mode 100644 index 0000000..8000fff --- /dev/null +++ b/drivers/infiniband/hw/mlx5/main.c @@ -0,0 +1,1504 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <asm-generic/kmap_types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/io-mapping.h> +#include <linux/sched.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_umem.h> +#include "user.h" +#include "mlx5_ib.h" + +#define DRIVER_NAME "mlx5_ib" +#define DRIVER_VERSION "1.0" +#define DRIVER_RELDATE "June 2013" + +MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRIVER_VERSION); + +static int prof_sel = 2; +module_param_named(prof_sel, prof_sel, int, 0444); +MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); + +static char mlx5_version[] = + DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" + DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; + +static struct mlx5_profile profile[] = { + [0] = { + .mask = 0, + }, + [1] = { + .mask = MLX5_PROF_MASK_QP_SIZE, + .log_max_qp = 12, + }, + [2] = { + .mask = MLX5_PROF_MASK_QP_SIZE | + MLX5_PROF_MASK_MR_CACHE, + .log_max_qp = 17, + .mr_cache[0] = { + .size = 500, + .limit = 250 + }, + .mr_cache[1] = { + .size = 500, + .limit = 250 + }, + .mr_cache[2] = { + .size = 500, + .limit = 250 + }, + .mr_cache[3] = { + .size = 500, + .limit = 250 + }, + .mr_cache[4] = { + .size = 500, + .limit = 250 + }, + .mr_cache[5] = { + .size = 500, + .limit = 250 + }, + .mr_cache[6] = { + .size = 500, + .limit = 250 + }, + .mr_cache[7] = { + .size = 500, + .limit = 250 + }, + .mr_cache[8] = { + .size = 500, + .limit = 250 + }, + .mr_cache[9] = { + .size = 500, + .limit = 250 + }, + .mr_cache[10] = { + .size = 500, + .limit = 250 + }, + .mr_cache[11] = { + .size = 500, + .limit = 250 + }, + .mr_cache[12] = { + .size = 64, + .limit = 32 + }, + .mr_cache[13] = { + .size = 32, + .limit = 16 + }, + .mr_cache[14] = { + .size = 16, + .limit = 8 + }, + .mr_cache[15] = { + .size = 8, + .limit = 4 + }, + }, +}; + +int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn) +{ + struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq *eq, *n; + int err = -ENOENT; + + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { + if (eq->index == vector) { + *eqn = eq->eqn; + *irqn = eq->irqn; + err = 0; + break; + } + } + spin_unlock(&table->lock); + + return err; +} + +static int alloc_comp_eqs(struct mlx5_ib_dev *dev) +{ + struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq *eq, *n; + int ncomp_vec; + int nent; + int err; + int i; + + INIT_LIST_HEAD(&dev->eqs_list); + ncomp_vec = table->num_comp_vectors; + nent = MLX5_COMP_EQ_SIZE; + for (i = 0; i < ncomp_vec; i++) { + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) { + err = -ENOMEM; + goto clean; + } + + snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + err = mlx5_create_map_eq(&dev->mdev, eq, + i + MLX5_EQ_VEC_COMP_BASE, nent, 0, + eq->name, + &dev->mdev.priv.uuari.uars[0]); + if (err) { + kfree(eq); + goto clean; + } + mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn); + eq->index = i; + spin_lock(&table->lock); + list_add_tail(&eq->list, &dev->eqs_list); + spin_unlock(&table->lock); + } + + dev->num_comp_vectors = ncomp_vec; + return 0; + +clean: + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { + list_del(&eq->list); + spin_unlock(&table->lock); + if (mlx5_destroy_unmap_eq(&dev->mdev, eq)) + mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); + kfree(eq); + spin_lock(&table->lock); + } + spin_unlock(&table->lock); + return err; +} + +static void free_comp_eqs(struct mlx5_ib_dev *dev) +{ + struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq *eq, *n; + + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { + list_del(&eq->list); + spin_unlock(&table->lock); + if (mlx5_destroy_unmap_eq(&dev->mdev, eq)) + mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); + kfree(eq); + spin_lock(&table->lock); + } + spin_unlock(&table->lock); +} + +static int mlx5_ib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + int max_rq_sg; + int max_sq_sg; + u64 flags; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memset(props, 0, sizeof(*props)); + + props->fw_ver = ((u64)fw_rev_maj(&dev->mdev) << 32) | + (fw_rev_min(&dev->mdev) << 16) | + fw_rev_sub(&dev->mdev); + props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | + IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + flags = dev->mdev.caps.flags; + if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) + props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; + if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR) + props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; + if (flags & MLX5_DEV_CAP_FLAG_APM) + props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; + props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; + if (flags & MLX5_DEV_CAP_FLAG_XRC) + props->device_cap_flags |= IB_DEVICE_XRC; + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + + props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & + 0xffffff; + props->vendor_part_id = be16_to_cpup((__be16 *)(out_mad->data + 30)); + props->hw_ver = be32_to_cpup((__be32 *)(out_mad->data + 32)); + memcpy(&props->sys_image_guid, out_mad->data + 4, 8); + + props->max_mr_size = ~0ull; + props->page_size_cap = dev->mdev.caps.min_page_sz; + props->max_qp = 1 << dev->mdev.caps.log_max_qp; + props->max_qp_wr = dev->mdev.caps.max_wqes; + max_rq_sg = dev->mdev.caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); + max_sq_sg = (dev->mdev.caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / + sizeof(struct mlx5_wqe_data_seg); + props->max_sge = min(max_rq_sg, max_sq_sg); + props->max_cq = 1 << dev->mdev.caps.log_max_cq; + props->max_cqe = dev->mdev.caps.max_cqes - 1; + props->max_mr = 1 << dev->mdev.caps.log_max_mkey; + props->max_pd = 1 << dev->mdev.caps.log_max_pd; + props->max_qp_rd_atom = dev->mdev.caps.max_ra_req_qp; + props->max_qp_init_rd_atom = dev->mdev.caps.max_ra_res_qp; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = 1 << dev->mdev.caps.log_max_srq; + props->max_srq_wr = dev->mdev.caps.max_srq_wqes - 1; + props->max_srq_sge = max_rq_sg - 1; + props->max_fast_reg_page_list_len = (unsigned int)-1; + props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay; + props->atomic_cap = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->masked_atomic_cap = IB_ATOMIC_HCA; + props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); + props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg; + props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; + props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ + +out: + kfree(in_mad); + kfree(out_mad); + + return err; +} + +int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int ext_active_speed; + int err = -ENOMEM; + + if (port < 1 || port > dev->mdev.caps.num_ports) { + mlx5_ib_warn(dev, "invalid port number %d\n", port); + return -EINVAL; + } + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof(*props)); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) { + mlx5_ib_warn(dev, "err %d\n", err); + goto out; + } + + + props->lid = be16_to_cpup((__be16 *)(out_mad->data + 16)); + props->lmc = out_mad->data[34] & 0x7; + props->sm_lid = be16_to_cpup((__be16 *)(out_mad->data + 18)); + props->sm_sl = out_mad->data[36] & 0xf; + props->state = out_mad->data[32] & 0xf; + props->phys_state = out_mad->data[33] >> 4; + props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); + props->gid_tbl_len = out_mad->data[50]; + props->max_msg_sz = 1 << to_mdev(ibdev)->mdev.caps.log_max_msg; + props->pkey_tbl_len = to_mdev(ibdev)->mdev.caps.port[port - 1].pkey_table_len; + props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); + props->active_width = out_mad->data[31] & 0xf; + props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = out_mad->data[41] >> 4; + + /* Check if extended speeds (EDR/FDR/...) are supported */ + if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { + ext_active_speed = out_mad->data[62] >> 4; + + switch (ext_active_speed) { + case 1: + props->active_speed = 16; /* FDR */ + break; + case 2: + props->active_speed = 32; /* EDR */ + break; + } + } + + /* If reported active speed is QDR, check if is FDR-10 */ + if (props->active_speed == 4) { + if (dev->mdev.caps.ext_port_cap[port - 1] & + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { + init_query_mad(in_mad); + in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, port, + NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + /* Checking LinkSpeedActive for FDR-10 */ + if (out_mad->data[15] & 0x1) + props->active_speed = 8; + } + } + +out: + kfree(in_mad); + kfree(out_mad); + + return err; +} + +static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw, out_mad->data + 8, 8); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cpu_to_be32(index / 8); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; + in_mad->attr_mod = cpu_to_be32(index / 32); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + *pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +struct mlx5_reg_node_desc { + u8 desc[64]; +}; + +static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, + struct ib_device_modify *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_reg_node_desc in; + struct mlx5_reg_node_desc out; + int err; + + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) + return -EOPNOTSUPP; + + if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) + return 0; + + /* + * If possible, pass node desc to FW, so it can generate + * a 144 trap. If cmd fails, just ignore. + */ + memcpy(&in, props->node_desc, 64); + err = mlx5_core_access_reg(&dev->mdev, &in, sizeof(in), &out, + sizeof(out), MLX5_REG_NODE_DESC, 0, 1); + if (err) + return err; + + memcpy(ibdev->node_desc, props->node_desc, 64); + + return err; +} + +static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, + struct ib_port_modify *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct ib_port_attr attr; + u32 tmp; + int err; + + mutex_lock(&dev->cap_mask_mutex); + + err = mlx5_ib_query_port(ibdev, port, &attr); + if (err) + goto out; + + tmp = (attr.port_cap_flags | props->set_port_cap_mask) & + ~props->clr_port_cap_mask; + + err = mlx5_set_port_caps(&dev->mdev, port, tmp); + +out: + mutex_unlock(&dev->cap_mask_mutex); + return err; +} + +static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_alloc_ucontext_req req; + struct mlx5_ib_alloc_ucontext_resp resp; + struct mlx5_ib_ucontext *context; + struct mlx5_uuar_info *uuari; + struct mlx5_uar *uars; + int num_uars; + int uuarn; + int err; + int i; + + if (!dev->ib_active) + return ERR_PTR(-EAGAIN); + + err = ib_copy_from_udata(&req, udata, sizeof(req)); + if (err) + return ERR_PTR(err); + + if (req.total_num_uuars > MLX5_MAX_UUARS) + return ERR_PTR(-ENOMEM); + + if (req.total_num_uuars == 0) + return ERR_PTR(-EINVAL); + + req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE); + if (req.num_low_latency_uuars > req.total_num_uuars - 1) + return ERR_PTR(-EINVAL); + + num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE; + resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp; + resp.bf_reg_size = dev->mdev.caps.bf_reg_size; + resp.cache_line_size = L1_CACHE_BYTES; + resp.max_sq_desc_sz = dev->mdev.caps.max_sq_desc_sz; + resp.max_rq_desc_sz = dev->mdev.caps.max_rq_desc_sz; + resp.max_send_wqebb = dev->mdev.caps.max_wqes; + resp.max_recv_wr = dev->mdev.caps.max_wqes; + resp.max_srq_recv_wr = dev->mdev.caps.max_srq_wqes; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + uuari = &context->uuari; + mutex_init(&uuari->lock); + uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL); + if (!uars) { + err = -ENOMEM; + goto out_ctx; + } + + uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars), + sizeof(*uuari->bitmap), + GFP_KERNEL); + if (!uuari->bitmap) { + err = -ENOMEM; + goto out_uar_ctx; + } + /* + * clear all fast path uuars + */ + for (i = 0; i < req.total_num_uuars; i++) { + uuarn = i & 3; + if (uuarn == 2 || uuarn == 3) + set_bit(i, uuari->bitmap); + } + + uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL); + if (!uuari->count) { + err = -ENOMEM; + goto out_bitmap; + } + + for (i = 0; i < num_uars; i++) { + err = mlx5_cmd_alloc_uar(&dev->mdev, &uars[i].index); + if (err) + goto out_count; + } + + INIT_LIST_HEAD(&context->db_page_list); + mutex_init(&context->db_page_mutex); + + resp.tot_uuars = req.total_num_uuars; + resp.num_ports = dev->mdev.caps.num_ports; + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (err) + goto out_uars; + + uuari->num_low_latency_uuars = req.num_low_latency_uuars; + uuari->uars = uars; + uuari->num_uars = num_uars; + return &context->ibucontext; + +out_uars: + for (i--; i >= 0; i--) + mlx5_cmd_free_uar(&dev->mdev, uars[i].index); +out_count: + kfree(uuari->count); + +out_bitmap: + kfree(uuari->bitmap); + +out_uar_ctx: + kfree(uars); + +out_ctx: + kfree(context); + return ERR_PTR(err); +} + +static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); + struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); + struct mlx5_uuar_info *uuari = &context->uuari; + int i; + + for (i = 0; i < uuari->num_uars; i++) { + if (mlx5_cmd_free_uar(&dev->mdev, uuari->uars[i].index)) + mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index); + } + + kfree(uuari->count); + kfree(uuari->bitmap); + kfree(uuari->uars); + kfree(context); + + return 0; +} + +static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index) +{ + return (pci_resource_start(dev->mdev.pdev, 0) >> PAGE_SHIFT) + index; +} + +static int get_command(unsigned long offset) +{ + return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK; +} + +static int get_arg(unsigned long offset) +{ + return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1); +} + +static int get_index(unsigned long offset) +{ + return get_arg(offset); +} + +static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) +{ + struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); + struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); + struct mlx5_uuar_info *uuari = &context->uuari; + unsigned long command; + unsigned long idx; + phys_addr_t pfn; + + command = get_command(vma->vm_pgoff); + switch (command) { + case MLX5_IB_MMAP_REGULAR_PAGE: + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + idx = get_index(vma->vm_pgoff); + pfn = uar_index2pfn(dev, uuari->uars[idx].index); + mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx, + (unsigned long long)pfn); + + if (idx >= uuari->num_uars) + return -EINVAL; + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n", + vma->vm_start, + (unsigned long long)pfn << PAGE_SHIFT); + break; + + case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: + return -ENOSYS; + + default: + return -EINVAL; + } + + return 0; +} + +static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn) +{ + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_mkey_seg *seg; + struct mlx5_core_mr mr; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + seg = &in->seg; + seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA; + seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + seg->start_addr = 0; + + err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in)); + if (err) { + mlx5_ib_warn(dev, "failed to create mkey, %d\n", err); + goto err_in; + } + + kfree(in); + *key = mr.key; + + return 0; + +err_in: + kfree(in); + + return err; +} + +static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key) +{ + struct mlx5_core_mr mr; + int err; + + memset(&mr, 0, sizeof(mr)); + mr.key = key; + err = mlx5_core_destroy_mkey(&dev->mdev, &mr); + if (err) + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key); +} + +static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx5_ib_alloc_pd_resp resp; + struct mlx5_ib_pd *pd; + int err; + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + err = mlx5_core_alloc_pd(&to_mdev(ibdev)->mdev, &pd->pdn); + if (err) { + kfree(pd); + return ERR_PTR(err); + } + + if (context) { + resp.pdn = pd->pdn; + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { + mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } else { + err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn); + if (err) { + mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn); + kfree(pd); + return ERR_PTR(err); + } + } + + return &pd->ibpd; +} + +static int mlx5_ib_dealloc_pd(struct ib_pd *pd) +{ + struct mlx5_ib_dev *mdev = to_mdev(pd->device); + struct mlx5_ib_pd *mpd = to_mpd(pd); + + if (!pd->uobject) + free_pa_mkey(mdev, mpd->pa_lkey); + + mlx5_core_dealloc_pd(&mdev->mdev, mpd->pdn); + kfree(mpd); + + return 0; +} + +static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + int err; + + err = mlx5_core_attach_mcg(&dev->mdev, gid, ibqp->qp_num); + if (err) + mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", + ibqp->qp_num, gid->raw); + + return err; +} + +static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + int err; + + err = mlx5_core_detach_mcg(&dev->mdev, gid, ibqp->qp_num); + if (err) + mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", + ibqp->qp_num, gid->raw); + + return err; +} + +static int init_node_data(struct mlx5_ib_dev *dev) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + dev->mdev.rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32)); + memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + + return sprintf(buf, "%d\n", dev->mdev.priv.fw_pages); +} + +static ssize_t show_reg_pages(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + + return sprintf(buf, "%d\n", dev->mdev.priv.reg_pages); +} + +static ssize_t show_hca(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "MT%d\n", dev->mdev.pdev->device); +} + +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(&dev->mdev), + fw_rev_min(&dev->mdev), fw_rev_sub(&dev->mdev)); +} + +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "%x\n", dev->mdev.rev_id); +} + +static ssize_t show_board(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, + dev->mdev.board_id); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); +static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); +static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); + +static struct device_attribute *mlx5_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver, + &dev_attr_hca_type, + &dev_attr_board_id, + &dev_attr_fw_pages, + &dev_attr_reg_pages, +}; + +static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + void *data) +{ + struct mlx5_ib_dev *ibdev = container_of(dev, struct mlx5_ib_dev, mdev); + struct ib_event ibev; + u8 port = 0; + + switch (event) { + case MLX5_DEV_EVENT_SYS_ERROR: + ibdev->ib_active = false; + ibev.event = IB_EVENT_DEVICE_FATAL; + break; + + case MLX5_DEV_EVENT_PORT_UP: + ibev.event = IB_EVENT_PORT_ACTIVE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_PORT_DOWN: + ibev.event = IB_EVENT_PORT_ERR; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_PORT_INITIALIZED: + /* not used by ULPs */ + return; + + case MLX5_DEV_EVENT_LID_CHANGE: + ibev.event = IB_EVENT_LID_CHANGE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_PKEY_CHANGE: + ibev.event = IB_EVENT_PKEY_CHANGE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_GUID_CHANGE: + ibev.event = IB_EVENT_GID_CHANGE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_CLIENT_REREG: + ibev.event = IB_EVENT_CLIENT_REREGISTER; + port = *(u8 *)data; + break; + } + + ibev.device = &ibdev->ib_dev; + ibev.element.port_num = port; + + if (ibdev->ib_active) + ib_dispatch_event(&ibev); +} + +static void get_ext_port_caps(struct mlx5_ib_dev *dev) +{ + int port; + + for (port = 1; port <= dev->mdev.caps.num_ports; port++) + mlx5_query_ext_port_caps(dev, port); +} + +static int get_port_caps(struct mlx5_ib_dev *dev) +{ + struct ib_device_attr *dprops = NULL; + struct ib_port_attr *pprops = NULL; + int err = 0; + int port; + + pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); + if (!pprops) + goto out; + + dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); + if (!dprops) + goto out; + + err = mlx5_ib_query_device(&dev->ib_dev, dprops); + if (err) { + mlx5_ib_warn(dev, "query_device failed %d\n", err); + goto out; + } + + for (port = 1; port <= dev->mdev.caps.num_ports; port++) { + err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); + if (err) { + mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); + break; + } + dev->mdev.caps.port[port - 1].pkey_table_len = dprops->max_pkeys; + dev->mdev.caps.port[port - 1].gid_table_len = pprops->gid_tbl_len; + mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", + dprops->max_pkeys, pprops->gid_tbl_len); + } + +out: + kfree(pprops); + kfree(dprops); + + return err; +} + +static void destroy_umrc_res(struct mlx5_ib_dev *dev) +{ + int err; + + err = mlx5_mr_cache_cleanup(dev); + if (err) + mlx5_ib_warn(dev, "mr cache cleanup failed\n"); + + mlx5_ib_destroy_qp(dev->umrc.qp); + ib_destroy_cq(dev->umrc.cq); + ib_dereg_mr(dev->umrc.mr); + ib_dealloc_pd(dev->umrc.pd); +} + +enum { + MAX_UMR_WR = 128, +}; + +static int create_umr_res(struct mlx5_ib_dev *dev) +{ + struct ib_qp_init_attr *init_attr = NULL; + struct ib_qp_attr *attr = NULL; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_mr *mr; + int ret; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto error_0; + } + + pd = ib_alloc_pd(&dev->ib_dev); + if (IS_ERR(pd)) { + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); + ret = PTR_ERR(pd); + goto error_0; + } + + mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(mr)) { + mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n"); + ret = PTR_ERR(mr); + goto error_1; + } + + cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128, + 0); + if (IS_ERR(cq)) { + mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); + ret = PTR_ERR(cq); + goto error_2; + } + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + + init_attr->send_cq = cq; + init_attr->recv_cq = cq; + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->cap.max_send_wr = MAX_UMR_WR; + init_attr->cap.max_send_sge = 1; + init_attr->qp_type = MLX5_IB_QPT_REG_UMR; + init_attr->port_num = 1; + qp = mlx5_ib_create_qp(pd, init_attr, NULL); + if (IS_ERR(qp)) { + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); + ret = PTR_ERR(qp); + goto error_3; + } + qp->device = &dev->ib_dev; + qp->real_qp = qp; + qp->uobject = NULL; + qp->qp_type = MLX5_IB_QPT_REG_UMR; + + attr->qp_state = IB_QPS_INIT; + attr->port_num = 1; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | + IB_QP_PORT, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTR; + attr->path_mtu = IB_MTU_256; + + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTS; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); + goto error_4; + } + + dev->umrc.qp = qp; + dev->umrc.cq = cq; + dev->umrc.mr = mr; + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); + ret = mlx5_mr_cache_init(dev); + if (ret) { + mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); + goto error_4; + } + + kfree(attr); + kfree(init_attr); + + return 0; + +error_4: + mlx5_ib_destroy_qp(qp); + +error_3: + ib_destroy_cq(cq); + +error_2: + ib_dereg_mr(mr); + +error_1: + ib_dealloc_pd(pd); + +error_0: + kfree(attr); + kfree(init_attr); + return ret; +} + +static int create_dev_resources(struct mlx5_ib_resources *devr) +{ + struct ib_srq_init_attr attr; + struct mlx5_ib_dev *dev; + int ret = 0; + + dev = container_of(devr, struct mlx5_ib_dev, devr); + + devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); + if (IS_ERR(devr->p0)) { + ret = PTR_ERR(devr->p0); + goto error0; + } + devr->p0->device = &dev->ib_dev; + devr->p0->uobject = NULL; + atomic_set(&devr->p0->usecnt, 0); + + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL); + if (IS_ERR(devr->c0)) { + ret = PTR_ERR(devr->c0); + goto error1; + } + devr->c0->device = &dev->ib_dev; + devr->c0->uobject = NULL; + devr->c0->comp_handler = NULL; + devr->c0->event_handler = NULL; + devr->c0->cq_context = NULL; + atomic_set(&devr->c0->usecnt, 0); + + devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + if (IS_ERR(devr->x0)) { + ret = PTR_ERR(devr->x0); + goto error2; + } + devr->x0->device = &dev->ib_dev; + devr->x0->inode = NULL; + atomic_set(&devr->x0->usecnt, 0); + mutex_init(&devr->x0->tgt_qp_mutex); + INIT_LIST_HEAD(&devr->x0->tgt_qp_list); + + devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + if (IS_ERR(devr->x1)) { + ret = PTR_ERR(devr->x1); + goto error3; + } + devr->x1->device = &dev->ib_dev; + devr->x1->inode = NULL; + atomic_set(&devr->x1->usecnt, 0); + mutex_init(&devr->x1->tgt_qp_mutex); + INIT_LIST_HEAD(&devr->x1->tgt_qp_list); + + memset(&attr, 0, sizeof(attr)); + attr.attr.max_sge = 1; + attr.attr.max_wr = 1; + attr.srq_type = IB_SRQT_XRC; + attr.ext.xrc.cq = devr->c0; + attr.ext.xrc.xrcd = devr->x0; + + devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); + if (IS_ERR(devr->s0)) { + ret = PTR_ERR(devr->s0); + goto error4; + } + devr->s0->device = &dev->ib_dev; + devr->s0->pd = devr->p0; + devr->s0->uobject = NULL; + devr->s0->event_handler = NULL; + devr->s0->srq_context = NULL; + devr->s0->srq_type = IB_SRQT_XRC; + devr->s0->ext.xrc.xrcd = devr->x0; + devr->s0->ext.xrc.cq = devr->c0; + atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); + atomic_inc(&devr->s0->ext.xrc.cq->usecnt); + atomic_inc(&devr->p0->usecnt); + atomic_set(&devr->s0->usecnt, 0); + + return 0; + +error4: + mlx5_ib_dealloc_xrcd(devr->x1); +error3: + mlx5_ib_dealloc_xrcd(devr->x0); +error2: + mlx5_ib_destroy_cq(devr->c0); +error1: + mlx5_ib_dealloc_pd(devr->p0); +error0: + return ret; +} + +static void destroy_dev_resources(struct mlx5_ib_resources *devr) +{ + mlx5_ib_destroy_srq(devr->s0); + mlx5_ib_dealloc_xrcd(devr->x0); + mlx5_ib_dealloc_xrcd(devr->x1); + mlx5_ib_destroy_cq(devr->c0); + mlx5_ib_dealloc_pd(devr->p0); +} + +static int init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct mlx5_core_dev *mdev; + struct mlx5_ib_dev *dev; + int err; + int i; + + printk_once(KERN_INFO "%s", mlx5_version); + + dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); + if (!dev) + return -ENOMEM; + + mdev = &dev->mdev; + mdev->event = mlx5_ib_event; + if (prof_sel >= ARRAY_SIZE(profile)) { + pr_warn("selected pofile out of range, selceting default\n"); + prof_sel = 0; + } + mdev->profile = &profile[prof_sel]; + err = mlx5_dev_init(mdev, pdev); + if (err) + goto err_free; + + err = get_port_caps(dev); + if (err) + goto err_cleanup; + + get_ext_port_caps(dev); + + err = alloc_comp_eqs(dev); + if (err) + goto err_cleanup; + + MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); + + strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey; + dev->num_ports = mdev->caps.num_ports; + dev->ib_dev.phys_port_cnt = dev->num_ports; + dev->ib_dev.num_comp_vectors = dev->num_comp_vectors; + dev->ib_dev.dma_device = &mdev->pdev->dev; + + dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION; + dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | + (1ull << IB_USER_VERBS_CMD_OPEN_QP); + + dev->ib_dev.query_device = mlx5_ib_query_device; + dev->ib_dev.query_port = mlx5_ib_query_port; + dev->ib_dev.query_gid = mlx5_ib_query_gid; + dev->ib_dev.query_pkey = mlx5_ib_query_pkey; + dev->ib_dev.modify_device = mlx5_ib_modify_device; + dev->ib_dev.modify_port = mlx5_ib_modify_port; + dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext; + dev->ib_dev.mmap = mlx5_ib_mmap; + dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; + dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; + dev->ib_dev.create_ah = mlx5_ib_create_ah; + dev->ib_dev.query_ah = mlx5_ib_query_ah; + dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah; + dev->ib_dev.create_srq = mlx5_ib_create_srq; + dev->ib_dev.modify_srq = mlx5_ib_modify_srq; + dev->ib_dev.query_srq = mlx5_ib_query_srq; + dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq; + dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv; + dev->ib_dev.create_qp = mlx5_ib_create_qp; + dev->ib_dev.modify_qp = mlx5_ib_modify_qp; + dev->ib_dev.query_qp = mlx5_ib_query_qp; + dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; + dev->ib_dev.post_send = mlx5_ib_post_send; + dev->ib_dev.post_recv = mlx5_ib_post_recv; + dev->ib_dev.create_cq = mlx5_ib_create_cq; + dev->ib_dev.modify_cq = mlx5_ib_modify_cq; + dev->ib_dev.resize_cq = mlx5_ib_resize_cq; + dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq; + dev->ib_dev.poll_cq = mlx5_ib_poll_cq; + dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; + dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; + dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; + dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; + dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; + dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; + dev->ib_dev.process_mad = mlx5_ib_process_mad; + dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; + dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; + dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; + + if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) { + dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; + dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | + (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + } + + err = init_node_data(dev); + if (err) + goto err_eqs; + + mutex_init(&dev->cap_mask_mutex); + spin_lock_init(&dev->mr_lock); + + err = create_dev_resources(&dev->devr); + if (err) + goto err_eqs; + + if (ib_register_device(&dev->ib_dev, NULL)) + goto err_rsrc; + + err = create_umr_res(dev); + if (err) + goto err_dev; + + for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { + if (device_create_file(&dev->ib_dev.dev, + mlx5_class_attributes[i])) + goto err_umrc; + } + + dev->ib_active = true; + + return 0; + +err_umrc: + destroy_umrc_res(dev); + +err_dev: + ib_unregister_device(&dev->ib_dev); + +err_rsrc: + destroy_dev_resources(&dev->devr); + +err_eqs: + free_comp_eqs(dev); + +err_cleanup: + mlx5_dev_cleanup(mdev); + +err_free: + ib_dealloc_device((struct ib_device *)dev); + + return err; +} + +static void remove_one(struct pci_dev *pdev) +{ + struct mlx5_ib_dev *dev = mlx5_pci2ibdev(pdev); + + destroy_umrc_res(dev); + ib_unregister_device(&dev->ib_dev); + destroy_dev_resources(&dev->devr); + free_comp_eqs(dev); + mlx5_dev_cleanup(&dev->mdev); + ib_dealloc_device(&dev->ib_dev); +} + +static DEFINE_PCI_DEVICE_TABLE(mlx5_ib_pci_table) = { + { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */ + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mlx5_ib_pci_table); + +static struct pci_driver mlx5_ib_driver = { + .name = DRIVER_NAME, + .id_table = mlx5_ib_pci_table, + .probe = init_one, + .remove = remove_one +}; + +static int __init mlx5_ib_init(void) +{ + return pci_register_driver(&mlx5_ib_driver); +} + +static void __exit mlx5_ib_cleanup(void) +{ + pci_unregister_driver(&mlx5_ib_driver); +} + +module_init(mlx5_ib_init); +module_exit(mlx5_ib_cleanup); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c new file mode 100644 index 0000000..3a53228 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <rdma/ib_umem.h> +#include "mlx5_ib.h" + +/* @umem: umem object to scan + * @addr: ib virtual address requested by the user + * @count: number of PAGE_SIZE pages covered by umem + * @shift: page shift for the compound pages found in the region + * @ncont: number of compund pages + * @order: log2 of the number of compound pages + */ +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, + int *ncont, int *order) +{ + struct ib_umem_chunk *chunk; + unsigned long tmp; + unsigned long m; + int i, j, k; + u64 base = 0; + int p = 0; + int skip; + int mask; + u64 len; + u64 pfn; + + addr = addr >> PAGE_SHIFT; + tmp = (unsigned long)addr; + m = find_first_bit(&tmp, sizeof(tmp)); + skip = 1 << m; + mask = skip - 1; + i = 0; + list_for_each_entry(chunk, &umem->chunk_list, list) + for (j = 0; j < chunk->nmap; j++) { + len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; + pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT; + for (k = 0; k < len; k++) { + if (!(i & mask)) { + tmp = (unsigned long)pfn; + m = min(m, find_first_bit(&tmp, sizeof(tmp))); + skip = 1 << m; + mask = skip - 1; + base = pfn; + p = 0; + } else { + if (base + p != pfn) { + tmp = (unsigned long)p; + m = find_first_bit(&tmp, sizeof(tmp)); + skip = 1 << m; + mask = skip - 1; + base = pfn; + p = 0; + } + } + p++; + i++; + } + } + + if (i) { + m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); + + if (order) + *order = ilog2(roundup_pow_of_two(i) >> m); + + *ncont = DIV_ROUND_UP(i, (1 << m)); + } else { + m = 0; + + if (order) + *order = 0; + + *ncont = 0; + } + *shift = PAGE_SHIFT + m; + *count = i; +} + +void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int page_shift, __be64 *pas, int umr) +{ + int shift = page_shift - PAGE_SHIFT; + int mask = (1 << shift) - 1; + struct ib_umem_chunk *chunk; + int i, j, k; + u64 cur = 0; + u64 base; + int len; + + i = 0; + list_for_each_entry(chunk, &umem->chunk_list, list) + for (j = 0; j < chunk->nmap; j++) { + len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; + base = sg_dma_address(&chunk->page_list[j]); + for (k = 0; k < len; k++) { + if (!(i & mask)) { + cur = base + (k << PAGE_SHIFT); + if (umr) + cur |= 3; + + pas[i >> shift] = cpu_to_be64(cur); + mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", + i >> shift, be64_to_cpu(pas[i >> shift])); + } else + mlx5_ib_dbg(dev, "=====> 0x%llx\n", + base + (k << PAGE_SHIFT)); + i++; + } + } +} + +int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) +{ + u64 page_size; + u64 page_mask; + u64 off_size; + u64 off_mask; + u64 buf_off; + + page_size = 1 << page_shift; + page_mask = page_size - 1; + buf_off = addr & page_mask; + off_size = page_size >> 6; + off_mask = off_size - 1; + + if (buf_off & off_mask) + return -EINVAL; + + *offset = buf_off >> ilog2(off_size); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h new file mode 100644 index 0000000..836be91 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -0,0 +1,545 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_IB_H +#define MLX5_IB_H + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_smi.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/srq.h> +#include <linux/types.h> + +#define mlx5_ib_dbg(dev, format, arg...) \ +pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ + __LINE__, current->pid, ##arg) + +#define mlx5_ib_err(dev, format, arg...) \ +pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ + __LINE__, current->pid, ##arg) + +#define mlx5_ib_warn(dev, format, arg...) \ +pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ + __LINE__, current->pid, ##arg) + +enum { + MLX5_IB_MMAP_CMD_SHIFT = 8, + MLX5_IB_MMAP_CMD_MASK = 0xff, +}; + +enum mlx5_ib_mmap_cmd { + MLX5_IB_MMAP_REGULAR_PAGE = 0, + MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */ +}; + +enum { + MLX5_RES_SCAT_DATA32_CQE = 0x1, + MLX5_RES_SCAT_DATA64_CQE = 0x2, + MLX5_REQ_SCAT_DATA32_CQE = 0x11, + MLX5_REQ_SCAT_DATA64_CQE = 0x22, +}; + +enum mlx5_ib_latency_class { + MLX5_IB_LATENCY_CLASS_LOW, + MLX5_IB_LATENCY_CLASS_MEDIUM, + MLX5_IB_LATENCY_CLASS_HIGH, + MLX5_IB_LATENCY_CLASS_FAST_PATH +}; + +enum mlx5_ib_mad_ifc_flags { + MLX5_MAD_IFC_IGNORE_MKEY = 1, + MLX5_MAD_IFC_IGNORE_BKEY = 2, + MLX5_MAD_IFC_NET_VIEW = 4, +}; + +struct mlx5_ib_ucontext { + struct ib_ucontext ibucontext; + struct list_head db_page_list; + + /* protect doorbell record alloc/free + */ + struct mutex db_page_mutex; + struct mlx5_uuar_info uuari; +}; + +static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext); +} + +struct mlx5_ib_pd { + struct ib_pd ibpd; + u32 pdn; + u32 pa_lkey; +}; + +/* Use macros here so that don't have to duplicate + * enum ib_send_flags and enum ib_qp_type for low-level driver + */ + +#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START +#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 +#define MLX5_IB_WR_UMR IB_WR_RESERVED1 + +struct wr_list { + u16 opcode; + u16 next; +}; + +struct mlx5_ib_wq { + u64 *wrid; + u32 *wr_data; + struct wr_list *w_list; + unsigned *wqe_head; + u16 unsig_count; + + /* serialize post to the work queue + */ + spinlock_t lock; + int wqe_cnt; + int max_post; + int max_gs; + int offset; + int wqe_shift; + unsigned head; + unsigned tail; + u16 cur_post; + u16 last_poll; + void *qend; +}; + +enum { + MLX5_QP_USER, + MLX5_QP_KERNEL, + MLX5_QP_EMPTY +}; + +struct mlx5_ib_qp { + struct ib_qp ibqp; + struct mlx5_core_qp mqp; + struct mlx5_buf buf; + + struct mlx5_db db; + struct mlx5_ib_wq rq; + + u32 doorbell_qpn; + u8 sq_signal_bits; + u8 fm_cache; + int sq_max_wqes_per_wr; + int sq_spare_wqes; + struct mlx5_ib_wq sq; + + struct ib_umem *umem; + int buf_size; + + /* serialize qp state modifications + */ + struct mutex mutex; + u16 xrcdn; + u32 flags; + u8 port; + u8 alt_port; + u8 atomic_rd_en; + u8 resp_depth; + u8 state; + int mlx_type; + int wq_sig; + int scat_cqe; + int max_inline_data; + struct mlx5_bf *bf; + int has_rq; + + /* only for user space QPs. For kernel + * we have it from the bf object + */ + int uuarn; + + int create_type; + u32 pa_lkey; +}; + +struct mlx5_ib_cq_buf { + struct mlx5_buf buf; + struct ib_umem *umem; + int cqe_size; +}; + +enum mlx5_ib_qp_flags { + MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, + MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, +}; + +struct mlx5_shared_mr_info { + int mr_id; + struct ib_umem *umem; +}; + +struct mlx5_ib_cq { + struct ib_cq ibcq; + struct mlx5_core_cq mcq; + struct mlx5_ib_cq_buf buf; + struct mlx5_db db; + + /* serialize access to the CQ + */ + spinlock_t lock; + + /* protect resize cq + */ + struct mutex resize_mutex; + struct mlx5_ib_cq_resize *resize_buf; + struct ib_umem *resize_umem; + int cqe_size; +}; + +struct mlx5_ib_srq { + struct ib_srq ibsrq; + struct mlx5_core_srq msrq; + struct mlx5_buf buf; + struct mlx5_db db; + u64 *wrid; + /* protect SRQ hanlding + */ + spinlock_t lock; + int head; + int tail; + u16 wqe_ctr; + struct ib_umem *umem; + /* serialize arming a SRQ + */ + struct mutex mutex; + int wq_sig; +}; + +struct mlx5_ib_xrcd { + struct ib_xrcd ibxrcd; + u32 xrcdn; +}; + +struct mlx5_ib_mr { + struct ib_mr ibmr; + struct mlx5_core_mr mmr; + struct ib_umem *umem; + struct mlx5_shared_mr_info *smr_info; + struct list_head list; + int order; + int umred; + __be64 *pas; + dma_addr_t dma; + int npages; + struct completion done; + enum ib_wc_status status; +}; + +struct mlx5_ib_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + __be64 *mapped_page_list; + dma_addr_t map; +}; + +struct umr_common { + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_mr *mr; + /* control access to UMR QP + */ + struct semaphore sem; +}; + +enum { + MLX5_FMR_INVALID, + MLX5_FMR_VALID, + MLX5_FMR_BUSY, +}; + +struct mlx5_ib_fmr { + struct ib_fmr ibfmr; + struct mlx5_core_mr mr; + int access_flags; + int state; + /* protect fmr state + */ + spinlock_t lock; + u64 wrid; + struct ib_send_wr wr[2]; + u8 page_shift; + struct ib_fast_reg_page_list page_list; +}; + +struct mlx5_cache_ent { + struct list_head head; + /* sync access to the cahce entry + */ + spinlock_t lock; + + + struct dentry *dir; + char name[4]; + u32 order; + u32 size; + u32 cur; + u32 miss; + u32 limit; + + struct dentry *fsize; + struct dentry *fcur; + struct dentry *fmiss; + struct dentry *flimit; + + struct mlx5_ib_dev *dev; + struct work_struct work; + struct delayed_work dwork; +}; + +struct mlx5_mr_cache { + struct workqueue_struct *wq; + struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; + int stopped; + struct dentry *root; + unsigned long last_add; +}; + +struct mlx5_ib_resources { + struct ib_cq *c0; + struct ib_xrcd *x0; + struct ib_xrcd *x1; + struct ib_pd *p0; + struct ib_srq *s0; +}; + +struct mlx5_ib_dev { + struct ib_device ib_dev; + struct mlx5_core_dev mdev; + MLX5_DECLARE_DOORBELL_LOCK(uar_lock); + struct list_head eqs_list; + int num_ports; + int num_comp_vectors; + /* serialize update of capability mask + */ + struct mutex cap_mask_mutex; + bool ib_active; + struct umr_common umrc; + /* sync used page count stats + */ + spinlock_t mr_lock; + struct mlx5_ib_resources devr; + struct mlx5_mr_cache cache; +}; + +static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) +{ + return container_of(mcq, struct mlx5_ib_cq, mcq); +} + +static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd); +} + +static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct mlx5_ib_dev, ib_dev); +} + +static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr); +} + +static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct mlx5_ib_cq, ibcq); +} + +static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) +{ + return container_of(mqp, struct mlx5_ib_qp, mqp); +} + +static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct mlx5_ib_pd, ibpd); +} + +static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mlx5_ib_srq, ibsrq); +} + +static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct mlx5_ib_qp, ibqp); +} + +static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) +{ + return container_of(msrq, struct mlx5_ib_srq, msrq); +} + +static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct mlx5_ib_mr, ibmr); +} + +static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) +{ + return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl); +} + +struct mlx5_ib_ah { + struct ib_ah ibah; + struct mlx5_av av; +}; + +static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mlx5_ib_ah, ibah); +} + +static inline struct mlx5_ib_dev *mlx5_core2ibdev(struct mlx5_core_dev *dev) +{ + return container_of(dev, struct mlx5_ib_dev, mdev); +} + +static inline struct mlx5_ib_dev *mlx5_pci2ibdev(struct pci_dev *pdev) +{ + return mlx5_core2ibdev(pci2mlx5_core_dev(pdev)); +} + +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, + struct mlx5_db *db); +void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); +void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); +void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); +void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); +int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad); +struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, + struct mlx5_ib_ah *ah); +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); +int mlx5_ib_destroy_ah(struct ib_ah *ah); +struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); +int mlx5_ib_destroy_srq(struct ib_srq *srq); +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int mlx5_ib_destroy_qp(struct ib_qp *qp); +int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *context, + struct ib_udata *udata); +int mlx5_ib_destroy_cq(struct ib_cq *cq); +int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); +struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int mlx5_ib_dereg_mr(struct ib_mr *ibmr); +struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len); +struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len); +void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); +struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc, + struct ib_fmr_attr *fmr_attr); +int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int npages, u64 iova); +int mlx5_ib_unmap_fmr(struct list_head *fmr_list); +int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr); +int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad); +struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata); +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); +int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn); +int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); +int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); +int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); +void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev); +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, + int *ncont, int *order); +void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int page_shift, __be64 *pas, int umr); +void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); +int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); +int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); +int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); +int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); +void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); + +static inline void init_query_mad(struct ib_smp *mad) +{ + mad->base_version = 1; + mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->class_version = 1; + mad->method = IB_MGMT_METHOD_GET; +} + +static inline u8 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | + MLX5_PERM_LOCAL_READ; +} + +#endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c new file mode 100644 index 0000000..bd41df9 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -0,0 +1,1007 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include <linux/kref.h> +#include <linux/random.h> +#include <linux/debugfs.h> +#include <linux/export.h> +#include <rdma/ib_umem.h> +#include "mlx5_ib.h" + +enum { + DEF_CACHE_SIZE = 10, +}; + +static __be64 *mr_align(__be64 *ptr, int align) +{ + unsigned long mask = align - 1; + + return (__be64 *)(((unsigned long)ptr + mask) & ~mask); +} + +static int order2idx(struct mlx5_ib_dev *dev, int order) +{ + struct mlx5_mr_cache *cache = &dev->cache; + + if (order < cache->ent[0].order) + return 0; + else + return order - cache->ent[0].order; +} + +static int add_keys(struct mlx5_ib_dev *dev, int c, int num) +{ + struct device *ddev = dev->ib_dev.dma_device; + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int npages = 1 << ent->order; + int size = sizeof(u64) * npages; + int err = 0; + int i; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + for (i = 0; i < num; i++) { + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + err = -ENOMEM; + goto out; + } + mr->order = ent->order; + mr->umred = 1; + mr->pas = kmalloc(size + 0x3f, GFP_KERNEL); + if (!mr->pas) { + kfree(mr); + err = -ENOMEM; + goto out; + } + mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size, + DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->dma)) { + kfree(mr->pas); + kfree(mr); + err = -ENOMEM; + goto out; + } + + in->seg.status = 1 << 6; + in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN; + in->seg.log2_page_size = 12; + + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, + sizeof(*in)); + if (err) { + mlx5_ib_warn(dev, "create mkey failed %d\n", err); + dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + kfree(mr->pas); + kfree(mr); + goto out; + } + cache->last_add = jiffies; + + spin_lock(&ent->lock); + list_add_tail(&mr->list, &ent->head); + ent->cur++; + ent->size++; + spin_unlock(&ent->lock); + } + +out: + kfree(in); + return err; +} + +static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) +{ + struct device *ddev = dev->ib_dev.dma_device; + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_ib_mr *mr; + int size; + int err; + int i; + + for (i = 0; i < num; i++) { + spin_lock(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock(&ent->lock); + return; + } + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->cur--; + ent->size--; + spin_unlock(&ent->lock); + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed destroy mkey\n"); + } else { + size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40); + dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + kfree(mr->pas); + kfree(mr); + } + } +} + +static ssize_t size_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + struct mlx5_ib_dev *dev = ent->dev; + char lbuf[20]; + u32 var; + int err; + int c; + + if (copy_from_user(lbuf, buf, sizeof(lbuf))) + return -EFAULT; + + c = order2idx(dev, ent->order); + lbuf[sizeof(lbuf) - 1] = 0; + + if (sscanf(lbuf, "%u", &var) != 1) + return -EINVAL; + + if (var < ent->limit) + return -EINVAL; + + if (var > ent->size) { + err = add_keys(dev, c, var - ent->size); + if (err) + return err; + } else if (var < ent->size) { + remove_keys(dev, c, ent->size - var); + } + + return count; +} + +static ssize_t size_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + char lbuf[20]; + int err; + + if (*pos) + return 0; + + err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); + if (err < 0) + return err; + + if (copy_to_user(buf, lbuf, err)) + return -EFAULT; + + *pos += err; + + return err; +} + +static const struct file_operations size_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = size_write, + .read = size_read, +}; + +static ssize_t limit_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + struct mlx5_ib_dev *dev = ent->dev; + char lbuf[20]; + u32 var; + int err; + int c; + + if (copy_from_user(lbuf, buf, sizeof(lbuf))) + return -EFAULT; + + c = order2idx(dev, ent->order); + lbuf[sizeof(lbuf) - 1] = 0; + + if (sscanf(lbuf, "%u", &var) != 1) + return -EINVAL; + + if (var > ent->size) + return -EINVAL; + + ent->limit = var; + + if (ent->cur < ent->limit) { + err = add_keys(dev, c, 2 * ent->limit - ent->cur); + if (err) + return err; + } + + return count; +} + +static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + char lbuf[20]; + int err; + + if (*pos) + return 0; + + err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); + if (err < 0) + return err; + + if (copy_to_user(buf, lbuf, err)) + return -EFAULT; + + *pos += err; + + return err; +} + +static const struct file_operations limit_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = limit_write, + .read = limit_read, +}; + +static int someone_adding(struct mlx5_mr_cache *cache) +{ + int i; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + if (cache->ent[i].cur < cache->ent[i].limit) + return 1; + } + + return 0; +} + +static void __cache_work_func(struct mlx5_cache_ent *ent) +{ + struct mlx5_ib_dev *dev = ent->dev; + struct mlx5_mr_cache *cache = &dev->cache; + int i = order2idx(dev, ent->order); + + if (cache->stopped) + return; + + ent = &dev->cache.ent[i]; + if (ent->cur < 2 * ent->limit) { + add_keys(dev, i, 1); + if (ent->cur < 2 * ent->limit) + queue_work(cache->wq, &ent->work); + } else if (ent->cur > 2 * ent->limit) { + if (!someone_adding(cache) && + time_after(jiffies, cache->last_add + 60 * HZ)) { + remove_keys(dev, i, 1); + if (ent->cur > ent->limit) + queue_work(cache->wq, &ent->work); + } else { + queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ); + } + } +} + +static void delayed_cache_work_func(struct work_struct *work) +{ + struct mlx5_cache_ent *ent; + + ent = container_of(work, struct mlx5_cache_ent, dwork.work); + __cache_work_func(ent); +} + +static void cache_work_func(struct work_struct *work) +{ + struct mlx5_cache_ent *ent; + + ent = container_of(work, struct mlx5_cache_ent, work); + __cache_work_func(ent); +} + +static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_ib_mr *mr = NULL; + struct mlx5_cache_ent *ent; + int c; + int i; + + c = order2idx(dev, order); + if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { + mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); + return NULL; + } + + for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + + mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); + + spin_lock(&ent->lock); + if (!list_empty(&ent->head)) { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, + list); + list_del(&mr->list); + ent->cur--; + spin_unlock(&ent->lock); + if (ent->cur < ent->limit) + queue_work(cache->wq, &ent->work); + break; + } + spin_unlock(&ent->lock); + + queue_work(cache->wq, &ent->work); + + if (mr) + break; + } + + if (!mr) + cache->ent[c].miss++; + + return mr; +} + +static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int shrink = 0; + int c; + + c = order2idx(dev, mr->order); + if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { + mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); + return; + } + ent = &cache->ent[c]; + spin_lock(&ent->lock); + list_add_tail(&mr->list, &ent->head); + ent->cur++; + if (ent->cur > 2 * ent->limit) + shrink = 1; + spin_unlock(&ent->lock); + + if (shrink) + queue_work(cache->wq, &ent->work); +} + +static void clean_keys(struct mlx5_ib_dev *dev, int c) +{ + struct device *ddev = dev->ib_dev.dma_device; + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_ib_mr *mr; + int size; + int err; + + while (1) { + spin_lock(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock(&ent->lock); + return; + } + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->cur--; + ent->size--; + spin_unlock(&ent->lock); + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed destroy mkey\n"); + } else { + size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40); + dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + kfree(mr->pas); + kfree(mr); + } + } +} + +static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int i; + + if (!mlx5_debugfs_root) + return 0; + + cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root); + if (!cache->root) + return -ENOMEM; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + sprintf(ent->name, "%d", ent->order); + ent->dir = debugfs_create_dir(ent->name, cache->root); + if (!ent->dir) + return -ENOMEM; + + ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent, + &size_fops); + if (!ent->fsize) + return -ENOMEM; + + ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent, + &limit_fops); + if (!ent->flimit) + return -ENOMEM; + + ent->fcur = debugfs_create_u32("cur", 0400, ent->dir, + &ent->cur); + if (!ent->fcur) + return -ENOMEM; + + ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir, + &ent->miss); + if (!ent->fmiss) + return -ENOMEM; + } + + return 0; +} + +static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->cache.root); +} + +int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int limit; + int size; + int err; + int i; + + cache->wq = create_singlethread_workqueue("mkey_cache"); + if (!cache->wq) { + mlx5_ib_warn(dev, "failed to create work queue\n"); + return -ENOMEM; + } + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + INIT_LIST_HEAD(&cache->ent[i].head); + spin_lock_init(&cache->ent[i].lock); + + ent = &cache->ent[i]; + INIT_LIST_HEAD(&ent->head); + spin_lock_init(&ent->lock); + ent->order = i + 2; + ent->dev = dev; + + if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) { + size = dev->mdev.profile->mr_cache[i].size; + limit = dev->mdev.profile->mr_cache[i].limit; + } else { + size = DEF_CACHE_SIZE; + limit = 0; + } + INIT_WORK(&ent->work, cache_work_func); + INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); + ent->limit = limit; + queue_work(cache->wq, &ent->work); + } + + err = mlx5_mr_cache_debugfs_init(dev); + if (err) + mlx5_ib_warn(dev, "cache debugfs failure\n"); + + return 0; +} + +int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) +{ + int i; + + dev->cache.stopped = 1; + destroy_workqueue(dev->cache.wq); + + mlx5_mr_cache_debugfs_cleanup(dev); + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) + clean_keys(dev, i); + + return 0; +} + +struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_core_dev *mdev = &dev->mdev; + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_mkey_seg *seg; + struct mlx5_ib_mr *mr; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + seg = &in->seg; + seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA; + seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64); + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + seg->start_addr = 0; + + err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in)); + if (err) + goto err_in; + + kfree(in); + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + + return &mr->ibmr; + +err_in: + kfree(in); + +err_free: + kfree(mr); + + return ERR_PTR(err); +} + +static int get_octo_len(u64 addr, u64 len, int page_size) +{ + u64 offset; + int npages; + + offset = addr & (page_size - 1); + npages = ALIGN(len + offset, page_size) >> ilog2(page_size); + return (npages + 1) / 2; +} + +static int use_umr(int order) +{ + return order <= 17; +} + +static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, + struct ib_sge *sg, u64 dma, int n, u32 key, + int page_shift, u64 virt_addr, u64 len, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_mr *mr = dev->umrc.mr; + + sg->addr = dma; + sg->length = ALIGN(sizeof(u64) * n, 64); + sg->lkey = mr->lkey; + + wr->next = NULL; + wr->send_flags = 0; + wr->sg_list = sg; + if (n) + wr->num_sge = 1; + else + wr->num_sge = 0; + + wr->opcode = MLX5_IB_WR_UMR; + wr->wr.fast_reg.page_list_len = n; + wr->wr.fast_reg.page_shift = page_shift; + wr->wr.fast_reg.rkey = key; + wr->wr.fast_reg.iova_start = virt_addr; + wr->wr.fast_reg.length = len; + wr->wr.fast_reg.access_flags = access_flags; + wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd; +} + +static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, + struct ib_send_wr *wr, u32 key) +{ + wr->send_flags = MLX5_IB_SEND_UMR_UNREG; + wr->opcode = MLX5_IB_WR_UMR; + wr->wr.fast_reg.rkey = key; +} + +void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) +{ + struct mlx5_ib_mr *mr; + struct ib_wc wc; + int err; + + while (1) { + err = ib_poll_cq(cq, 1, &wc); + if (err < 0) { + pr_warn("poll cq error %d\n", err); + return; + } + if (err == 0) + break; + + mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id; + mr->status = wc.status; + complete(&mr->done); + } + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); +} + +static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, + u64 virt_addr, u64 len, int npages, + int page_shift, int order, int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct umr_common *umrc = &dev->umrc; + struct ib_send_wr wr, *bad; + struct mlx5_ib_mr *mr; + struct ib_sge sg; + int err; + int i; + + for (i = 0; i < 10; i++) { + mr = alloc_cached_mr(dev, order); + if (mr) + break; + + err = add_keys(dev, order2idx(dev, order), 1); + if (err) { + mlx5_ib_warn(dev, "add_keys failed\n"); + break; + } + } + + if (!mr) + return ERR_PTR(-EAGAIN); + + mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1); + + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (u64)(unsigned long)mr; + prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags); + + /* We serialize polls so one process does not kidnap another's + * completion. This is not a problem since wr is completed in + * around 1 usec + */ + down(&umrc->sem); + init_completion(&mr->done); + err = ib_post_send(umrc->qp, &wr, &bad); + if (err) { + mlx5_ib_warn(dev, "post send failed, err %d\n", err); + up(&umrc->sem); + goto error; + } + wait_for_completion(&mr->done); + up(&umrc->sem); + + if (mr->status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed\n"); + err = -EFAULT; + goto error; + } + + return mr; + +error: + free_cached_mr(dev, mr); + return ERR_PTR(err); +} + +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, + u64 length, struct ib_umem *umem, + int npages, int page_shift, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int inlen; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2; + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto err_1; + } + mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0); + + in->seg.flags = convert_access(access_flags) | + MLX5_ACCESS_MODE_MTT; + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + in->seg.start_addr = cpu_to_be64(virt_addr); + in->seg.len = cpu_to_be64(length); + in->seg.bsfs_octo_size = 0; + in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); + in->seg.log2_page_size = page_shift; + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen); + if (err) { + mlx5_ib_warn(dev, "create mkey failed\n"); + goto err_2; + } + mr->umem = umem; + mlx5_vfree(in); + + mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); + + return mr; + +err_2: + mlx5_vfree(in); + +err_1: + kfree(mr); + + return ERR_PTR(err); +} + +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_mr *mr = NULL; + struct ib_umem *umem; + int page_shift; + int npages; + int ncont; + int order; + int err; + + mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n", + start, virt_addr, length); + umem = ib_umem_get(pd->uobject->context, start, length, access_flags, + 0); + if (IS_ERR(umem)) { + mlx5_ib_dbg(dev, "umem get failed\n"); + return (void *)umem; + } + + mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order); + if (!npages) { + mlx5_ib_warn(dev, "avoid zero region\n"); + err = -EINVAL; + goto error; + } + + mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", + npages, ncont, order, page_shift); + + if (use_umr(order)) { + mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, + order, access_flags); + if (PTR_ERR(mr) == -EAGAIN) { + mlx5_ib_dbg(dev, "cache empty for order %d", order); + mr = NULL; + } + } + + if (!mr) + mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, + access_flags); + + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto error; + } + + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); + + mr->umem = umem; + mr->npages = npages; + spin_lock(&dev->mr_lock); + dev->mdev.priv.reg_pages += npages; + spin_unlock(&dev->mr_lock); + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + + return &mr->ibmr; + +error: + ib_umem_release(umem); + return ERR_PTR(err); +} + +static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +{ + struct umr_common *umrc = &dev->umrc; + struct ib_send_wr wr, *bad; + int err; + + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (u64)(unsigned long)mr; + prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); + + down(&umrc->sem); + init_completion(&mr->done); + err = ib_post_send(umrc->qp, &wr, &bad); + if (err) { + up(&umrc->sem); + mlx5_ib_dbg(dev, "err %d\n", err); + goto error; + } + wait_for_completion(&mr->done); + up(&umrc->sem); + if (mr->status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "unreg umr failed\n"); + err = -EFAULT; + goto error; + } + return 0; + +error: + return err; +} + +int mlx5_ib_dereg_mr(struct ib_mr *ibmr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct ib_umem *umem = mr->umem; + int npages = mr->npages; + int umred = mr->umred; + int err; + + if (!umred) { + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", + mr->mmr.key, err); + return err; + } + } else { + err = unreg_umr(dev, mr); + if (err) { + mlx5_ib_warn(dev, "failed unregister\n"); + return err; + } + free_cached_mr(dev, mr); + } + + if (umem) { + ib_umem_release(umem); + spin_lock(&dev->mr_lock); + dev->mdev.priv.reg_pages -= npages; + spin_unlock(&dev->mr_lock); + } + + if (!umred) + kfree(mr); + + return 0; +} + +struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + in->seg.status = 1 << 6; /* free */ + in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + /* + * TBD not needed - issue 197292 */ + in->seg.log2_page_size = PAGE_SHIFT; + + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in)); + kfree(in); + if (err) + goto err_free; + + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + + return &mr->ibmr; + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len) +{ + struct mlx5_ib_fast_reg_page_list *mfrpl; + int size = page_list_len * sizeof(u64); + + mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL); + if (!mfrpl) + return ERR_PTR(-ENOMEM); + + mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); + if (!mfrpl->ibfrpl.page_list) + goto err_free; + + mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device, + size, &mfrpl->map, + GFP_KERNEL); + if (!mfrpl->mapped_page_list) + goto err_free; + + WARN_ON(mfrpl->map & 0x3f); + + return &mfrpl->ibfrpl; + +err_free: + kfree(mfrpl->ibfrpl.page_list); + kfree(mfrpl); + return ERR_PTR(-ENOMEM); +} + +void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) +{ + struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); + struct mlx5_ib_dev *dev = to_mdev(page_list->device); + int size = page_list->max_page_list_len * sizeof(u64); + + dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list, + mfrpl->map); + kfree(mfrpl->ibfrpl.page_list); + kfree(mfrpl); +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c new file mode 100644 index 0000000..16ac54c --- /dev/null +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -0,0 +1,2524 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <rdma/ib_umem.h> +#include "mlx5_ib.h" +#include "user.h" + +/* not supported currently */ +static int wq_signature; + +enum { + MLX5_IB_ACK_REQ_FREQ = 8, +}; + +enum { + MLX5_IB_DEFAULT_SCHED_QUEUE = 0x83, + MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, + MLX5_IB_LINK_TYPE_IB = 0, + MLX5_IB_LINK_TYPE_ETH = 1 +}; + +enum { + MLX5_IB_SQ_STRIDE = 6, + MLX5_IB_CACHE_LINE_SIZE = 64, +}; + +static const u32 mlx5_ib_opcode[] = { + [IB_WR_SEND] = MLX5_OPCODE_SEND, + [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, + [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, + [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS, + [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, + [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, + [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, + [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR, + [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, + [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, + [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, +}; + +struct umr_wr { + u64 virt_addr; + struct ib_pd *pd; + unsigned int page_shift; + unsigned int npages; + u32 length; + int access_flags; + u32 mkey; +}; + +static int is_qp0(enum ib_qp_type qp_type) +{ + return qp_type == IB_QPT_SMI; +} + +static int is_qp1(enum ib_qp_type qp_type) +{ + return qp_type == IB_QPT_GSI; +} + +static int is_sqp(enum ib_qp_type qp_type) +{ + return is_qp0(qp_type) || is_qp1(qp_type); +} + +static void *get_wqe(struct mlx5_ib_qp *qp, int offset) +{ + return mlx5_buf_offset(&qp->buf, offset); +} + +static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n) +{ + return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); +} + +void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n) +{ + return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE)); +} + +static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) +{ + struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; + struct ib_event event; + + if (type == MLX5_EVENT_TYPE_PATH_MIG) + to_mibqp(qp)->port = to_mibqp(qp)->alt_port; + + if (ibqp->event_handler) { + event.device = ibqp->device; + event.element.qp = ibqp; + switch (type) { + case MLX5_EVENT_TYPE_PATH_MIG: + event.event = IB_EVENT_PATH_MIG; + break; + case MLX5_EVENT_TYPE_COMM_EST: + event.event = IB_EVENT_COMM_EST; + break; + case MLX5_EVENT_TYPE_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + break; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + event.event = IB_EVENT_QP_LAST_WQE_REACHED; + break; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_QP_FATAL; + break; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + event.event = IB_EVENT_PATH_MIG_ERR; + break; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + event.event = IB_EVENT_QP_REQ_ERR; + break; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + event.event = IB_EVENT_QP_ACCESS_ERR; + break; + default: + pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn); + return; + } + + ibqp->event_handler(&event, ibqp->qp_context); + } +} + +static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, + int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) +{ + int wqe_size; + int wq_size; + + /* Sanity check RQ size before proceeding */ + if (cap->max_recv_wr > dev->mdev.caps.max_wqes) + return -EINVAL; + + if (!has_rq) { + qp->rq.max_gs = 0; + qp->rq.wqe_cnt = 0; + qp->rq.wqe_shift = 0; + } else { + if (ucmd) { + qp->rq.wqe_cnt = ucmd->rq_wqe_count; + qp->rq.wqe_shift = ucmd->rq_wqe_shift; + qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; + qp->rq.max_post = qp->rq.wqe_cnt; + } else { + wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0; + wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg); + wqe_size = roundup_pow_of_two(wqe_size); + wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; + wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB); + qp->rq.wqe_cnt = wq_size / wqe_size; + if (wqe_size > dev->mdev.caps.max_rq_desc_sz) { + mlx5_ib_dbg(dev, "wqe_size %d, max %d\n", + wqe_size, + dev->mdev.caps.max_rq_desc_sz); + return -EINVAL; + } + qp->rq.wqe_shift = ilog2(wqe_size); + qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; + qp->rq.max_post = qp->rq.wqe_cnt; + } + } + + return 0; +} + +static int sq_overhead(enum ib_qp_type qp_type) +{ + int size; + + switch (qp_type) { + case IB_QPT_XRC_INI: + size = sizeof(struct mlx5_wqe_xrc_seg); + /* fall through */ + case IB_QPT_RC: + size += sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_atomic_seg) + + sizeof(struct mlx5_wqe_raddr_seg); + break; + + case IB_QPT_UC: + size = sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg); + break; + + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + size = sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_datagram_seg); + break; + + case MLX5_IB_QPT_REG_UMR: + size = sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg); + break; + + default: + return -EINVAL; + } + + return size; +} + +static int calc_send_wqe(struct ib_qp_init_attr *attr) +{ + int inl_size = 0; + int size; + + size = sq_overhead(attr->qp_type); + if (size < 0) + return size; + + if (attr->cap.max_inline_data) { + inl_size = size + sizeof(struct mlx5_wqe_inline_seg) + + attr->cap.max_inline_data; + } + + size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); + + return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); +} + +static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, + struct mlx5_ib_qp *qp) +{ + int wqe_size; + int wq_size; + + if (!attr->cap.max_send_wr) + return 0; + + wqe_size = calc_send_wqe(attr); + mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size); + if (wqe_size < 0) + return wqe_size; + + if (wqe_size > dev->mdev.caps.max_sq_desc_sz) { + mlx5_ib_dbg(dev, "\n"); + return -EINVAL; + } + + qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) - + sizeof(struct mlx5_wqe_inline_seg); + attr->cap.max_inline_data = qp->max_inline_data; + + wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); + qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; + qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); + qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_post = 1 << ilog2(wq_size / wqe_size); + + return wq_size; +} + +static int set_user_buf_size(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + struct mlx5_ib_create_qp *ucmd) +{ + int desc_sz = 1 << qp->sq.wqe_shift; + + if (desc_sz > dev->mdev.caps.max_sq_desc_sz) { + mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n", + desc_sz, dev->mdev.caps.max_sq_desc_sz); + return -EINVAL; + } + + if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) { + mlx5_ib_warn(dev, "sq_wqe_count %d, sq_wqe_count %d\n", + ucmd->sq_wqe_count, ucmd->sq_wqe_count); + return -EINVAL; + } + + qp->sq.wqe_cnt = ucmd->sq_wqe_count; + + if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { + mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n", + qp->sq.wqe_cnt, dev->mdev.caps.max_wqes); + return -EINVAL; + } + + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << 6); + + return 0; +} + +static int qp_has_rq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_INI || + attr->qp_type == IB_QPT_XRC_TGT || attr->srq || + attr->qp_type == MLX5_IB_QPT_REG_UMR || + !attr->cap.max_recv_wr) + return 0; + + return 1; +} + +static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari) +{ + int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; + int start_uuar; + int i; + + start_uuar = nuuars - uuari->num_low_latency_uuars; + for (i = start_uuar; i < nuuars; i++) { + if (!test_bit(i, uuari->bitmap)) { + set_bit(i, uuari->bitmap); + uuari->count[i]++; + return i; + } + } + + return -ENOMEM; +} + +static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari) +{ + int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; + int minidx = 1; + int uuarn; + int end; + int i; + + end = nuuars - uuari->num_low_latency_uuars; + + for (i = 1; i < end; i++) { + uuarn = i & 3; + if (uuarn == 2 || uuarn == 3) + continue; + + if (uuari->count[i] < uuari->count[minidx]) + minidx = i; + } + + uuari->count[minidx]++; + return minidx; +} + +static int alloc_uuar(struct mlx5_uuar_info *uuari, + enum mlx5_ib_latency_class lat) +{ + int uuarn = -EINVAL; + + mutex_lock(&uuari->lock); + switch (lat) { + case MLX5_IB_LATENCY_CLASS_LOW: + uuarn = 0; + uuari->count[uuarn]++; + break; + + case MLX5_IB_LATENCY_CLASS_MEDIUM: + uuarn = alloc_med_class_uuar(uuari); + break; + + case MLX5_IB_LATENCY_CLASS_HIGH: + uuarn = alloc_high_class_uuar(uuari); + break; + + case MLX5_IB_LATENCY_CLASS_FAST_PATH: + uuarn = 2; + break; + } + mutex_unlock(&uuari->lock); + + return uuarn; +} + +static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn) +{ + clear_bit(uuarn, uuari->bitmap); + --uuari->count[uuarn]; +} + +static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn) +{ + clear_bit(uuarn, uuari->bitmap); + --uuari->count[uuarn]; +} + +static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn) +{ + int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; + int high_uuar = nuuars - uuari->num_low_latency_uuars; + + mutex_lock(&uuari->lock); + if (uuarn == 0) { + --uuari->count[uuarn]; + goto out; + } + + if (uuarn < high_uuar) { + free_med_class_uuar(uuari, uuarn); + goto out; + } + + free_high_class_uuar(uuari, uuarn); + +out: + mutex_unlock(&uuari->lock); +} + +static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state) +{ + switch (state) { + case IB_QPS_RESET: return MLX5_QP_STATE_RST; + case IB_QPS_INIT: return MLX5_QP_STATE_INIT; + case IB_QPS_RTR: return MLX5_QP_STATE_RTR; + case IB_QPS_RTS: return MLX5_QP_STATE_RTS; + case IB_QPS_SQD: return MLX5_QP_STATE_SQD; + case IB_QPS_SQE: return MLX5_QP_STATE_SQER; + case IB_QPS_ERR: return MLX5_QP_STATE_ERR; + default: return -1; + } +} + +static int to_mlx5_st(enum ib_qp_type type) +{ + switch (type) { + case IB_QPT_RC: return MLX5_QP_ST_RC; + case IB_QPT_UC: return MLX5_QP_ST_UC; + case IB_QPT_UD: return MLX5_QP_ST_UD; + case MLX5_IB_QPT_REG_UMR: return MLX5_QP_ST_REG_UMR; + case IB_QPT_XRC_INI: + case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC; + case IB_QPT_SMI: return MLX5_QP_ST_QP0; + case IB_QPT_GSI: return MLX5_QP_ST_QP1; + case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; + case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; + case IB_QPT_RAW_PACKET: + case IB_QPT_MAX: + default: return -EINVAL; + } +} + +static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn) +{ + return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index; +} + +static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, struct ib_udata *udata, + struct mlx5_create_qp_mbox_in **in, + struct mlx5_ib_create_qp_resp *resp, int *inlen) +{ + struct mlx5_ib_ucontext *context; + struct mlx5_ib_create_qp ucmd; + int page_shift; + int uar_index; + int npages; + u32 offset; + int uuarn; + int ncont; + int err; + + err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + if (err) { + mlx5_ib_dbg(dev, "copy failed\n"); + return err; + } + + context = to_mucontext(pd->uobject->context); + /* + * TBD: should come from the verbs when we have the API + */ + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); + if (uuarn < 0) { + mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); + mlx5_ib_dbg(dev, "reverting to high latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + if (uuarn < 0) { + mlx5_ib_dbg(dev, "uuar allocation failed\n"); + return uuarn; + } + } + + uar_index = uuarn_to_uar_index(&context->uuari, uuarn); + mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index); + + err = set_user_buf_size(dev, qp, &ucmd); + if (err) + goto err_uuar; + + qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, + qp->buf_size, 0, 0); + if (IS_ERR(qp->umem)) { + mlx5_ib_dbg(dev, "umem_get failed\n"); + err = PTR_ERR(qp->umem); + goto err_uuar; + } + + mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, + &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", + ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset); + + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; + *in = mlx5_vzalloc(*inlen); + if (!*in) { + err = -ENOMEM; + goto err_umem; + } + mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); + (*in)->ctx.log_pg_sz_remote_qpn = + cpu_to_be32((page_shift - PAGE_SHIFT) << 24); + (*in)->ctx.params2 = cpu_to_be32(offset << 6); + + (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); + resp->uuar_index = uuarn; + qp->uuarn = uuarn; + + err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db); + if (err) { + mlx5_ib_dbg(dev, "map failed\n"); + goto err_free; + } + + err = ib_copy_to_udata(udata, resp, sizeof(*resp)); + if (err) { + mlx5_ib_dbg(dev, "copy failed\n"); + goto err_unmap; + } + qp->create_type = MLX5_QP_USER; + + return 0; + +err_unmap: + mlx5_ib_db_unmap_user(context, &qp->db); + +err_free: + mlx5_vfree(*in); + +err_umem: + ib_umem_release(qp->umem); + +err_uuar: + free_uuar(&context->uuari, uuarn); + return err; +} + +static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_ucontext *context; + + context = to_mucontext(pd->uobject->context); + mlx5_ib_db_unmap_user(context, &qp->db); + ib_umem_release(qp->umem); + free_uuar(&context->uuari, qp->uuarn); +} + +static int create_kernel_qp(struct mlx5_ib_dev *dev, + struct ib_qp_init_attr *init_attr, + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_mbox_in **in, int *inlen) +{ + enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW; + struct mlx5_uuar_info *uuari; + int uar_index; + int uuarn; + int err; + + uuari = &dev->mdev.priv.uuari; + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; + + if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) + lc = MLX5_IB_LATENCY_CLASS_FAST_PATH; + + uuarn = alloc_uuar(uuari, lc); + if (uuarn < 0) { + mlx5_ib_dbg(dev, "\n"); + return -ENOMEM; + } + + qp->bf = &uuari->bfs[uuarn]; + uar_index = qp->bf->uar->index; + + err = calc_sq_size(dev, init_attr, qp); + if (err < 0) { + mlx5_ib_dbg(dev, "err %d\n", err); + goto err_uuar; + } + + qp->rq.offset = 0; + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); + + err = mlx5_buf_alloc(&dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + goto err_uuar; + } + + qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages; + *in = mlx5_vzalloc(*inlen); + if (!*in) { + err = -ENOMEM; + goto err_buf; + } + (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); + (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24); + /* Set "fast registration enabled" for all kernel QPs */ + (*in)->ctx.params1 |= cpu_to_be32(1 << 11); + (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); + + mlx5_fill_page_array(&qp->buf, (*in)->pas); + + err = mlx5_db_alloc(&dev->mdev, &qp->db); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + goto err_free; + } + + qp->db.db[0] = 0; + qp->db.db[1] = 0; + + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL); + qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL); + qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL); + qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL); + + if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid || + !qp->sq.w_list || !qp->sq.wqe_head) { + err = -ENOMEM; + goto err_wrid; + } + qp->create_type = MLX5_QP_KERNEL; + + return 0; + +err_wrid: + mlx5_db_free(&dev->mdev, &qp->db); + kfree(qp->sq.wqe_head); + kfree(qp->sq.w_list); + kfree(qp->sq.wrid); + kfree(qp->sq.wr_data); + kfree(qp->rq.wrid); + +err_free: + mlx5_vfree(*in); + +err_buf: + mlx5_buf_free(&dev->mdev, &qp->buf); + +err_uuar: + free_uuar(&dev->mdev.priv.uuari, uuarn); + return err; +} + +static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +{ + mlx5_db_free(&dev->mdev, &qp->db); + kfree(qp->sq.wqe_head); + kfree(qp->sq.w_list); + kfree(qp->sq.wrid); + kfree(qp->sq.wr_data); + kfree(qp->rq.wrid); + mlx5_buf_free(&dev->mdev, &qp->buf); + free_uuar(&dev->mdev.priv.uuari, qp->bf->uuarn); +} + +static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) +{ + if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || + (attr->qp_type == IB_QPT_XRC_INI)) + return cpu_to_be32(MLX5_SRQ_RQ); + else if (!qp->has_rq) + return cpu_to_be32(MLX5_ZERO_LEN_RQ); + else + return cpu_to_be32(MLX5_NON_ZERO_RQ); +} + +static int is_connected(enum ib_qp_type qp_type) +{ + if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC) + return 1; + + return 0; +} + +static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_resources *devr = &dev->devr; + struct mlx5_ib_create_qp_resp resp; + struct mlx5_create_qp_mbox_in *in; + struct mlx5_ib_create_qp ucmd; + int inlen = sizeof(*in); + int err; + + mutex_init(&qp->mutex); + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; + + if (pd && pd->uobject) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EFAULT; + } + + qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); + qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); + } else { + qp->wq_sig = !!wq_signature; + } + + qp->has_rq = qp_has_rq(init_attr); + err = set_rq_size(dev, &init_attr->cap, qp->has_rq, + qp, (pd && pd->uobject) ? &ucmd : NULL); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + return err; + } + + if (pd) { + if (pd->uobject) { + mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); + if (ucmd.rq_wqe_shift != qp->rq.wqe_shift || + ucmd.rq_wqe_count != qp->rq.wqe_cnt) { + mlx5_ib_dbg(dev, "invalid rq params\n"); + return -EINVAL; + } + if (ucmd.sq_wqe_count > dev->mdev.caps.max_wqes) { + mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", + ucmd.sq_wqe_count, dev->mdev.caps.max_wqes); + return -EINVAL; + } + err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); + if (err) + mlx5_ib_dbg(dev, "err %d\n", err); + } else { + err = create_kernel_qp(dev, init_attr, qp, &in, &inlen); + if (err) + mlx5_ib_dbg(dev, "err %d\n", err); + else + qp->pa_lkey = to_mpd(pd)->pa_lkey; + } + + if (err) + return err; + } else { + in = mlx5_vzalloc(sizeof(*in)); + if (!in) + return -ENOMEM; + + qp->create_type = MLX5_QP_EMPTY; + } + + if (is_sqp(init_attr->qp_type)) + qp->port = init_attr->port_num; + + in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 | + MLX5_QP_PM_MIGRATED << 11); + + if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) + in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn); + else + in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE); + + if (qp->wq_sig) + in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG); + + if (qp->scat_cqe && is_connected(init_attr->qp_type)) { + int rcqe_sz; + int scqe_sz; + + rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq); + scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq); + + if (rcqe_sz == 128) + in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE; + else + in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE; + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) { + if (scqe_sz == 128) + in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE; + else + in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE; + } + } + + if (qp->rq.wqe_cnt) { + in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4); + in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3; + } + + in->ctx.rq_type_srqn = get_rx_type(qp, init_attr); + + if (qp->sq.wqe_cnt) + in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11); + else + in->ctx.sq_crq_size |= cpu_to_be16(0x8000); + + /* Set default resources */ + switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: + in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); + in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn); + break; + case IB_QPT_XRC_INI: + in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); + in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + break; + default: + if (init_attr->srq) { + in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn); + } else { + in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + } + } + + if (init_attr->send_cq) + in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn); + + if (init_attr->recv_cq) + in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn); + + in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma); + + err = mlx5_core_create_qp(&dev->mdev, &qp->mqp, in, inlen); + if (err) { + mlx5_ib_dbg(dev, "create qp failed\n"); + goto err_create; + } + + mlx5_vfree(in); + /* Hardware wants QPN written in big-endian order (after + * shifting) for send doorbell. Precompute this value to save + * a little bit when posting sends. + */ + qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); + + qp->mqp.event = mlx5_ib_qp_event; + + return 0; + +err_create: + if (qp->create_type == MLX5_QP_USER) + destroy_qp_user(pd, qp); + else if (qp->create_type == MLX5_QP_KERNEL) + destroy_qp_kernel(dev, qp); + + mlx5_vfree(in); + return err; +} + +static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq) + __acquires(&send_cq->lock) __acquires(&recv_cq->lock) +{ + if (send_cq) { + if (recv_cq) { + if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, + SINGLE_DEPTH_NESTING); + } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { + spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, + SINGLE_DEPTH_NESTING); + } + } else { + spin_lock_irq(&send_cq->lock); + } + } else if (recv_cq) { + spin_lock_irq(&recv_cq->lock); + } +} + +static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq) + __releases(&send_cq->lock) __releases(&recv_cq->lock) +{ + if (send_cq) { + if (recv_cq) { + if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { + __release(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } + } else { + spin_unlock_irq(&send_cq->lock); + } + } else if (recv_cq) { + spin_unlock_irq(&recv_cq->lock); + } +} + +static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp) +{ + return to_mpd(qp->ibqp.pd); +} + +static void get_cqs(struct mlx5_ib_qp *qp, + struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq) +{ + switch (qp->ibqp.qp_type) { + case IB_QPT_XRC_TGT: + *send_cq = NULL; + *recv_cq = NULL; + break; + case MLX5_IB_QPT_REG_UMR: + case IB_QPT_XRC_INI: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = NULL; + break; + + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + case IB_QPT_RAW_IPV6: + case IB_QPT_RAW_ETHERTYPE: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = to_mcq(qp->ibqp.recv_cq); + break; + + case IB_QPT_RAW_PACKET: + case IB_QPT_MAX: + default: + *send_cq = NULL; + *recv_cq = NULL; + break; + } +} + +static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_cq *send_cq, *recv_cq; + struct mlx5_modify_qp_mbox_in *in; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return; + if (qp->state != IB_QPS_RESET) + if (mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(qp->state), + MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp)) + mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n", + qp->mqp.qpn); + + get_cqs(qp, &send_cq, &recv_cq); + + if (qp->create_type == MLX5_QP_KERNEL) { + mlx5_ib_lock_cqs(send_cq, recv_cq); + __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (send_cq != recv_cq) + __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + mlx5_ib_unlock_cqs(send_cq, recv_cq); + } + + err = mlx5_core_destroy_qp(&dev->mdev, &qp->mqp); + if (err) + mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn); + kfree(in); + + + if (qp->create_type == MLX5_QP_KERNEL) + destroy_qp_kernel(dev, qp); + else if (qp->create_type == MLX5_QP_USER) + destroy_qp_user(&get_pd(qp)->ibpd, qp); +} + +static const char *ib_qp_type_str(enum ib_qp_type type) +{ + switch (type) { + case IB_QPT_SMI: + return "IB_QPT_SMI"; + case IB_QPT_GSI: + return "IB_QPT_GSI"; + case IB_QPT_RC: + return "IB_QPT_RC"; + case IB_QPT_UC: + return "IB_QPT_UC"; + case IB_QPT_UD: + return "IB_QPT_UD"; + case IB_QPT_RAW_IPV6: + return "IB_QPT_RAW_IPV6"; + case IB_QPT_RAW_ETHERTYPE: + return "IB_QPT_RAW_ETHERTYPE"; + case IB_QPT_XRC_INI: + return "IB_QPT_XRC_INI"; + case IB_QPT_XRC_TGT: + return "IB_QPT_XRC_TGT"; + case IB_QPT_RAW_PACKET: + return "IB_QPT_RAW_PACKET"; + case MLX5_IB_QPT_REG_UMR: + return "MLX5_IB_QPT_REG_UMR"; + case IB_QPT_MAX: + default: + return "Invalid QP type"; + } +} + +struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev; + struct mlx5_ib_qp *qp; + u16 xrcdn = 0; + int err; + + if (pd) { + dev = to_mdev(pd->device); + } else { + /* being cautious here */ + if (init_attr->qp_type != IB_QPT_XRC_TGT && + init_attr->qp_type != MLX5_IB_QPT_REG_UMR) { + pr_warn("%s: no PD for transport %s\n", __func__, + ib_qp_type_str(init_attr->qp_type)); + return ERR_PTR(-EINVAL); + } + dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); + } + + switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: + case IB_QPT_XRC_INI: + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) { + mlx5_ib_dbg(dev, "XRC not supported\n"); + return ERR_PTR(-ENOSYS); + } + init_attr->recv_cq = NULL; + if (init_attr->qp_type == IB_QPT_XRC_TGT) { + xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; + init_attr->send_cq = NULL; + } + + /* fall through */ + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + case MLX5_IB_QPT_REG_UMR: + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + err = create_qp_common(dev, pd, init_attr, udata, qp); + if (err) { + mlx5_ib_dbg(dev, "create_qp_common failed\n"); + kfree(qp); + return ERR_PTR(err); + } + + if (is_qp0(init_attr->qp_type)) + qp->ibqp.qp_num = 0; + else if (is_qp1(init_attr->qp_type)) + qp->ibqp.qp_num = 1; + else + qp->ibqp.qp_num = qp->mqp.qpn; + + mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", + qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn, + to_mcq(init_attr->send_cq)->mcq.cqn); + + qp->xrcdn = xrcdn; + + break; + + case IB_QPT_RAW_IPV6: + case IB_QPT_RAW_ETHERTYPE: + case IB_QPT_RAW_PACKET: + case IB_QPT_MAX: + default: + mlx5_ib_dbg(dev, "unsupported qp type %d\n", + init_attr->qp_type); + /* Don't support raw QPs */ + return ERR_PTR(-EINVAL); + } + + return &qp->ibqp; +} + +int mlx5_ib_destroy_qp(struct ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); + + destroy_qp_common(dev, mqp); + + kfree(mqp); + + return 0; +} + +static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr, + int attr_mask) +{ + u32 hw_access_flags = 0; + u8 dest_rd_atomic; + u32 access_flags; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + dest_rd_atomic = attr->max_dest_rd_atomic; + else + dest_rd_atomic = qp->resp_depth; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + access_flags = attr->qp_access_flags; + else + access_flags = qp->atomic_rd_en; + + if (!dest_rd_atomic) + access_flags &= IB_ACCESS_REMOTE_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_READ) + hw_access_flags |= MLX5_QP_BIT_RRE; + if (access_flags & IB_ACCESS_REMOTE_ATOMIC) + hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX); + if (access_flags & IB_ACCESS_REMOTE_WRITE) + hw_access_flags |= MLX5_QP_BIT_RWE; + + return cpu_to_be32(hw_access_flags); +} + +enum { + MLX5_PATH_FLAG_FL = 1 << 0, + MLX5_PATH_FLAG_FREE_AR = 1 << 1, + MLX5_PATH_FLAG_COUNTER = 1 << 2, +}; + +static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) +{ + if (rate == IB_RATE_PORT_CURRENT) { + return 0; + } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { + return -EINVAL; + } else { + while (rate != IB_RATE_2_5_GBPS && + !(1 << (rate + MLX5_STAT_RATE_OFFSET) & + dev->mdev.caps.stat_rate_support)) + --rate; + } + + return rate + MLX5_STAT_RATE_OFFSET; +} + +static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, + struct mlx5_qp_path *path, u8 port, int attr_mask, + u32 path_flags, const struct ib_qp_attr *attr) +{ + int err; + + path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; + path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0; + + if (attr_mask & IB_QP_PKEY_INDEX) + path->pkey_index = attr->pkey_index; + + path->grh_mlid = ah->src_path_bits & 0x7f; + path->rlid = cpu_to_be16(ah->dlid); + + if (ah->ah_flags & IB_AH_GRH) { + path->grh_mlid |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->tclass_flowlabel = + cpu_to_be32((ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } + + err = ib_rate_to_mlx5(dev, ah->static_rate); + if (err < 0) + return err; + path->static_rate = err; + path->port = port; + + if (ah->ah_flags & IB_AH_GRH) { + if (ah->grh.sgid_index >= dev->mdev.caps.port[port - 1].gid_table_len) { + pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n", + ah->grh.sgid_index, dev->mdev.caps.port[port - 1].gid_table_len); + return -EINVAL; + } + + path->grh_mlid |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->tclass_flowlabel = + cpu_to_be32((ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } + + if (attr_mask & IB_QP_TIMEOUT) + path->ackto_lt = attr->timeout << 3; + + path->sl = ah->sl & 0xf; + + return 0; +} + +static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = { + [MLX5_QP_STATE_INIT] = { + [MLX5_QP_STATE_INIT] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_Q_KEY | + MLX5_QP_OPTPAR_PRI_PORT, + }, + [MLX5_QP_STATE_RTR] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_Q_KEY, + }, + }, + [MLX5_QP_STATE_RTR] = { + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_RNR_TIMEOUT, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + }, + }, + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_PM_STATE, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | + MLX5_QP_OPTPAR_SRQN | + MLX5_QP_OPTPAR_CQN_RCV, + }, + }, + [MLX5_QP_STATE_SQER] = { + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY, + }, + }, +}; + +static int ib_nr_to_mlx5_nr(int ib_mask) +{ + switch (ib_mask) { + case IB_QP_STATE: + return 0; + case IB_QP_CUR_STATE: + return 0; + case IB_QP_EN_SQD_ASYNC_NOTIFY: + return 0; + case IB_QP_ACCESS_FLAGS: + return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE; + case IB_QP_PKEY_INDEX: + return MLX5_QP_OPTPAR_PKEY_INDEX; + case IB_QP_PORT: + return MLX5_QP_OPTPAR_PRI_PORT; + case IB_QP_QKEY: + return MLX5_QP_OPTPAR_Q_KEY; + case IB_QP_AV: + return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH | + MLX5_QP_OPTPAR_PRI_PORT; + case IB_QP_PATH_MTU: + return 0; + case IB_QP_TIMEOUT: + return MLX5_QP_OPTPAR_ACK_TIMEOUT; + case IB_QP_RETRY_CNT: + return MLX5_QP_OPTPAR_RETRY_COUNT; + case IB_QP_RNR_RETRY: + return MLX5_QP_OPTPAR_RNR_RETRY; + case IB_QP_RQ_PSN: + return 0; + case IB_QP_MAX_QP_RD_ATOMIC: + return MLX5_QP_OPTPAR_SRA_MAX; + case IB_QP_ALT_PATH: + return MLX5_QP_OPTPAR_ALT_ADDR_PATH; + case IB_QP_MIN_RNR_TIMER: + return MLX5_QP_OPTPAR_RNR_TIMEOUT; + case IB_QP_SQ_PSN: + return 0; + case IB_QP_MAX_DEST_RD_ATOMIC: + return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE; + case IB_QP_PATH_MIG_STATE: + return MLX5_QP_OPTPAR_PM_STATE; + case IB_QP_CAP: + return 0; + case IB_QP_DEST_QPN: + return 0; + } + return 0; +} + +static int ib_mask_to_mlx5_opt(int ib_mask) +{ + int result = 0; + int i; + + for (i = 0; i < 8 * sizeof(int); i++) { + if ((1 << i) & ib_mask) + result |= ib_nr_to_mlx5_nr(1 << i); + } + + return result; +} + +static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + enum ib_qp_state cur_state, enum ib_qp_state new_state) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_cq *send_cq, *recv_cq; + struct mlx5_qp_context *context; + struct mlx5_modify_qp_mbox_in *in; + struct mlx5_ib_pd *pd; + enum mlx5_qp_state mlx5_cur, mlx5_new; + enum mlx5_qp_optpar optpar; + int sqd_event; + int mlx5_st; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + context = &in->ctx; + err = to_mlx5_st(ibqp->qp_type); + if (err < 0) + goto out; + + context->flags = cpu_to_be32(err << 16); + + if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { + context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + } else { + switch (attr->path_mig_state) { + case IB_MIG_MIGRATED: + context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + break; + case IB_MIG_REARM: + context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11); + break; + case IB_MIG_ARMED: + context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11); + break; + } + } + + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + context->mtu_msgmax = (IB_MTU_256 << 5) | 8; + } else if (ibqp->qp_type == IB_QPT_UD || + ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { + context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; + } else if (attr_mask & IB_QP_PATH_MTU) { + if (attr->path_mtu < IB_MTU_256 || + attr->path_mtu > IB_MTU_4096) { + mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu); + err = -EINVAL; + goto out; + } + context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev.caps.log_max_msg; + } + + if (attr_mask & IB_QP_DEST_QPN) + context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num); + + if (attr_mask & IB_QP_PKEY_INDEX) + context->pri_path.pkey_index = attr->pkey_index; + + /* todo implement counter_index functionality */ + + if (is_sqp(ibqp->qp_type)) + context->pri_path.port = qp->port; + + if (attr_mask & IB_QP_PORT) + context->pri_path.port = attr->port_num; + + if (attr_mask & IB_QP_AV) { + err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path, + attr_mask & IB_QP_PORT ? attr->port_num : qp->port, + attr_mask, 0, attr); + if (err) + goto out; + } + + if (attr_mask & IB_QP_TIMEOUT) + context->pri_path.ackto_lt |= attr->timeout << 3; + + if (attr_mask & IB_QP_ALT_PATH) { + err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path, + attr->alt_port_num, attr_mask, 0, attr); + if (err) + goto out; + } + + pd = get_pd(qp); + get_cqs(qp, &send_cq, &recv_cq); + + context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); + context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0; + context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0; + context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28); + + if (attr_mask & IB_QP_RNR_RETRY) + context->params1 |= cpu_to_be32(attr->rnr_retry << 13); + + if (attr_mask & IB_QP_RETRY_CNT) + context->params1 |= cpu_to_be32(attr->retry_cnt << 16); + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic) + context->params1 |= + cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); + } + + if (attr_mask & IB_QP_SQ_PSN) + context->next_send_psn = cpu_to_be32(attr->sq_psn); + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + if (attr->max_dest_rd_atomic) + context->params2 |= + cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); + } + + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) + context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask); + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); + + if (attr_mask & IB_QP_RQ_PSN) + context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); + + if (attr_mask & IB_QP_QKEY) + context->qkey = cpu_to_be32(attr->qkey); + + if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->db_rec_addr = cpu_to_be64(qp->db.dma); + + if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && + attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) + sqd_event = 1; + else + sqd_event = 0; + + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->sq_crq_size |= cpu_to_be16(1 << 4); + + + mlx5_cur = to_mlx5_state(cur_state); + mlx5_new = to_mlx5_state(new_state); + mlx5_st = to_mlx5_st(ibqp->qp_type); + if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0) + goto out; + + optpar = ib_mask_to_mlx5_opt(attr_mask); + optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; + in->optparam = cpu_to_be32(optpar); + err = mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(cur_state), + to_mlx5_state(new_state), in, sqd_event, + &qp->mqp); + if (err) + goto out; + + qp->state = new_state; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->atomic_rd_en = attr->qp_access_flags; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & IB_QP_PORT) + qp->port = attr->port_num; + if (attr_mask & IB_QP_ALT_PATH) + qp->alt_port = attr->alt_port_num; + + /* + * If we moved a kernel QP to RESET, clean up all old CQ + * entries and reinitialize the QP. + */ + if (new_state == IB_QPS_RESET && !ibqp->uobject) { + mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn, + ibqp->srq ? to_msrq(ibqp->srq) : NULL); + if (send_cq != recv_cq) + mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + + qp->rq.head = 0; + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; + qp->sq.cur_post = 0; + qp->sq.last_poll = 0; + qp->db.db[MLX5_RCV_DBR] = 0; + qp->db.db[MLX5_SND_DBR] = 0; + } + +out: + kfree(in); + return err; +} + +int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + enum ib_qp_state cur_state, new_state; + int err = -EINVAL; + int port; + + mutex_lock(&qp->mutex); + + cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && + !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) + goto out; + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->mdev.caps.num_ports)) + goto out; + + if (attr_mask & IB_QP_PKEY_INDEX) { + port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + if (attr->pkey_index >= dev->mdev.caps.port[port - 1].pkey_table_len) + goto out; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->mdev.caps.max_ra_res_qp) + goto out; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > dev->mdev.caps.max_ra_req_qp) + goto out; + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; + goto out; + } + + err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + +out: + mutex_unlock(&qp->mutex); + return err; +} + +static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) +{ + struct mlx5_ib_cq *cq; + unsigned cur; + + cur = wq->head - wq->tail; + if (likely(cur + nreq < wq->max_post)) + return 0; + + cq = to_mcq(ib_cq); + spin_lock(&cq->lock); + cur = wq->head - wq->tail; + spin_unlock(&cq->lock); + + return cur + nreq >= wq->max_post; +} + +static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr) +{ + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); + } else { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = 0; + } +} + +static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg, + struct ib_send_wr *wr) +{ + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); +} + +static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, + struct ib_send_wr *wr) +{ + memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); +} + +static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + +static __be16 get_klm_octo(int npages) +{ + return cpu_to_be16(ALIGN(npages, 8) / 2); +} + +static __be64 frwr_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_A | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, int li) +{ + memset(umr, 0, sizeof(*umr)); + + if (li) { + umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + umr->flags = 1 << 7; + return; + } + + umr->flags = (1 << 5); /* fail if not free */ + umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len); + umr->mkey_mask = frwr_mkey_mask(); +} + +static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr) +{ + struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg; + u64 mask; + + memset(umr, 0, sizeof(*umr)); + + if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) { + umr->flags = 1 << 5; /* fail if not free */ + umr->klm_octowords = get_klm_octo(umrwr->npages); + mask = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_PD | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_A | + MLX5_MKEY_MASK_FREE; + umr->mkey_mask = cpu_to_be64(mask); + } else { + umr->flags = 2 << 5; /* fail if free */ + mask = MLX5_MKEY_MASK_FREE; + umr->mkey_mask = cpu_to_be64(mask); + } + + if (!wr->num_sge) + umr->flags |= (1 << 7); /* inline */ +} + +static u8 get_umr_flags(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | + MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; +} + +static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, + int li, int *writ) +{ + memset(seg, 0, sizeof(*seg)); + if (li) { + seg->status = 1 << 6; + return; + } + + seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags); + *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); + seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); + seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + seg->len = cpu_to_be64(wr->wr.fast_reg.length); + seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2); + seg->log2_page_size = wr->wr.fast_reg.page_shift; +} + +static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) +{ + memset(seg, 0, sizeof(*seg)); + if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) { + seg->status = 1 << 6; + return; + } + + seg->flags = convert_access(wr->wr.fast_reg.access_flags); + seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn); + seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + seg->len = cpu_to_be64(wr->wr.fast_reg.length); + seg->log2_page_size = wr->wr.fast_reg.page_shift; + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); +} + +static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, + struct ib_send_wr *wr, + struct mlx5_core_dev *mdev, + struct mlx5_ib_pd *pd, + int writ) +{ + struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + u64 *page_list = wr->wr.fast_reg.page_list->page_list; + u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0); + int i; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) + mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm); + dseg->addr = cpu_to_be64(mfrpl->map); + dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64)); + dseg->lkey = cpu_to_be32(pd->pa_lkey); +} + +static __be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + +static u8 calc_sig(void *wqe, int size) +{ + u8 *p = wqe; + u8 res = 0; + int i; + + for (i = 0; i < size; i++) + res ^= p[i]; + + return ~res; +} + +static u8 wq_sig(void *wqe) +{ + return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); +} + +static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, + void *wqe, int *sz) +{ + struct mlx5_wqe_inline_seg *seg; + void *qend = qp->sq.qend; + void *addr; + int inl = 0; + int copy; + int len; + int i; + + seg = wqe; + wqe += sizeof(*seg); + for (i = 0; i < wr->num_sge; i++) { + addr = (void *)(unsigned long)(wr->sg_list[i].addr); + len = wr->sg_list[i].length; + inl += len; + + if (unlikely(inl > qp->max_inline_data)) + return -ENOMEM; + + if (unlikely(wqe + len > qend)) { + copy = qend - wqe; + memcpy(wqe, addr, copy); + addr += copy; + len -= copy; + wqe = mlx5_get_send_wqe(qp, 0); + } + memcpy(wqe, addr, len); + wqe += len; + } + + seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); + + *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16; + + return 0; +} + +static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, + struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) +{ + int writ = 0; + int li; + + li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0; + if (unlikely(wr->send_flags & IB_SEND_INLINE)) + return -EINVAL; + + set_frwr_umr_segment(*seg, wr, li); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + set_mkey_segment(*seg, wr, li, &writ); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + if (!li) { + set_frwr_pages(*seg, wr, mdev, pd, writ); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + } + return 0; +} + +static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) +{ + __be32 *p = NULL; + int tidx = idx; + int i, j; + + pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx)); + for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { + if ((i & 0xf) == 0) { + void *buf = mlx5_get_send_wqe(qp, tidx); + tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); + p = buf; + j = 0; + } + pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), + be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), + be32_to_cpu(p[j + 3])); + } +} + +static void mlx5_bf_copy(u64 __iomem *dst, u64 *src, + unsigned bytecnt, struct mlx5_ib_qp *qp) +{ + while (bytecnt > 0) { + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + bytecnt -= 64; + if (unlikely(src == qp->sq.qend)) + src = mlx5_get_send_wqe(qp, 0); + } +} + +static u8 get_fence(u8 fence, struct ib_send_wr *wr) +{ + if (unlikely(wr->opcode == IB_WR_LOCAL_INV && + wr->send_flags & IB_SEND_FENCE)) + return MLX5_FENCE_MODE_STRONG_ORDERING; + + if (unlikely(fence)) { + if (wr->send_flags & IB_SEND_FENCE) + return MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + return fence; + + } else { + return 0; + } +} + +int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = &dev->mdev; + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_data_seg *dpseg; + struct mlx5_wqe_xrc_seg *xrc; + struct mlx5_bf *bf = qp->bf; + int uninitialized_var(size); + void *qend = qp->sq.qend; + unsigned long flags; + u32 mlx5_opcode; + unsigned idx; + int err = 0; + int inl = 0; + int num_sge; + void *seg; + int nreq; + int i; + u8 next_fence = 0; + u8 opmod = 0; + u8 fence; + + spin_lock_irqsave(&qp->sq.lock, flags); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) { + mlx5_ib_warn(dev, "\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + fence = qp->fm_cache; + num_sge = wr->num_sge; + if (unlikely(num_sge > qp->sq.max_gs)) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + seg = mlx5_get_send_wqe(qp, idx); + ctrl = seg; + *(uint32_t *)(seg + 8) = 0; + ctrl->imm = send_ieth(wr); + ctrl->fm_ce_se = qp->sq_signal_bits | + (wr->send_flags & IB_SEND_SIGNALED ? + MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + MLX5_WQE_CTRL_SOLICITED : 0); + + seg += sizeof(*ctrl); + size = sizeof(*ctrl) / 16; + + switch (ibqp->qp_type) { + case IB_QPT_XRC_INI: + xrc = seg; + xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num); + seg += sizeof(*xrc); + size += sizeof(*xrc) / 16; + /* fall through */ + case IB_QPT_RC: + switch (wr->opcode) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg(seg, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + seg += sizeof(struct mlx5_wqe_raddr_seg); + size += sizeof(struct mlx5_wqe_raddr_seg) / 16; + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + set_raddr_seg(seg, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + seg += sizeof(struct mlx5_wqe_raddr_seg); + + set_atomic_seg(seg, wr); + seg += sizeof(struct mlx5_wqe_atomic_seg); + + size += (sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_atomic_seg)) / 16; + break; + + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + set_raddr_seg(seg, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + seg += sizeof(struct mlx5_wqe_raddr_seg); + + set_masked_atomic_seg(seg, wr); + seg += sizeof(struct mlx5_wqe_masked_atomic_seg); + + size += (sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16; + break; + + case IB_WR_LOCAL_INV: + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; + ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); + err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + num_sge = 0; + break; + + case IB_WR_FAST_REG_MR: + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR; + ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); + err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + num_sge = 0; + break; + + default: + break; + } + break; + + case IB_QPT_UC: + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg(seg, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + seg += sizeof(struct mlx5_wqe_raddr_seg); + size += sizeof(struct mlx5_wqe_raddr_seg) / 16; + break; + + default: + break; + } + break; + + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + set_datagram_seg(seg, wr); + seg += sizeof(struct mlx5_wqe_datagram_seg); + size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + break; + + case MLX5_IB_QPT_REG_UMR: + if (wr->opcode != MLX5_IB_WR_UMR) { + err = -EINVAL; + mlx5_ib_warn(dev, "bad opcode\n"); + goto out; + } + qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; + ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); + set_reg_umr_segment(seg, wr); + seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + set_reg_mkey_segment(seg, wr); + seg += sizeof(struct mlx5_mkey_seg); + size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + break; + + default: + break; + } + + if (wr->send_flags & IB_SEND_INLINE && num_sge) { + int uninitialized_var(sz); + + err = set_data_inl_seg(qp, wr, seg, &sz); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + inl = 1; + size += sz; + } else { + dpseg = seg; + for (i = 0; i < num_sge; i++) { + if (unlikely(dpseg == qend)) { + seg = mlx5_get_send_wqe(qp, 0); + dpseg = seg; + } + if (likely(wr->sg_list[i].length)) { + set_data_ptr_seg(dpseg, wr->sg_list + i); + size += sizeof(struct mlx5_wqe_data_seg) / 16; + dpseg++; + } + } + } + + mlx5_opcode = mlx5_ib_opcode[wr->opcode]; + ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | + mlx5_opcode | + ((u32)opmod << 24)); + ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); + ctrl->fm_ce_se |= get_fence(fence, wr); + qp->fm_cache = next_fence; + if (unlikely(qp->wq_sig)) + ctrl->signature = wq_sig(ctrl); + + qp->sq.wrid[idx] = wr->wr_id; + qp->sq.w_list[idx].opcode = mlx5_opcode; + qp->sq.wqe_head[idx] = qp->sq.head + nreq; + qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; + + if (0) + dump_wqe(qp, idx, size); + } + +out: + if (likely(nreq)) { + qp->sq.head += nreq; + + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + if (bf->need_lock) + spin_lock(&bf->lock); + + /* TBD enable WC */ + if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) { + mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp); + /* wc_wmb(); */ + } else { + mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset, + MLX5_GET_DOORBELL_LOCK(&bf->lock32)); + /* Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order. + */ + mmiowb(); + } + bf->offset ^= bf->buf_size; + if (bf->need_lock) + spin_unlock(&bf->lock); + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return err; +} + +static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) +{ + sig->signature = calc_sig(sig, size); +} + +int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_data_seg *scat; + struct mlx5_rwqe_sig *sig; + unsigned long flags; + int err = 0; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&qp->rq.lock, flags); + + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->rq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + scat = get_recv_wqe(qp, ind); + if (qp->wq_sig) + scat++; + + for (i = 0; i < wr->num_sge; i++) + set_data_ptr_seg(scat + i, wr->sg_list + i); + + if (i < qp->rq.max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + scat[i].addr = 0; + } + + if (qp->wq_sig) { + sig = (struct mlx5_rwqe_sig *)scat; + set_sig_seg(sig, (qp->rq.max_gs + 1) << 2); + } + + qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + } + +out: + if (likely(nreq)) { + qp->rq.head += nreq; + + /* Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); + } + + spin_unlock_irqrestore(&qp->rq.lock, flags); + + return err; +} + +static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) +{ + switch (mlx5_state) { + case MLX5_QP_STATE_RST: return IB_QPS_RESET; + case MLX5_QP_STATE_INIT: return IB_QPS_INIT; + case MLX5_QP_STATE_RTR: return IB_QPS_RTR; + case MLX5_QP_STATE_RTS: return IB_QPS_RTS; + case MLX5_QP_STATE_SQ_DRAINING: + case MLX5_QP_STATE_SQD: return IB_QPS_SQD; + case MLX5_QP_STATE_SQER: return IB_QPS_SQE; + case MLX5_QP_STATE_ERR: return IB_QPS_ERR; + default: return -1; + } +} + +static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state) +{ + switch (mlx5_mig_state) { + case MLX5_QP_PM_ARMED: return IB_MIG_ARMED; + case MLX5_QP_PM_REARM: return IB_MIG_REARM; + case MLX5_QP_PM_MIGRATED: return IB_MIG_MIGRATED; + default: return -1; + } +} + +static int to_ib_qp_access_flags(int mlx5_flags) +{ + int ib_flags = 0; + + if (mlx5_flags & MLX5_QP_BIT_RRE) + ib_flags |= IB_ACCESS_REMOTE_READ; + if (mlx5_flags & MLX5_QP_BIT_RWE) + ib_flags |= IB_ACCESS_REMOTE_WRITE; + if (mlx5_flags & MLX5_QP_BIT_RAE) + ib_flags |= IB_ACCESS_REMOTE_ATOMIC; + + return ib_flags; +} + +static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, + struct mlx5_qp_path *path) +{ + struct mlx5_core_dev *dev = &ibdev->mdev; + + memset(ib_ah_attr, 0, sizeof(*ib_ah_attr)); + ib_ah_attr->port_num = path->port; + + if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) + return; + + ib_ah_attr->sl = path->sl & 0xf; + + ib_ah_attr->dlid = be16_to_cpu(path->rlid); + ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f; + ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; + ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0; + if (ib_ah_attr->ah_flags) { + ib_ah_attr->grh.sgid_index = path->mgid_index; + ib_ah_attr->grh.hop_limit = path->hop_limit; + ib_ah_attr->grh.traffic_class = + (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; + ib_ah_attr->grh.flow_label = + be32_to_cpu(path->tclass_flowlabel) & 0xfffff; + memcpy(ib_ah_attr->grh.dgid.raw, + path->rgid, sizeof(ib_ah_attr->grh.dgid.raw)); + } +} + +int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_query_qp_mbox_out *outb; + struct mlx5_qp_context *context; + int mlx5_state; + int err = 0; + + mutex_lock(&qp->mutex); + outb = kzalloc(sizeof(*outb), GFP_KERNEL); + if (!outb) { + err = -ENOMEM; + goto out; + } + context = &outb->ctx; + err = mlx5_core_qp_query(&dev->mdev, &qp->mqp, outb, sizeof(*outb)); + if (err) + goto out_free; + + mlx5_state = be32_to_cpu(context->flags) >> 28; + + qp->state = to_ib_qp_state(mlx5_state); + qp_attr->qp_state = qp->state; + qp_attr->path_mtu = context->mtu_msgmax >> 5; + qp_attr->path_mig_state = + to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); + qp_attr->qkey = be32_to_cpu(context->qkey); + qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff; + qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff; + qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff; + qp_attr->qp_access_flags = + to_ib_qp_access_flags(be32_to_cpu(context->params2)); + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); + qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; + } + + qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f; + qp_attr->port_num = context->pri_path.port; + + /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ + qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING; + + qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7); + + qp_attr->max_dest_rd_atomic = + 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7); + qp_attr->min_rnr_timer = + (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f; + qp_attr->timeout = context->pri_path.ackto_lt >> 3; + qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; + qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7; + qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3; + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + + if (!ibqp->uobject) { + qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; + } + + /* We don't support inline sends for kernel QPs (yet), and we + * don't know what userspace's value should be. + */ + qp_attr->cap.max_inline_data = 0; + + qp_init_attr->cap = qp_attr->cap; + + qp_init_attr->create_flags = 0; + if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) + qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + + qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? + IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + +out_free: + kfree(outb); + +out: + mutex_unlock(&qp->mutex); + return err; +} + +struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_xrcd *xrcd; + int err; + + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + + xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); + if (!xrcd) + return ERR_PTR(-ENOMEM); + + err = mlx5_core_xrcd_alloc(&dev->mdev, &xrcd->xrcdn); + if (err) { + kfree(xrcd); + return ERR_PTR(-ENOMEM); + } + + return &xrcd->ibxrcd; +} + +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + struct mlx5_ib_dev *dev = to_mdev(xrcd->device); + u32 xrcdn = to_mxrcd(xrcd)->xrcdn; + int err; + + err = mlx5_core_xrcd_dealloc(&dev->mdev, xrcdn); + if (err) { + mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); + return err; + } + + kfree(xrcd); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c new file mode 100644 index 0000000..84d297a --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -0,0 +1,473 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/srq.h> +#include <linux/slab.h> +#include <rdma/ib_umem.h> + +#include "mlx5_ib.h" +#include "user.h" + +/* not supported currently */ +static int srq_signature; + +static void *get_wqe(struct mlx5_ib_srq *srq, int n) +{ + return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); +} + +static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) +{ + struct ib_event event; + struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq; + + if (ibsrq->event_handler) { + event.device = ibsrq->device; + event.element.srq = ibsrq; + switch (type) { + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + break; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + event.event = IB_EVENT_SRQ_ERR; + break; + default: + pr_warn("mlx5_ib: Unexpected event type %d on SRQ %06x\n", + type, srq->srqn); + return; + } + + ibsrq->event_handler(&event, ibsrq->srq_context); + } +} + +static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, + struct mlx5_create_srq_mbox_in **in, + struct ib_udata *udata, int buf_size, int *inlen) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_create_srq ucmd; + int err; + int npages; + int page_shift; + int ncont; + u32 offset; + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + mlx5_ib_dbg(dev, "failed copy udata\n"); + return -EFAULT; + } + srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); + + srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, + 0, 0); + if (IS_ERR(srq->umem)) { + mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); + err = PTR_ERR(srq->umem); + return err; + } + + mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages, + &page_shift, &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, + &offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; + *in = mlx5_vzalloc(*inlen); + if (!(*in)) { + err = -ENOMEM; + goto err_umem; + } + + mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0); + + err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), + ucmd.db_addr, &srq->db); + if (err) { + mlx5_ib_dbg(dev, "map doorbell failed\n"); + goto err_in; + } + + (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; + (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); + + return 0; + +err_in: + mlx5_vfree(*in); + +err_umem: + ib_umem_release(srq->umem); + + return err; +} + +static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, + struct mlx5_create_srq_mbox_in **in, int buf_size, + int *inlen) +{ + int err; + int i; + struct mlx5_wqe_srq_next_seg *next; + int page_shift; + int npages; + + err = mlx5_db_alloc(&dev->mdev, &srq->db); + if (err) { + mlx5_ib_warn(dev, "alloc dbell rec failed\n"); + return err; + } + + *srq->db.db = 0; + + if (mlx5_buf_alloc(&dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + mlx5_ib_dbg(dev, "buf alloc failed\n"); + err = -ENOMEM; + goto err_db; + } + page_shift = srq->buf.page_shift; + + srq->head = 0; + srq->tail = srq->msrq.max - 1; + srq->wqe_ctr = 0; + + for (i = 0; i < srq->msrq.max; i++) { + next = get_wqe(srq, i); + next->next_wqe_index = + cpu_to_be16((i + 1) & (srq->msrq.max - 1)); + } + + npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT)); + mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n", + buf_size, page_shift, srq->buf.npages, npages); + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages; + *in = mlx5_vzalloc(*inlen); + if (!*in) { + err = -ENOMEM; + goto err_buf; + } + mlx5_fill_page_array(&srq->buf, (*in)->pas); + + srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); + if (!srq->wrid) { + mlx5_ib_dbg(dev, "kmalloc failed %lu\n", + (unsigned long)(srq->msrq.max * sizeof(u64))); + err = -ENOMEM; + goto err_in; + } + srq->wq_sig = !!srq_signature; + + (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; + + return 0; + +err_in: + mlx5_vfree(*in); + +err_buf: + mlx5_buf_free(&dev->mdev, &srq->buf); + +err_db: + mlx5_db_free(&dev->mdev, &srq->db); + return err; +} + +static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) +{ + mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); + ib_umem_release(srq->umem); +} + + +static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) +{ + kfree(srq->wrid); + mlx5_buf_free(&dev->mdev, &srq->buf); + mlx5_db_free(&dev->mdev, &srq->db); +} + +struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_srq *srq; + int desc_size; + int buf_size; + int err; + struct mlx5_create_srq_mbox_in *uninitialized_var(in); + int uninitialized_var(inlen); + int is_xrc; + u32 flgs, xrcdn; + + /* Sanity check SRQ size before proceeding */ + if (init_attr->attr.max_wr >= dev->mdev.caps.max_srq_wqes) { + mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", + init_attr->attr.max_wr, + dev->mdev.caps.max_srq_wqes); + return ERR_PTR(-EINVAL); + } + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + mutex_init(&srq->mutex); + spin_lock_init(&srq->lock); + srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); + srq->msrq.max_gs = init_attr->attr.max_sge; + + desc_size = sizeof(struct mlx5_wqe_srq_next_seg) + + srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg); + desc_size = roundup_pow_of_two(desc_size); + desc_size = max_t(int, 32, desc_size); + srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) / + sizeof(struct mlx5_wqe_data_seg); + srq->msrq.wqe_shift = ilog2(desc_size); + buf_size = srq->msrq.max * desc_size; + mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n", + desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, + srq->msrq.max_avail_gather); + + if (pd->uobject) + err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen); + else + err = create_srq_kernel(dev, srq, &in, buf_size, &inlen); + + if (err) { + mlx5_ib_warn(dev, "create srq %s failed, err %d\n", + pd->uobject ? "user" : "kernel", err); + goto err_srq; + } + + is_xrc = (init_attr->srq_type == IB_SRQT_XRC); + in->ctx.state_log_sz = ilog2(srq->msrq.max); + flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24; + xrcdn = 0; + if (is_xrc) { + xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; + in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn); + } else if (init_attr->srq_type == IB_SRQT_BASIC) { + xrcdn = to_mxrcd(dev->devr.x0)->xrcdn; + in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn); + } + + in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF)); + + in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); + in->ctx.db_record = cpu_to_be64(srq->db.dma); + err = mlx5_core_create_srq(&dev->mdev, &srq->msrq, in, inlen); + mlx5_vfree(in); + if (err) { + mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); + goto err_srq; + } + + mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn); + + srq->msrq.event = mlx5_ib_srq_event; + srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; + + if (pd->uobject) + if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { + mlx5_ib_dbg(dev, "copy to user failed\n"); + err = -EFAULT; + goto err_core; + } + + init_attr->attr.max_wr = srq->msrq.max - 1; + + return &srq->ibsrq; + +err_core: + mlx5_core_destroy_srq(&dev->mdev, &srq->msrq); + if (pd->uobject) + destroy_srq_user(pd, srq); + else + destroy_srq_kernel(dev, srq); + +err_srq: + kfree(srq); + + return ERR_PTR(err); +} + +int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx5_ib_srq *srq = to_msrq(ibsrq); + int ret; + + /* We don't support resizing SRQs yet */ + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & IB_SRQ_LIMIT) { + if (attr->srq_limit >= srq->msrq.max) + return -EINVAL; + + mutex_lock(&srq->mutex); + ret = mlx5_core_arm_srq(&dev->mdev, &srq->msrq, attr->srq_limit, 1); + mutex_unlock(&srq->mutex); + + if (ret) + return ret; + } + + return 0; +} + +int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx5_ib_srq *srq = to_msrq(ibsrq); + int ret; + struct mlx5_query_srq_mbox_out *out; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return -ENOMEM; + + ret = mlx5_core_query_srq(&dev->mdev, &srq->msrq, out); + if (ret) + goto out_box; + + srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm); + srq_attr->max_wr = srq->msrq.max - 1; + srq_attr->max_sge = srq->msrq.max_gs; + +out_box: + kfree(out); + return ret; +} + +int mlx5_ib_destroy_srq(struct ib_srq *srq) +{ + struct mlx5_ib_dev *dev = to_mdev(srq->device); + struct mlx5_ib_srq *msrq = to_msrq(srq); + + mlx5_core_destroy_srq(&dev->mdev, &msrq->msrq); + + if (srq->uobject) { + mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); + ib_umem_release(msrq->umem); + } else { + kfree(msrq->wrid); + mlx5_buf_free(&dev->mdev, &msrq->buf); + mlx5_db_free(&dev->mdev, &msrq->db); + } + + kfree(srq); + return 0; +} + +void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) +{ + struct mlx5_wqe_srq_next_seg *next; + + /* always called with interrupts disabled. */ + spin_lock(&srq->lock); + + next = get_wqe(srq, srq->tail); + next->next_wqe_index = cpu_to_be16(wqe_index); + srq->tail = wqe_index; + + spin_unlock(&srq->lock); +} + +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx5_ib_srq *srq = to_msrq(ibsrq); + struct mlx5_wqe_srq_next_seg *next; + struct mlx5_wqe_data_seg *scat; + unsigned long flags; + int err = 0; + int nreq; + int i; + + spin_lock_irqsave(&srq->lock, flags); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (unlikely(wr->num_sge > srq->msrq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + err = -ENOMEM; + *bad_wr = wr; + break; + } + + srq->wrid[srq->head] = wr->wr_id; + + next = get_wqe(srq, srq->head); + srq->head = be16_to_cpu(next->next_wqe_index); + scat = (struct mlx5_wqe_data_seg *)(next + 1); + + for (i = 0; i < wr->num_sge; i++) { + scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length); + scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey); + scat[i].addr = cpu_to_be64(wr->sg_list[i].addr); + } + + if (i < srq->msrq.max_avail_gather) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + scat[i].addr = 0; + } + } + + if (likely(nreq)) { + srq->wqe_ctr += nreq; + + /* Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *srq->db.db = cpu_to_be32(srq->wqe_ctr); + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return err; +} diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h new file mode 100644 index 0000000..a886de3 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/user.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_IB_USER_H +#define MLX5_IB_USER_H + +#include <linux/types.h> + +enum { + MLX5_QP_FLAG_SIGNATURE = 1 << 0, + MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, +}; + +enum { + MLX5_SRQ_FLAG_SIGNATURE = 1 << 0, +}; + + +/* Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define MLX5_IB_UVERBS_ABI_VERSION 1 + +/* Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct mlx5_ib_alloc_ucontext_req { + __u32 total_num_uuars; + __u32 num_low_latency_uuars; +}; + +struct mlx5_ib_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 bf_reg_size; + __u32 tot_uuars; + __u32 cache_line_size; + __u16 max_sq_desc_sz; + __u16 max_rq_desc_sz; + __u32 max_send_wqebb; + __u32 max_recv_wr; + __u32 max_srq_recv_wr; + __u16 num_ports; + __u16 reserved; +}; + +struct mlx5_ib_alloc_pd_resp { + __u32 pdn; +}; + +struct mlx5_ib_create_cq { + __u64 buf_addr; + __u64 db_addr; + __u32 cqe_size; +}; + +struct mlx5_ib_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mlx5_ib_resize_cq { + __u64 buf_addr; +}; + +struct mlx5_ib_create_srq { + __u64 buf_addr; + __u64 db_addr; + __u32 flags; +}; + +struct mlx5_ib_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + +struct mlx5_ib_create_qp { + __u64 buf_addr; + __u64 db_addr; + __u32 sq_wqe_count; + __u32 rq_wqe_count; + __u32 rq_wqe_shift; + __u32 flags; +}; + +struct mlx5_ib_create_qp_resp { + __u32 uuar_index; +}; +#endif /* MLX5_IB_USER_H */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 48970af..d540180 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -42,8 +42,6 @@ #define OCRDMA_ROCE_DEV_VERSION "1.0.0" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" -#define ocrdma_err(format, arg...) printk(KERN_ERR format, ##arg) - #define OCRDMA_MAX_AH 512 #define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) @@ -97,7 +95,6 @@ struct ocrdma_queue_info { u16 id; /* qid, where to ring the doorbell. */ u16 head, tail; bool created; - atomic_t used; /* Number of valid elements in the queue */ }; struct ocrdma_eq { @@ -198,7 +195,6 @@ struct ocrdma_cq { struct ocrdma_ucontext *ucontext; dma_addr_t pa; u32 len; - atomic_t use_cnt; /* head of all qp's sq and rq for which cqes need to be flushed * by the software. @@ -210,7 +206,6 @@ struct ocrdma_pd { struct ib_pd ibpd; struct ocrdma_dev *dev; struct ocrdma_ucontext *uctx; - atomic_t use_cnt; u32 id; int num_dpp_qp; u32 dpp_page; @@ -241,16 +236,16 @@ struct ocrdma_srq { struct ib_srq ibsrq; struct ocrdma_dev *dev; u8 __iomem *db; + struct ocrdma_qp_hwq_info rq; + u64 *rqe_wr_id_tbl; + u32 *idx_bit_fields; + u32 bit_fields_len; + /* provide synchronization to multiple context(s) posting rqe */ spinlock_t q_lock ____cacheline_aligned; - struct ocrdma_qp_hwq_info rq; struct ocrdma_pd *pd; - atomic_t use_cnt; u32 id; - u64 *rqe_wr_id_tbl; - u32 *idx_bit_fields; - u32 bit_fields_len; }; struct ocrdma_qp { @@ -258,8 +253,6 @@ struct ocrdma_qp { struct ocrdma_dev *dev; u8 __iomem *sq_db; - /* provide synchronization to multiple context(s) posting wqe, rqe */ - spinlock_t q_lock ____cacheline_aligned; struct ocrdma_qp_hwq_info sq; struct { uint64_t wrid; @@ -269,6 +262,9 @@ struct ocrdma_qp { uint8_t rsvd[3]; } *wqe_wr_id_tbl; u32 max_inline_data; + + /* provide synchronization to multiple context(s) posting wqe, rqe */ + spinlock_t q_lock ____cacheline_aligned; struct ocrdma_cq *sq_cq; /* list maintained per CQ to flush SQ errors */ struct list_head sq_entry; @@ -296,10 +292,6 @@ struct ocrdma_qp { u8 *ird_q_va; }; -#define OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp) \ - (((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) && \ - (qp->id < 64)) ? 24 : 16) - struct ocrdma_hw_mr { struct ocrdma_dev *dev; u32 lkey; @@ -390,4 +382,43 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct ocrdma_srq, ibsrq); } + +static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp) +{ + return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY && + qp->id < 64) ? 24 : 16); +} + +static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe) +{ + int cqe_valid; + cqe_valid = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID; + return ((cqe_valid == cq->phase) ? 1 : 0); +} + +static inline int is_cqe_for_sq(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_QTYPE) ? 0 : 1; +} + +static inline int is_cqe_invalidated(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_INVALIDATE) ? 1 : 0; +} + +static inline int is_cqe_imm(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_IMM) ? 1 : 0; +} + +static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_WRITE_IMM) ? 1 : 0; +} + + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 71942af..0965278 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -128,7 +128,6 @@ static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev) static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev) { dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1); - atomic_inc(&dev->mq.sq.used); } static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev) @@ -564,32 +563,19 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev, memset(cmd, 0, sizeof(*cmd)); num_pages = PAGES_4K_SPANNED(mq->va, mq->size); - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ, - OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); - cmd->v0.pages = num_pages; - cmd->v0.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; - cmd->v0.async_cqid_valid = (cq->id << 1); - cmd->v0.cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << - OCRDMA_CREATE_MQ_RING_SIZE_SHIFT); - cmd->v0.cqid_ringsize |= - (cq->id << OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT); - cmd->v0.valid = OCRDMA_CREATE_MQ_VALID; - pa = &cmd->v0.pa[0]; - } else { - ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT, - OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); - cmd->req.rsvd_version = 1; - cmd->v1.cqid_pages = num_pages; - cmd->v1.cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT); - cmd->v1.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; - cmd->v1.async_event_bitmap = Bit(20); - cmd->v1.async_cqid_ringsize = cq->id; - cmd->v1.async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << - OCRDMA_CREATE_MQ_RING_SIZE_SHIFT); - cmd->v1.valid = OCRDMA_CREATE_MQ_VALID; - pa = &cmd->v1.pa[0]; - } + ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT, + OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); + cmd->req.rsvd_version = 1; + cmd->cqid_pages = num_pages; + cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT); + cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; + cmd->async_event_bitmap = Bit(20); + cmd->async_cqid_ringsize = cq->id; + cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << + OCRDMA_CREATE_MQ_RING_SIZE_SHIFT); + cmd->valid = OCRDMA_CREATE_MQ_VALID; + pa = &cmd->pa[0]; + ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K); status = be_roce_mcc_cmd(dev->nic_info.netdev, cmd, sizeof(*cmd), NULL, NULL); @@ -745,7 +731,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, qp_event = 0; srq_event = 0; dev_event = 0; - ocrdma_err("%s() unknown type=0x%x\n", __func__, type); + pr_err("%s() unknown type=0x%x\n", __func__, type); break; } @@ -775,8 +761,8 @@ static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe) if (evt_code == OCRDMA_ASYNC_EVE_CODE) ocrdma_dispatch_ibevent(dev, cqe); else - ocrdma_err("%s(%d) invalid evt code=0x%x\n", - __func__, dev->id, evt_code); + pr_err("%s(%d) invalid evt code=0x%x\n", __func__, + dev->id, evt_code); } static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe) @@ -790,8 +776,8 @@ static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe) dev->mqe_ctx.cmd_done = true; wake_up(&dev->mqe_ctx.cmd_wait); } else - ocrdma_err("%s() cqe for invalid tag0x%x.expected=0x%x\n", - __func__, cqe->tag_lo, dev->mqe_ctx.tag); + pr_err("%s() cqe for invalid tag0x%x.expected=0x%x\n", + __func__, cqe->tag_lo, dev->mqe_ctx.tag); } static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id) @@ -810,7 +796,7 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id) else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK) ocrdma_process_mcqe(dev, cqe); else - ocrdma_err("%s() cqe->compl is not set.\n", __func__); + pr_err("%s() cqe->compl is not set.\n", __func__); memset(cqe, 0, sizeof(struct ocrdma_mcqe)); ocrdma_mcq_inc_tail(dev); } @@ -869,7 +855,7 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx) cq = dev->cq_tbl[cq_idx]; if (cq == NULL) { - ocrdma_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx); + pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx); return; } spin_lock_irqsave(&cq->cq_lock, flags); @@ -971,7 +957,7 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) rsp = ocrdma_get_mqe_rsp(dev); ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe))); if (cqe_status || ext_status) { - ocrdma_err + pr_err ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n", __func__, (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> @@ -1353,8 +1339,8 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, if (dpp_cq) return -EINVAL; if (entries > dev->attr.max_cqe) { - ocrdma_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n", - __func__, dev->id, dev->attr.max_cqe, entries); + pr_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n", + __func__, dev->id, dev->attr.max_cqe, entries); return -EINVAL; } if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)) @@ -1621,7 +1607,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *dev, status = ocrdma_mbx_reg_mr(dev, hwmr, pdid, cur_pbl_cnt, hwmr->pbe_size, last); if (status) { - ocrdma_err("%s() status=%d\n", __func__, status); + pr_err("%s() status=%d\n", __func__, status); return status; } /* if there is no more pbls to register then exit. */ @@ -1644,7 +1630,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *dev, break; } if (status) - ocrdma_err("%s() err. status=%d\n", __func__, status); + pr_err("%s() err. status=%d\n", __func__, status); return status; } @@ -1841,8 +1827,8 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd, status = ocrdma_build_q_conf(&max_wqe_allocated, dev->attr.wqe_size, &hw_pages, &hw_page_size); if (status) { - ocrdma_err("%s() req. max_send_wr=0x%x\n", __func__, - max_wqe_allocated); + pr_err("%s() req. max_send_wr=0x%x\n", __func__, + max_wqe_allocated); return -EINVAL; } qp->sq.max_cnt = max_wqe_allocated; @@ -1891,8 +1877,8 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd, status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size, &hw_pages, &hw_page_size); if (status) { - ocrdma_err("%s() req. max_recv_wr=0x%x\n", __func__, - attrs->cap.max_recv_wr + 1); + pr_err("%s() req. max_recv_wr=0x%x\n", __func__, + attrs->cap.max_recv_wr + 1); return status; } qp->rq.max_cnt = max_rqe_allocated; @@ -1900,7 +1886,7 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd, qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL); if (!qp->rq.va) - return status; + return -ENOMEM; memset(qp->rq.va, 0, len); qp->rq.pa = pa; qp->rq.len = len; @@ -2087,10 +2073,10 @@ mbx_err: if (qp->rq.va) dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa); rq_err: - ocrdma_err("%s(%d) rq_err\n", __func__, dev->id); + pr_err("%s(%d) rq_err\n", __func__, dev->id); dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa); sq_err: - ocrdma_err("%s(%d) sq_err\n", __func__, dev->id); + pr_err("%s(%d) sq_err\n", __func__, dev->id); kfree(cmd); return status; } @@ -2127,7 +2113,7 @@ int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid, else if (rdma_link_local_addr(&in6)) rdma_get_ll_mac(&in6, mac_addr); else { - ocrdma_err("%s() fail to resolve mac_addr.\n", __func__); + pr_err("%s() fail to resolve mac_addr.\n", __func__); return -EINVAL; } return 0; @@ -2362,8 +2348,8 @@ int ocrdma_mbx_create_srq(struct ocrdma_srq *srq, dev->attr.rqe_size, &hw_pages, &hw_page_size); if (status) { - ocrdma_err("%s() req. max_wr=0x%x\n", __func__, - srq_attr->attr.max_wr); + pr_err("%s() req. max_wr=0x%x\n", __func__, + srq_attr->attr.max_wr); status = -EINVAL; goto ret; } @@ -2614,7 +2600,7 @@ mq_err: ocrdma_destroy_qp_eqs(dev); qpeq_err: ocrdma_destroy_eq(dev, &dev->meq); - ocrdma_err("%s() status=%d\n", __func__, status); + pr_err("%s() status=%d\n", __func__, status); return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 48928c8..ded416f1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -378,7 +378,7 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev) spin_lock_init(&dev->flush_q_lock); return 0; alloc_err: - ocrdma_err("%s(%d) error.\n", __func__, dev->id); + pr_err("%s(%d) error.\n", __func__, dev->id); return -ENOMEM; } @@ -396,7 +396,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); if (!dev) { - ocrdma_err("Unable to allocate ib device\n"); + pr_err("Unable to allocate ib device\n"); return NULL; } dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL); @@ -437,7 +437,7 @@ init_err: idr_err: kfree(dev->mbx_cmd); ib_dealloc_device(&dev->ibdev); - ocrdma_err("%s() leaving. ret=%d\n", __func__, status); + pr_err("%s() leaving. ret=%d\n", __func__, status); return NULL; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index c75cbdf..36b062d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -608,16 +608,8 @@ enum { OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0) }; -struct ocrdma_create_mq_v0 { - u32 pages; - u32 cqid_ringsize; - u32 valid; - u32 async_cqid_valid; - u32 rsvd; - struct ocrdma_pa pa[8]; -} __packed; - -struct ocrdma_create_mq_v1 { +struct ocrdma_create_mq_req { + struct ocrdma_mbx_hdr req; u32 cqid_pages; u32 async_event_bitmap; u32 async_cqid_ringsize; @@ -627,14 +619,6 @@ struct ocrdma_create_mq_v1 { struct ocrdma_pa pa[8]; } __packed; -struct ocrdma_create_mq_req { - struct ocrdma_mbx_hdr req; - union { - struct ocrdma_create_mq_v0 v0; - struct ocrdma_create_mq_v1 v1; - }; -} __packed; - struct ocrdma_create_mq_rsp { struct ocrdma_mbx_rsp rsp; u32 id; @@ -1550,21 +1534,6 @@ struct ocrdma_cqe { u32 flags_status_srcqpn; /* w3 */ } __packed; -#define is_cqe_valid(cq, cqe) \ - (((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID)\ - == cq->phase) ? 1 : 0) -#define is_cqe_for_sq(cqe) \ - ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 0 : 1) -#define is_cqe_for_rq(cqe) \ - ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 1 : 0) -#define is_cqe_invalidated(cqe) \ - ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_INVALIDATE) ? \ - 1 : 0) -#define is_cqe_imm(cqe) \ - ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_IMM) ? 1 : 0) -#define is_cqe_wr_imm(cqe) \ - ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_WRITE_IMM) ? 1 : 0) - struct ocrdma_sge { u32 addr_hi; u32 addr_lo; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b29a424..dcfbab1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -114,8 +114,8 @@ int ocrdma_query_port(struct ib_device *ibdev, dev = get_ocrdma_dev(ibdev); if (port > 1) { - ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__, - dev->id, port); + pr_err("%s(%d) invalid_port=0x%x\n", __func__, + dev->id, port); return -EINVAL; } netdev = dev->nic_info.netdev; @@ -155,8 +155,7 @@ int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, dev = get_ocrdma_dev(ibdev); if (port > 1) { - ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__, - dev->id, port); + pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port); return -EINVAL; } return 0; @@ -398,7 +397,6 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, kfree(pd); return ERR_PTR(status); } - atomic_set(&pd->use_cnt, 0); if (udata && context) { status = ocrdma_copy_pd_uresp(pd, context, udata); @@ -419,12 +417,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) int status; u64 usr_db; - if (atomic_read(&pd->use_cnt)) { - ocrdma_err("%s(%d) pd=0x%x is in use.\n", - __func__, dev->id, pd->id); - status = -EFAULT; - goto dealloc_err; - } status = ocrdma_mbx_dealloc_pd(dev, pd); if (pd->uctx) { u64 dpp_db = dev->nic_info.dpp_unmapped_addr + @@ -436,7 +428,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd) ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); } kfree(pd); -dealloc_err: return status; } @@ -450,8 +441,8 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd, struct ocrdma_dev *dev = pd->dev; if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { - ocrdma_err("%s(%d) leaving err, invalid access rights\n", - __func__, dev->id); + pr_err("%s(%d) leaving err, invalid access rights\n", + __func__, dev->id); return ERR_PTR(-EINVAL); } @@ -474,7 +465,6 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); } mr->pd = pd; - atomic_inc(&pd->use_cnt); mr->ibmr.lkey = mr->hwmr.lkey; if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) mr->ibmr.rkey = mr->hwmr.lkey; @@ -664,7 +654,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, if (status) goto mbx_err; mr->pd = pd; - atomic_inc(&pd->use_cnt); mr->ibmr.lkey = mr->hwmr.lkey; if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) mr->ibmr.rkey = mr->hwmr.lkey; @@ -689,7 +678,6 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) if (mr->hwmr.fr_mr == 0) ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); - atomic_dec(&mr->pd->use_cnt); /* it could be user registered memory. */ if (mr->umem) ib_umem_release(mr->umem); @@ -714,8 +702,8 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata, uresp.phase_change = cq->phase_change ? 1 : 0; status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (status) { - ocrdma_err("%s(%d) copy error cqid=0x%x.\n", - __func__, cq->dev->id, cq->id); + pr_err("%s(%d) copy error cqid=0x%x.\n", + __func__, cq->dev->id, cq->id); goto err; } uctx = get_ocrdma_ucontext(ib_ctx); @@ -752,7 +740,6 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, spin_lock_init(&cq->cq_lock); spin_lock_init(&cq->comp_handler_lock); - atomic_set(&cq->use_cnt, 0); INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); cq->dev = dev; @@ -799,9 +786,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq) struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_dev *dev = cq->dev; - if (atomic_read(&cq->use_cnt)) - return -EINVAL; - status = ocrdma_mbx_destroy_cq(dev, cq); if (cq->ucontext) { @@ -837,57 +821,56 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, if (attrs->qp_type != IB_QPT_GSI && attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_UD) { - ocrdma_err("%s(%d) unsupported qp type=0x%x requested\n", - __func__, dev->id, attrs->qp_type); + pr_err("%s(%d) unsupported qp type=0x%x requested\n", + __func__, dev->id, attrs->qp_type); return -EINVAL; } if (attrs->cap.max_send_wr > dev->attr.max_wqe) { - ocrdma_err("%s(%d) unsupported send_wr=0x%x requested\n", - __func__, dev->id, attrs->cap.max_send_wr); - ocrdma_err("%s(%d) supported send_wr=0x%x\n", - __func__, dev->id, dev->attr.max_wqe); + pr_err("%s(%d) unsupported send_wr=0x%x requested\n", + __func__, dev->id, attrs->cap.max_send_wr); + pr_err("%s(%d) supported send_wr=0x%x\n", + __func__, dev->id, dev->attr.max_wqe); return -EINVAL; } if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) { - ocrdma_err("%s(%d) unsupported recv_wr=0x%x requested\n", - __func__, dev->id, attrs->cap.max_recv_wr); - ocrdma_err("%s(%d) supported recv_wr=0x%x\n", - __func__, dev->id, dev->attr.max_rqe); + pr_err("%s(%d) unsupported recv_wr=0x%x requested\n", + __func__, dev->id, attrs->cap.max_recv_wr); + pr_err("%s(%d) supported recv_wr=0x%x\n", + __func__, dev->id, dev->attr.max_rqe); return -EINVAL; } if (attrs->cap.max_inline_data > dev->attr.max_inline_data) { - ocrdma_err("%s(%d) unsupported inline data size=0x%x" - " requested\n", __func__, dev->id, - attrs->cap.max_inline_data); - ocrdma_err("%s(%d) supported inline data size=0x%x\n", - __func__, dev->id, dev->attr.max_inline_data); + pr_err("%s(%d) unsupported inline data size=0x%x requested\n", + __func__, dev->id, attrs->cap.max_inline_data); + pr_err("%s(%d) supported inline data size=0x%x\n", + __func__, dev->id, dev->attr.max_inline_data); return -EINVAL; } if (attrs->cap.max_send_sge > dev->attr.max_send_sge) { - ocrdma_err("%s(%d) unsupported send_sge=0x%x requested\n", - __func__, dev->id, attrs->cap.max_send_sge); - ocrdma_err("%s(%d) supported send_sge=0x%x\n", - __func__, dev->id, dev->attr.max_send_sge); + pr_err("%s(%d) unsupported send_sge=0x%x requested\n", + __func__, dev->id, attrs->cap.max_send_sge); + pr_err("%s(%d) supported send_sge=0x%x\n", + __func__, dev->id, dev->attr.max_send_sge); return -EINVAL; } if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) { - ocrdma_err("%s(%d) unsupported recv_sge=0x%x requested\n", - __func__, dev->id, attrs->cap.max_recv_sge); - ocrdma_err("%s(%d) supported recv_sge=0x%x\n", - __func__, dev->id, dev->attr.max_recv_sge); + pr_err("%s(%d) unsupported recv_sge=0x%x requested\n", + __func__, dev->id, attrs->cap.max_recv_sge); + pr_err("%s(%d) supported recv_sge=0x%x\n", + __func__, dev->id, dev->attr.max_recv_sge); return -EINVAL; } /* unprivileged user space cannot create special QP */ if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { - ocrdma_err + pr_err ("%s(%d) Userspace can't create special QPs of type=0x%x\n", __func__, dev->id, attrs->qp_type); return -EINVAL; } /* allow creating only one GSI type of QP */ if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) { - ocrdma_err("%s(%d) GSI special QPs already created.\n", - __func__, dev->id); + pr_err("%s(%d) GSI special QPs already created.\n", + __func__, dev->id); return -EINVAL; } /* verify consumer QPs are not trying to use GSI QP's CQ */ @@ -896,8 +879,8 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) || (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) || (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { - ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n", - __func__, dev->id); + pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n", + __func__, dev->id); return -EINVAL; } } @@ -949,7 +932,7 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, } status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (status) { - ocrdma_err("%s(%d) user copy error.\n", __func__, dev->id); + pr_err("%s(%d) user copy error.\n", __func__, dev->id); goto err; } status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0], @@ -1023,15 +1006,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp, qp->state = OCRDMA_QPS_RST; } -static void ocrdma_set_qp_use_cnt(struct ocrdma_qp *qp, struct ocrdma_pd *pd) -{ - atomic_inc(&pd->use_cnt); - atomic_inc(&qp->sq_cq->use_cnt); - atomic_inc(&qp->rq_cq->use_cnt); - if (qp->srq) - atomic_inc(&qp->srq->use_cnt); - qp->ibqp.qp_num = qp->id; -} static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, struct ib_qp_init_attr *attrs) @@ -1099,7 +1073,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, goto cpy_err; } ocrdma_store_gsi_qp_cq(dev, attrs); - ocrdma_set_qp_use_cnt(qp, pd); + qp->ibqp.qp_num = qp->id; mutex_unlock(&dev->dev_lock); return &qp->ibqp; @@ -1112,7 +1086,7 @@ mbx_err: kfree(qp->wqe_wr_id_tbl); kfree(qp->rqe_wr_id_tbl); kfree(qp); - ocrdma_err("%s(%d) error=%d\n", __func__, dev->id, status); + pr_err("%s(%d) error=%d\n", __func__, dev->id, status); gen_err: return ERR_PTR(status); } @@ -1162,10 +1136,10 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, spin_unlock_irqrestore(&qp->q_lock, flags); if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) { - ocrdma_err("%s(%d) invalid attribute mask=0x%x specified for " - "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", - __func__, dev->id, attr_mask, qp->id, ibqp->qp_type, - old_qps, new_qps); + pr_err("%s(%d) invalid attribute mask=0x%x specified for\n" + "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", + __func__, dev->id, attr_mask, qp->id, ibqp->qp_type, + old_qps, new_qps); goto param_err; } @@ -1475,11 +1449,6 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) ocrdma_del_flush_qp(qp); - atomic_dec(&qp->pd->use_cnt); - atomic_dec(&qp->sq_cq->use_cnt); - atomic_dec(&qp->rq_cq->use_cnt); - if (qp->srq) - atomic_dec(&qp->srq->use_cnt); kfree(qp->wqe_wr_id_tbl); kfree(qp->rqe_wr_id_tbl); kfree(qp); @@ -1565,14 +1534,12 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, goto arm_err; } - atomic_set(&srq->use_cnt, 0); if (udata) { status = ocrdma_copy_srq_uresp(srq, udata); if (status) goto arm_err; } - atomic_inc(&pd->use_cnt); return &srq->ibsrq; arm_err: @@ -1618,18 +1585,12 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq) srq = get_ocrdma_srq(ibsrq); dev = srq->dev; - if (atomic_read(&srq->use_cnt)) { - ocrdma_err("%s(%d) err, srq=0x%x in use\n", - __func__, dev->id, srq->id); - return -EAGAIN; - } status = ocrdma_mbx_destroy_srq(dev, srq); if (srq->pd->uctx) ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len); - atomic_dec(&srq->pd->use_cnt); kfree(srq->idx_bit_fields); kfree(srq->rqe_wr_id_tbl); kfree(srq); @@ -1677,9 +1638,9 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, { if (wr->send_flags & IB_SEND_INLINE) { if (wr->sg_list[0].length > qp->max_inline_data) { - ocrdma_err("%s() supported_len=0x%x," - " unspported len req=0x%x\n", __func__, - qp->max_inline_data, wr->sg_list[0].length); + pr_err("%s() supported_len=0x%x,\n" + " unspported len req=0x%x\n", __func__, + qp->max_inline_data, wr->sg_list[0].length); return -EINVAL; } memcpy(sge, @@ -1773,12 +1734,14 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->q_lock, flags); if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) { spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; return -EINVAL; } while (wr) { if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || wr->num_sge > qp->sq.max_sges) { + *bad_wr = wr; status = -ENOMEM; break; } @@ -1856,7 +1819,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) { - u32 val = qp->rq.dbid | (1 << OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp)); + u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp)); iowrite32(val, qp->rq_db); } @@ -2094,8 +2057,8 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc, break; default: ibwc->status = IB_WC_GENERAL_ERR; - ocrdma_err("%s() invalid opcode received = 0x%x\n", - __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); + pr_err("%s() invalid opcode received = 0x%x\n", + __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); break; }; } diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index 1e603a3..d03ca4c 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -5,3 +5,11 @@ config INFINIBAND_QIB This is a low-level driver for Intel PCIe QLE InfiniBand host channel adapters. This driver does not support the Intel HyperTransport card (model QHT7140). + +config INFINIBAND_QIB_DCA + bool "QIB DCA support" + depends on INFINIBAND_QIB && DCA && SMP && GENERIC_HARDIRQS && !(INFINIBAND_QIB=y && DCA=m) + default y + ---help--- + Setting this enables DCA support on some Intel chip sets + with the iba7322 HCA. diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index f12d7bb..57f8103 100644 --- a/drivers/infiniband/hw/qib/Makefile +++ b/drivers/infiniband/hw/qib/Makefile @@ -13,3 +13,4 @@ ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o +ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 4d11575..4a9af79 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1,7 +1,7 @@ #ifndef _QIB_KERNEL_H #define _QIB_KERNEL_H /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -51,6 +51,7 @@ #include <linux/completion.h> #include <linux/kref.h> #include <linux/sched.h> +#include <linux/kthread.h> #include "qib_common.h" #include "qib_verbs.h" @@ -114,6 +115,11 @@ struct qib_eep_log_mask { /* * Below contains all data related to a single context (formerly called port). */ + +#ifdef CONFIG_DEBUG_FS +struct qib_opcode_stats_perctx; +#endif + struct qib_ctxtdata { void **rcvegrbuf; dma_addr_t *rcvegrbuf_phys; @@ -154,6 +160,8 @@ struct qib_ctxtdata { */ /* instead of calculating it */ unsigned ctxt; + /* local node of context */ + int node_id; /* non-zero if ctxt is being shared. */ u16 subctxt_cnt; /* non-zero if ctxt is being shared. */ @@ -222,12 +230,15 @@ struct qib_ctxtdata { u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; - u32 pkt_count; /* lookaside fields */ struct qib_qp *lookaside_qp; u32 lookaside_qpn; /* QPs waiting for context processing */ struct list_head qp_wait_list; +#ifdef CONFIG_DEBUG_FS + /* verbs stats per CTX */ + struct qib_opcode_stats_perctx *opstats; +#endif }; struct qib_sge_state; @@ -428,9 +439,19 @@ struct qib_verbs_txreq { #define ACTIVITY_TIMER 5 #define MAX_NAME_SIZE 64 + +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify; +#endif + struct qib_msix_entry { struct msix_entry msix; void *arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca; + int rcv; + struct qib_irq_notify *notifier; +#endif char name[MAX_NAME_SIZE]; cpumask_var_t mask; }; @@ -828,6 +849,9 @@ struct qib_devdata { struct qib_ctxtdata *); void (*f_writescratch)(struct qib_devdata *, u32); int (*f_tempsense_rd)(struct qib_devdata *, int regnum); +#ifdef CONFIG_INFINIBAND_QIB_DCA + int (*f_notify_dca)(struct qib_devdata *, unsigned long event); +#endif char *boardname; /* human readable board info */ @@ -1075,6 +1099,10 @@ struct qib_devdata { u16 psxmitwait_check_rate; /* high volume overflow errors defered to tasklet */ struct tasklet_struct error_tasklet; + /* per device cq worker */ + struct kthread_worker *worker; + + int assigned_node_id; /* NUMA node closest to HCA */ }; /* hol_state values */ @@ -1154,7 +1182,7 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *); int qib_setup_eagerbufs(struct qib_ctxtdata *); void qib_set_ctxtcnt(struct qib_devdata *); int qib_create_ctxts(struct qib_devdata *dd); -struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32); +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int); void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); @@ -1320,7 +1348,7 @@ static inline int __qib_sdma_running(struct qib_pportdata *ppd) return ppd->sdma_state.current_state == qib_sdma_state_s99_running; } int qib_sdma_running(struct qib_pportdata *); - +void dump_sdma_state(struct qib_pportdata *ppd); void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); @@ -1445,6 +1473,7 @@ extern unsigned qib_n_krcv_queues; extern unsigned qib_sdma_fetch_arb; extern unsigned qib_compat_ddr_negotiate; extern int qib_special_trigger; +extern unsigned qib_numa_aware; extern struct mutex qib_mutex; @@ -1474,27 +1503,23 @@ extern struct mutex qib_mutex; * first to avoid possible serial port delays from printk. */ #define qib_early_err(dev, fmt, ...) \ - do { \ - dev_err(dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_err(dev, fmt, ##__VA_ARGS__) #define qib_dev_err(dd, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ - qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) + +#define qib_dev_warn(dd, fmt, ...) \ + dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) #define qib_dev_porterr(dd, port, fmt, ...) \ - do { \ - dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ - qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ - ##__VA_ARGS__); \ - } while (0) + dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ + qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ + ##__VA_ARGS__) #define qib_devinfo(pcidev, fmt, ...) \ - do { \ - dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \ - } while (0) + dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__) /* * this is used for formatting hw error messages... diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h index d39e018..4f255b7 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -279,7 +279,7 @@ struct qib_base_info { * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. */ -#define QIB_USER_SWMINOR 11 +#define QIB_USER_SWMINOR 12 #define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c index 5246aa4..ab4e11c 100644 --- a/drivers/infiniband/hw/qib/qib_cq.c +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -34,8 +35,10 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/kthread.h> #include "qib_verbs.h" +#include "qib.h" /** * qib_cq_enter - add a new entry to the completion queue @@ -102,13 +105,18 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited) if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && (solicited || entry->status != IB_WC_SUCCESS))) { - cq->notify = IB_CQ_NONE; - cq->triggered++; + struct kthread_worker *worker; /* * This will cause send_complete() to be called in * another thread. */ - queue_work(qib_cq_wq, &cq->comptask); + smp_rmb(); + worker = cq->dd->worker; + if (likely(worker)) { + cq->notify = IB_CQ_NONE; + cq->triggered++; + queue_kthread_work(worker, &cq->comptask); + } } spin_unlock_irqrestore(&cq->lock, flags); @@ -163,7 +171,7 @@ bail: return npolled; } -static void send_complete(struct work_struct *work) +static void send_complete(struct kthread_work *work) { struct qib_cq *cq = container_of(work, struct qib_cq, comptask); @@ -287,11 +295,12 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, * The number of entries should be >= the number requested or return * an error. */ + cq->dd = dd_from_dev(dev); cq->ibcq.cqe = entries; cq->notify = IB_CQ_NONE; cq->triggered = 0; spin_lock_init(&cq->lock); - INIT_WORK(&cq->comptask, send_complete); + init_kthread_work(&cq->comptask, send_complete); wc->head = 0; wc->tail = 0; cq->queue = wc; @@ -323,7 +332,7 @@ int qib_destroy_cq(struct ib_cq *ibcq) struct qib_ibdev *dev = to_idev(ibcq->device); struct qib_cq *cq = to_icq(ibcq); - flush_work(&cq->comptask); + flush_kthread_work(&cq->comptask); spin_lock(&dev->n_cqs_lock); dev->n_cqs_allocated--; spin_unlock(&dev->n_cqs_lock); @@ -483,3 +492,49 @@ bail_free: bail: return ret; } + +int qib_cq_init(struct qib_devdata *dd) +{ + int ret = 0; + int cpu; + struct task_struct *task; + + if (dd->worker) + return 0; + dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL); + if (!dd->worker) + return -ENOMEM; + init_kthread_worker(dd->worker); + task = kthread_create_on_node( + kthread_worker_fn, + dd->worker, + dd->assigned_node_id, + "qib_cq%d", dd->unit); + if (IS_ERR(task)) + goto task_fail; + cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id)); + kthread_bind(task, cpu); + wake_up_process(task); +out: + return ret; +task_fail: + ret = PTR_ERR(task); + kfree(dd->worker); + dd->worker = NULL; + goto out; +} + +void qib_cq_exit(struct qib_devdata *dd) +{ + struct kthread_worker *worker; + + worker = dd->worker; + if (!worker) + return; + /* blocks future queuing from send_complete() */ + dd->worker = NULL; + smp_wmb(); + flush_kthread_worker(worker); + kthread_stop(worker->task); + kfree(worker); +} diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c new file mode 100644 index 0000000..799a0c3 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -0,0 +1,283 @@ +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/kernel.h> +#include <linux/export.h> + +#include "qib.h" +#include "qib_verbs.h" +#include "qib_debugfs.h" + +static struct dentry *qib_dbg_root; + +#define DEBUGFS_FILE(name) \ +static const struct seq_operations _##name##_seq_ops = { \ + .start = _##name##_seq_start, \ + .next = _##name##_seq_next, \ + .stop = _##name##_seq_stop, \ + .show = _##name##_seq_show \ +}; \ +static int _##name##_open(struct inode *inode, struct file *s) \ +{ \ + struct seq_file *seq; \ + int ret; \ + ret = seq_open(s, &_##name##_seq_ops); \ + if (ret) \ + return ret; \ + seq = s->private_data; \ + seq->private = inode->i_private; \ + return 0; \ +} \ +static const struct file_operations _##name##_file_ops = { \ + .owner = THIS_MODULE, \ + .open = _##name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = seq_release \ +}; + +#define DEBUGFS_FILE_CREATE(name) \ +do { \ + struct dentry *ent; \ + ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \ + ibd, &_##name##_file_ops); \ + if (!ent) \ + pr_warn("create of " #name " failed\n"); \ +} while (0) + +static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + + +static void _opcode_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _opcode_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + for (j = 0; j < dd->first_user_ctxt; j++) { + if (!dd->rcd[j]) + continue; + n_packets += dd->rcd[j]->opstats->stats[i].n_packets; + n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu\n", i, + (unsigned long long) n_packets, + (unsigned long long) n_bytes); + + return 0; +} + +DEBUGFS_FILE(opcode_stats) + +static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (!*pos) + return SEQ_START_TOKEN; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) + return pos; + + ++*pos; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void _ctx_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _ctx_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos; + loff_t i, j; + u64 n_packets = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) { + seq_puts(s, "Ctx:npkts\n"); + return 0; + } + + spos = v; + i = *spos; + + if (!dd->rcd[i]) + return SEQ_SKIP; + + for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++) + n_packets += dd->rcd[i]->opstats->stats[j].n_packets; + + if (!n_packets) + return SEQ_SKIP; + + seq_printf(s, " %llu:%llu\n", i, n_packets); + return 0; +} + +DEBUGFS_FILE(ctx_stats) + +static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_qp_iter *iter; + loff_t n = *pos; + + iter = qib_qp_iter_init(s->private); + if (!iter) + return NULL; + + while (n--) { + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + } + + return iter; +} + +static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, + loff_t *pos) +{ + struct qib_qp_iter *iter = iter_ptr; + + (*pos)++; + + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) +{ + /* nothing for now */ +} + +static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) +{ + struct qib_qp_iter *iter = iter_ptr; + + if (!iter) + return 0; + + qib_qp_iter_print(s, iter); + + return 0; +} + +DEBUGFS_FILE(qp_stats) + +void qib_dbg_ibdev_init(struct qib_ibdev *ibd) +{ + char name[10]; + + snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit); + ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root); + if (!ibd->qib_ibdev_dbg) { + pr_warn("create of %s failed\n", name); + return; + } + DEBUGFS_FILE_CREATE(opcode_stats); + DEBUGFS_FILE_CREATE(ctx_stats); + DEBUGFS_FILE_CREATE(qp_stats); + return; +} + +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd) +{ + if (!qib_dbg_root) + goto out; + debugfs_remove_recursive(ibd->qib_ibdev_dbg); +out: + ibd->qib_ibdev_dbg = NULL; +} + +void qib_dbg_init(void) +{ + qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL); + if (!qib_dbg_root) + pr_warn("init of debugfs failed\n"); +} + +void qib_dbg_exit(void) +{ + debugfs_remove_recursive(qib_dbg_root); + qib_dbg_root = NULL; +} + +#endif + diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h new file mode 100644 index 0000000..7ae983a --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.h @@ -0,0 +1,45 @@ +#ifndef _QIB_DEBUGFS_H +#define _QIB_DEBUGFS_H + +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +struct qib_ibdev; +void qib_dbg_ibdev_init(struct qib_ibdev *ibd); +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd); +void qib_dbg_init(void); +void qib_dbg_exit(void); + +#endif + +#endif /* _QIB_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 2160924..5bee08f 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -558,7 +558,6 @@ move_along: } rcd->head = l; - rcd->pkt_count += i; /* * Iterate over all QPs waiting to respond. diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 9dd0bc8..b51a514 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -1155,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt) return pollflag; } +static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) +{ + struct qib_filedata *fd = fp->private_data; + const unsigned int weight = cpumask_weight(¤t->cpus_allowed); + const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); + int local_cpu; + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it on the local NUMA node. + */ + if ((weight >= qib_cpulist_count) && + (cpumask_weight(local_mask) <= qib_cpulist_count)) { + for_each_cpu(local_cpu, local_mask) + if (!test_and_set_bit(local_cpu, qib_cpulist)) { + fd->rec_cpu_num = local_cpu; + return; + } + } + + /* + * If process has NOT already set it's affinity, select and + * reserve a processor for it, as a rendevous for all + * users of the driver. If they don't actually later + * set affinity to this cpu, or set it to some other cpu, + * it just means that sooner or later we don't recommend + * a cpu, and let the scheduler do it's best. + */ + if (weight >= qib_cpulist_count) { + int cpu; + cpu = find_first_zero_bit(qib_cpulist, + qib_cpulist_count); + if (cpu == qib_cpulist_count) + qib_dev_err(dd, + "no cpus avail for affinity PID %u\n", + current->pid); + else { + __set_bit(cpu, qib_cpulist); + fd->rec_cpu_num = cpu; + } + } +} + /* * Check that userland and driver are compatible for subcontexts. */ @@ -1259,12 +1302,20 @@ bail: static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, struct file *fp, const struct qib_user_info *uinfo) { + struct qib_filedata *fd = fp->private_data; struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; void *ptmp = NULL; int ret; + int numa_id; + + assign_ctxt_affinity(fp, dd); - rcd = qib_create_ctxtdata(ppd, ctxt); + numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ? + cpu_to_node(fd->rec_cpu_num) : + numa_node_id()) : dd->assigned_node_id; + + rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); /* * Allocate memory for use in qib_tid_update() at open to @@ -1296,6 +1347,9 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, goto bail; bailerr: + if (fd->rec_cpu_num != -1) + __clear_bit(fd->rec_cpu_num, qib_cpulist); + dd->rcd[ctxt] = NULL; kfree(rcd); kfree(ptmp); @@ -1485,6 +1539,57 @@ static int qib_open(struct inode *in, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } +static int find_hca(unsigned int cpu, int *unit) +{ + int ret = 0, devmax, npresent, nup, ndev; + + *unit = -1; + + devmax = qib_count_units(&npresent, &nup); + if (!npresent) { + ret = -ENXIO; + goto done; + } + if (!nup) { + ret = -ENETDOWN; + goto done; + } + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + if (dd) { + if (pcibus_to_node(dd->pcidev->bus) < 0) { + ret = -EINVAL; + goto done; + } + if (cpu_to_node(cpu) == + pcibus_to_node(dd->pcidev->bus)) { + *unit = ndev; + goto done; + } + } + } +done: + return ret; +} + +static int do_qib_user_sdma_queue_create(struct file *fp) +{ + struct qib_filedata *fd = fp->private_data; + struct qib_ctxtdata *rcd = fd->rcd; + struct qib_devdata *dd = rcd->dd; + + if (dd->flags & QIB_HAS_SEND_DMA) + + fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, + dd->unit, + rcd->ctxt, + fd->subctxt); + if (!fd->pq) + return -ENOMEM; + + return 0; +} + /* * Get ctxt early, so can set affinity prior to memory allocation. */ @@ -1517,61 +1622,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) if (qib_compatible_subctxts(swmajor, swminor) && uinfo->spu_subctxt_cnt) { ret = find_shared_ctxt(fp, uinfo); - if (ret) { - if (ret > 0) - ret = 0; - goto done_chk_sdma; + if (ret > 0) { + ret = do_qib_user_sdma_queue_create(fp); + if (!ret) + assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd); + goto done_ok; } } i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; if (i_minor) ret = find_free_ctxt(i_minor - 1, fp, uinfo); - else + else { + int unit; + const unsigned int cpu = cpumask_first(¤t->cpus_allowed); + const unsigned int weight = + cpumask_weight(¤t->cpus_allowed); + + if (weight == 1 && !test_bit(cpu, qib_cpulist)) + if (!find_hca(cpu, &unit) && unit >= 0) + if (!find_free_ctxt(unit, fp, uinfo)) { + ret = 0; + goto done_chk_sdma; + } ret = get_a_ctxt(fp, uinfo, alg); - -done_chk_sdma: - if (!ret) { - struct qib_filedata *fd = fp->private_data; - const struct qib_ctxtdata *rcd = fd->rcd; - const struct qib_devdata *dd = rcd->dd; - unsigned int weight; - - if (dd->flags & QIB_HAS_SEND_DMA) { - fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, - dd->unit, - rcd->ctxt, - fd->subctxt); - if (!fd->pq) - ret = -ENOMEM; - } - - /* - * If process has NOT already set it's affinity, select and - * reserve a processor for it, as a rendezvous for all - * users of the driver. If they don't actually later - * set affinity to this cpu, or set it to some other cpu, - * it just means that sooner or later we don't recommend - * a cpu, and let the scheduler do it's best. - */ - weight = cpumask_weight(tsk_cpus_allowed(current)); - if (!ret && weight >= qib_cpulist_count) { - int cpu; - cpu = find_first_zero_bit(qib_cpulist, - qib_cpulist_count); - if (cpu != qib_cpulist_count) { - __set_bit(cpu, qib_cpulist); - fd->rec_cpu_num = cpu; - } - } else if (weight == 1 && - test_bit(cpumask_first(tsk_cpus_allowed(current)), - qib_cpulist)) - qib_devinfo(dd->pcidev, - "%s PID %u affinity set to cpu %d; already allocated\n", - current->comm, current->pid, - cpumask_first(tsk_cpus_allowed(current))); } +done_chk_sdma: + if (!ret) + ret = do_qib_user_sdma_queue_create(fp); +done_ok: mutex_unlock(&qib_mutex); done: diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 0232ae5..84e593d 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3464,6 +3464,13 @@ static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum) return -ENXIO; } +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + /* Dummy function, as 6120 boards never disable EEPROM Write */ static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen) { @@ -3539,6 +3546,9 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, dd->f_xgxs_reset = qib_6120_xgxs_reset; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_6120_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_6120_notify_dca; +#endif /* * Do remaining pcie setup and save pcie values in dd. * Any error printing is already done by the init code. diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 64d0ecb..454c2e7 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4513,6 +4513,13 @@ bail: return ret; } +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + /* Dummy function, as 7220 boards never disable EEPROM Write */ static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) { @@ -4587,6 +4594,9 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, dd->f_xgxs_reset = qib_7220_xgxs_reset; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_7220_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7220_notify_dca; +#endif /* * Do remaining pcie setup and save pcie values in dd. * Any error printing is already done by the init code. diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 3f6b21e..21e8b09 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -44,6 +44,9 @@ #include <linux/module.h> #include <rdma/ib_verbs.h> #include <rdma/ib_smi.h> +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include <linux/dca.h> +#endif #include "qib.h" #include "qib_7322_regs.h" @@ -80,6 +83,7 @@ static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned); static void serdes_7322_los_enable(struct qib_pportdata *, int); static int serdes_7322_init_old(struct qib_pportdata *); static int serdes_7322_init_new(struct qib_pportdata *); +static void dump_sdma_7322_state(struct qib_pportdata *); #define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb)) @@ -519,6 +523,14 @@ static const u8 qib_7322_physportstate[0x20] = { [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN }; +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify { + int rcv; + void *arg; + struct irq_affinity_notify notify; +}; +#endif + struct qib_chip_specific { u64 __iomem *cregbase; u64 *cntrs; @@ -546,6 +558,12 @@ struct qib_chip_specific { u32 lastbuf_for_pio; u32 stay_in_freeze; u32 recovery_ports_initted; +#ifdef CONFIG_INFINIBAND_QIB_DCA + u32 dca_ctrl; + int rhdr_cpu[18]; + int sdma_cpu[2]; + u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */ +#endif struct qib_msix_entry *msix_entries; unsigned long *sendchkenable; unsigned long *sendgrhchk; @@ -573,7 +591,7 @@ struct vendor_txdds_ent { static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *); #define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */ -#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */ +#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */ #define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */ #define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */ @@ -635,6 +653,7 @@ struct qib_chippport_specific { u8 ibmalfusesnap; struct qib_qsfp_data qsfp_data; char epmsgbuf[192]; /* for port error interrupt msg buffer */ + char sdmamsgbuf[192]; /* for per-port sdma error messages */ }; static struct { @@ -642,28 +661,76 @@ static struct { irq_handler_t handler; int lsb; int port; /* 0 if not port-specific, else port # */ + int dca; } irq_table[] = { - { "", qib_7322intr, -1, 0 }, + { "", qib_7322intr, -1, 0, 0 }, { " (buf avail)", qib_7322bufavail, - SYM_LSB(IntStatus, SendBufAvail), 0 }, + SYM_LSB(IntStatus, SendBufAvail), 0, 0}, { " (sdma 0)", sdma_intr, - SYM_LSB(IntStatus, SDmaInt_0), 1 }, + SYM_LSB(IntStatus, SDmaInt_0), 1, 1 }, { " (sdma 1)", sdma_intr, - SYM_LSB(IntStatus, SDmaInt_1), 2 }, + SYM_LSB(IntStatus, SDmaInt_1), 2, 1 }, { " (sdmaI 0)", sdma_idle_intr, - SYM_LSB(IntStatus, SDmaIdleInt_0), 1 }, + SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1}, { " (sdmaI 1)", sdma_idle_intr, - SYM_LSB(IntStatus, SDmaIdleInt_1), 2 }, + SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1}, { " (sdmaP 0)", sdma_progress_intr, - SYM_LSB(IntStatus, SDmaProgressInt_0), 1 }, + SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 }, { " (sdmaP 1)", sdma_progress_intr, - SYM_LSB(IntStatus, SDmaProgressInt_1), 2 }, + SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 }, { " (sdmaC 0)", sdma_cleanup_intr, - SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 }, + SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 }, { " (sdmaC 1)", sdma_cleanup_intr, - SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 }, + SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0}, }; +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static const struct dca_reg_map { + int shadow_inx; + int lsb; + u64 mask; + u16 regno; +} dca_rcvhdr_reg_map[] = { + { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) }, +}; +#endif + /* ibcctrl bits */ #define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 /* cycle through TS1/TS2 till OK */ @@ -686,6 +753,13 @@ static void write_7322_init_portregs(struct qib_pportdata *); static void setup_7322_link_recovery(struct qib_pportdata *, u32); static void check_7322_rxe_status(struct qib_pportdata *); static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *); +#ifdef CONFIG_INFINIBAND_QIB_DCA +static void qib_setup_dca(struct qib_devdata *dd); +static void setup_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +static void reset_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +#endif /** * qib_read_ureg32 - read 32-bit virtualized per-context register @@ -1529,6 +1603,15 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs) spin_lock_irqsave(&ppd->sdma_lock, flags); + if (errs != QIB_E_P_SDMAHALT) { + /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */ + qib_dev_porterr(dd, ppd->port, + "SDMA %s 0x%016llx %s\n", + qib_sdma_state_names[ppd->sdma_state.current_state], + errs, ppd->cpspec->sdmamsgbuf); + dump_sdma_7322_state(ppd); + } + switch (ppd->sdma_state.current_state) { case qib_sdma_state_s00_hw_down: break; @@ -2084,6 +2167,29 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, qib_dev_err(dd, "%s hardware error\n", msg); + if (hwerrs & + (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) | + SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) { + int pidx = 0; + int err; + unsigned long flags; + struct qib_pportdata *ppd = dd->pport; + for (; pidx < dd->num_pports; ++pidx, ppd++) { + err = 0; + if (pidx == 0 && (hwerrs & + SYM_MASK(HwErrMask, SDmaMemReadErrMask_0))) + err++; + if (pidx == 1 && (hwerrs & + SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) + err++; + if (err) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + dump_sdma_7322_state(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + } + } + if (isfatal && !dd->diag_client) { qib_dev_err(dd, "Fatal Hardware Error, no longer usable, SN %.16s\n", @@ -2558,6 +2664,162 @@ static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on) qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink); } +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + switch (event) { + case DCA_PROVIDER_ADD: + if (dd->flags & QIB_DCA_ENABLED) + break; + if (!dca_add_requester(&dd->pcidev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } + break; + case DCA_PROVIDER_REMOVE: + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), + dd->cspec->dca_ctrl); + } + break; + } + return 0; +} + +static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_chip_specific *cspec = dd->cspec; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->rhdr_cpu[rcd->ctxt] != cpu) { + const struct dca_reg_map *rmp; + + cspec->rhdr_cpu[rcd->ctxt] = cpu; + rmp = &dca_rcvhdr_reg_map[rcd->ctxt]; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb; + qib_devinfo(dd->pcidev, + "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu, + (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + qib_write_kreg(dd, rmp->regno, + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_chip_specific *cspec = dd->cspec; + unsigned pidx = ppd->port - 1; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->sdma_cpu[pidx] != cpu) { + cspec->sdma_cpu[pidx] = cpu; + cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ? + SYM_MASK(DCACtrlF, SendDma1DCAOPH) : + SYM_MASK(DCACtrlF, SendDma0DCAOPH)); + cspec->dca_rcvhdr_ctrl[4] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << + (ppd->hw_pidx ? + SYM_LSB(DCACtrlF, SendDma1DCAOPH) : + SYM_LSB(DCACtrlF, SendDma0DCAOPH)); + qib_devinfo(dd->pcidev, + "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu, + (long long) cspec->dca_rcvhdr_ctrl[4]); + qib_write_kreg(dd, KREG_IDX(DCACtrlF), + cspec->dca_rcvhdr_ctrl[4]); + cspec->dca_ctrl |= ppd->hw_pidx ? + SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) : + SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_setup_dca(struct qib_devdata *dd) +{ + struct qib_chip_specific *cspec = dd->cspec; + int i; + + for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++) + cspec->rhdr_cpu[i] = -1; + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + cspec->sdma_cpu[i] = -1; + cspec->dca_rcvhdr_ctrl[0] = + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[1] = + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[2] = + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[3] = + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[4] = + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt)); + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i, + cspec->dca_rcvhdr_ctrl[i]); + for (i = 0; i < cspec->num_msix_entries; i++) + setup_dca_notifier(dd, &cspec->msix_entries[i]); +} + +static void qib_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct qib_irq_notify *n = + container_of(notify, struct qib_irq_notify, notify); + int cpu = cpumask_first(mask); + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + qib_update_rhdrq_dca(rcd, cpu); + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + qib_update_sdma_dca(ppd, cpu); + } +} + +static void qib_irq_notifier_release(struct kref *ref) +{ + struct qib_irq_notify *n = + container_of(ref, struct qib_irq_notify, notify.kref); + struct qib_devdata *dd; + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + dd = rcd->dd; + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + dd = ppd->dd; + } + qib_devinfo(dd->pcidev, + "release on HCA notify 0x%p n 0x%p\n", ref, n); + kfree(n); +} +#endif + /* * Disable MSIx interrupt if enabled, call generic MSIx code * to cleanup, and clear pending MSIx interrupts. @@ -2575,6 +2837,9 @@ static void qib_7322_nomsix(struct qib_devdata *dd) dd->cspec->num_msix_entries = 0; for (i = 0; i < n; i++) { +#ifdef CONFIG_INFINIBAND_QIB_DCA + reset_dca_notifier(dd, &dd->cspec->msix_entries[i]); +#endif irq_set_affinity_hint( dd->cspec->msix_entries[i].msix.vector, NULL); free_cpumask_var(dd->cspec->msix_entries[i].mask); @@ -2602,6 +2867,15 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) { int i; +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl); + } +#endif + qib_7322_free_irq(dd); kfree(dd->cspec->cntrs); kfree(dd->cspec->sendchkenable); @@ -3068,6 +3342,53 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) return IRQ_HANDLED; } +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + if (!m->dca) + return; + qib_devinfo(dd->pcidev, + "Disabling notifier on HCA %d irq %d\n", + dd->unit, + m->msix.vector); + irq_set_affinity_notifier( + m->msix.vector, + NULL); + m->notifier = NULL; +} + +static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + struct qib_irq_notify *n; + + if (!m->dca) + return; + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (n) { + int ret; + + m->notifier = n; + n->notify.irq = m->msix.vector; + n->notify.notify = qib_irq_notifier_notify; + n->notify.release = qib_irq_notifier_release; + n->arg = m->arg; + n->rcv = m->rcv; + qib_devinfo(dd->pcidev, + "set notifier irq %d rcv %d notify %p\n", + n->notify.irq, n->rcv, &n->notify); + ret = irq_set_affinity_notifier( + n->notify.irq, + &n->notify); + if (ret) { + m->notifier = NULL; + kfree(n); + } + } +} + +#endif + /* * Set up our chip-specific interrupt handler. * The interrupt type has already been setup, so @@ -3149,6 +3470,9 @@ try_intx: void *arg; u64 val; int lsb, reg, sh; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca = 0; +#endif dd->cspec->msix_entries[msixnum]. name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] @@ -3161,6 +3485,9 @@ try_intx: arg = dd->pport + irq_table[i].port - 1; } else arg = dd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = irq_table[i].dca; +#endif lsb = irq_table[i].lsb; handler = irq_table[i].handler; snprintf(dd->cspec->msix_entries[msixnum].name, @@ -3178,6 +3505,9 @@ try_intx: continue; if (qib_krcvq01_no_msi && ctxt < 2) continue; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = 1; +#endif lsb = QIB_I_RCVAVAIL_LSB + ctxt; handler = qib_7322pintr; snprintf(dd->cspec->msix_entries[msixnum].name, @@ -3203,6 +3533,11 @@ try_intx: goto try_intx; } dd->cspec->msix_entries[msixnum].arg = arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->cspec->msix_entries[msixnum].dca = dca; + dd->cspec->msix_entries[msixnum].rcv = + handler == qib_7322pintr; +#endif if (lsb >= 0) { reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * @@ -6452,6 +6787,86 @@ static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt) qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt); } +/* + * sdma_lock should be acquired before calling this routine + */ +static void dump_sdma_7322_state(struct qib_pportdata *ppd) +{ + u64 reg, reg1, reg2; + + reg = qib_read_kreg_port(ppd, krp_senddmastatus); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmastatus: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_sendctrl); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sendctrl: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmabase); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmabase: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmabufmask0); + reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + + /* get bufuse bits, clear them, and print them again if non-zero */ + reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg); + reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2); + qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg2); + /* 0 and 1 should always be zero, so print as short form */ + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0); + reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1); + reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2); + /* 0 and 1 should always be zero, so print as short form */ + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n", + reg, reg1, reg2); + + reg = qib_read_kreg_port(ppd, krp_senddmatail); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmatail: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmahead); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmahead: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmaheadaddr: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmalengen); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmalengen: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmadesccnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmadesccnt: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmaidlecnt: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmapriorityhld: 0x%016llx\n", reg); + + reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA senddmareloadcnt: 0x%016llx\n", reg); + + dump_sdma_state(ppd); +} + static struct sdma_set_state_action sdma_7322_action_table[] = { [qib_sdma_state_s00_hw_down] = { .go_s99_running_tofalse = 1, @@ -6885,6 +7300,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, dd->f_sdma_init_early = qib_7322_sdma_init_early; dd->f_writescratch = writescratch; dd->f_tempsense_rd = qib_7322_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7322_notify_dca; +#endif /* * Do remaining PCIe setup and save PCIe values in dd. * Any error printing is already done by the init code. @@ -6921,7 +7339,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, actual_cnt -= dd->num_pports; tabsize = actual_cnt; - dd->cspec->msix_entries = kmalloc(tabsize * + dd->cspec->msix_entries = kzalloc(tabsize * sizeof(struct qib_msix_entry), GFP_KERNEL); if (!dd->cspec->msix_entries) { qib_dev_err(dd, "No memory for MSIx table\n"); @@ -6941,7 +7359,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, /* clear diagctrl register, in case diags were running and crashed */ qib_write_kreg(dd, kr_hwdiagctrl, 0); - +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (!dca_add_requester(&pdev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } +#endif goto bail; bail_cleanup: @@ -7156,15 +7580,20 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ - { 0, 0, 0, 11 }, /* QME7342 backplane settings */ { 0, 0, 0, 3 }, /* QMH7342 backplane settings */ { 0, 0, 0, 4 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { @@ -7173,15 +7602,20 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ - { 0, 0, 0, 13 }, /* QME7342 backplane settings */ { 0, 0, 0, 9 }, /* QMH7342 backplane settings */ { 0, 0, 0, 10 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { @@ -7190,15 +7624,20 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { { 0, 1, 0, 5 }, /* QMH7342 backplane settings */ { 0, 1, 0, 6 }, /* QMH7342 backplane settings */ { 0, 1, 0, 8 }, /* QMH7342 backplane settings */ - { 0, 1, 12, 10 }, /* QME7342 backplane setting */ - { 0, 1, 12, 11 }, /* QME7342 backplane setting */ - { 0, 1, 12, 12 }, /* QME7342 backplane setting */ - { 0, 1, 12, 14 }, /* QME7342 backplane setting */ - { 0, 1, 12, 6 }, /* QME7342 backplane setting */ - { 0, 1, 12, 7 }, /* QME7342 backplane setting */ - { 0, 1, 12, 8 }, /* QME7342 backplane setting */ { 0, 1, 0, 10 }, /* QMH7342 backplane settings */ { 0, 1, 0, 12 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ }; static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = { diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 173f805..36e048e 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -39,10 +39,17 @@ #include <linux/idr.h> #include <linux/module.h> #include <linux/printk.h> +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include <linux/dca.h> +#endif #include "qib.h" #include "qib_common.h" #include "qib_mad.h" +#ifdef CONFIG_DEBUG_FS +#include "qib_debugfs.h" +#include "qib_verbs.h" +#endif #undef pr_fmt #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt @@ -64,6 +71,11 @@ ushort qib_cfgctxts; module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); +unsigned qib_numa_aware; +module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO); +MODULE_PARM_DESC(numa_aware, + "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process"); + /* * If set, do not write to any regs if avoidable, hack to allow * check for deranged default register values. @@ -89,8 +101,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); -struct workqueue_struct *qib_cq_wq; - static void verify_interrupt(unsigned long); static struct idr qib_unit_table; @@ -121,6 +131,11 @@ int qib_create_ctxts(struct qib_devdata *dd) { unsigned i; int ret; + int local_node_id = pcibus_to_node(dd->pcidev->bus); + + if (local_node_id < 0) + local_node_id = numa_node_id(); + dd->assigned_node_id = local_node_id; /* * Allocate full ctxtcnt array, rather than just cfgctxts, because @@ -143,7 +158,8 @@ int qib_create_ctxts(struct qib_devdata *dd) continue; ppd = dd->pport + (i % dd->num_pports); - rcd = qib_create_ctxtdata(ppd, i); + + rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id); if (!rcd) { qib_dev_err(dd, "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); @@ -161,20 +177,33 @@ done: /* * Common code for user and kernel context setup. */ -struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, + int node_id) { struct qib_devdata *dd = ppd->dd; struct qib_ctxtdata *rcd; - rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); + rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id); if (rcd) { INIT_LIST_HEAD(&rcd->qp_wait_list); + rcd->node_id = node_id; rcd->ppd = ppd; rcd->dd = dd; rcd->cnt = 1; rcd->ctxt = ctxt; dd->rcd[ctxt] = rcd; - +#ifdef CONFIG_DEBUG_FS + if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ + rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), + GFP_KERNEL, node_id); + if (!rcd->opstats) { + kfree(rcd); + qib_dev_err(dd, + "Unable to allocate per ctxt stats buffer\n"); + return NULL; + } + } +#endif dd->f_init_ctxt(rcd); /* @@ -429,6 +458,7 @@ static int loadtime_init(struct qib_devdata *dd) dd->intrchk_timer.function = verify_interrupt; dd->intrchk_timer.data = (unsigned long) dd; + ret = qib_cq_init(dd); done: return ret; } @@ -944,6 +974,10 @@ void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd) vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); +#ifdef CONFIG_DEBUG_FS + kfree(rcd->opstats); + rcd->opstats = NULL; +#endif kfree(rcd); } @@ -1033,7 +1067,6 @@ done: dd->f_set_armlaunch(dd, 1); } - void qib_free_devdata(struct qib_devdata *dd) { unsigned long flags; @@ -1043,6 +1076,9 @@ void qib_free_devdata(struct qib_devdata *dd) list_del(&dd->list); spin_unlock_irqrestore(&qib_devs_lock, flags); +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_exit(&dd->verbs_dev); +#endif ib_dealloc_device(&dd->verbs_dev.ibdev); } @@ -1066,6 +1102,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) goto bail; } +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_init(&dd->verbs_dev); +#endif + idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); @@ -1081,6 +1121,9 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_exit(&dd->verbs_dev); +#endif ib_dealloc_device(&dd->verbs_dev.ibdev); dd = ERR_PTR(ret); goto bail; @@ -1158,6 +1201,35 @@ struct pci_driver qib_driver = { .err_handler = &qib_pci_err_handler, }; +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_notify_dca(struct notifier_block *, unsigned long, void *); +static struct notifier_block dca_notifier = { + .notifier_call = qib_notify_dca, + .next = NULL, + .priority = 0 +}; + +static int qib_notify_dca_device(struct device *device, void *data) +{ + struct qib_devdata *dd = dev_get_drvdata(device); + unsigned long event = *(unsigned long *)data; + + return dd->f_notify_dca(dd, event); +} + +static int qib_notify_dca(struct notifier_block *nb, unsigned long event, + void *p) +{ + int rval; + + rval = driver_for_each_device(&qib_driver.driver, NULL, + &event, qib_notify_dca_device); + return rval ? NOTIFY_BAD : NOTIFY_DONE; +} + +#endif + /* * Do all the generic driver unit- and chip-independent memory * allocation and initialization. @@ -1170,22 +1242,22 @@ static int __init qlogic_ib_init(void) if (ret) goto bail; - qib_cq_wq = create_singlethread_workqueue("qib_cq"); - if (!qib_cq_wq) { - ret = -ENOMEM; - goto bail_dev; - } - /* * These must be called before the driver is registered with * the PCI subsystem. */ idr_init(&qib_unit_table); +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_register_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_init(); +#endif ret = pci_register_driver(&qib_driver); if (ret < 0) { pr_err("Unable to register driver: error %d\n", -ret); - goto bail_unit; + goto bail_dev; } /* not fatal if it doesn't work */ @@ -1193,10 +1265,14 @@ static int __init qlogic_ib_init(void) pr_err("Unable to register ipathfs\n"); goto bail; /* all OK */ -bail_unit: - idr_destroy(&qib_unit_table); - destroy_workqueue(qib_cq_wq); bail_dev: +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif + idr_destroy(&qib_unit_table); qib_dev_cleanup(); bail: return ret; @@ -1217,9 +1293,13 @@ static void __exit qlogic_ib_cleanup(void) "Unable to cleanup counter filesystem: error %d\n", -ret); +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif pci_unregister_driver(&qib_driver); - - destroy_workqueue(qib_cq_wq); +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif qib_cpulist_count = 0; kfree(qib_cpulist); @@ -1270,7 +1350,7 @@ static void cleanup_device_data(struct qib_devdata *dd) if (dd->pageshadow) { struct page **tmpp = dd->pageshadow; dma_addr_t *tmpd = dd->physshadow; - int i, cnt = 0; + int i; for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) { int ctxt_tidbase = ctxt * dd->rcvtidcnt; @@ -1283,13 +1363,13 @@ static void cleanup_device_data(struct qib_devdata *dd) PAGE_SIZE, PCI_DMA_FROMDEVICE); qib_release_user_pages(&tmpp[i], 1); tmpp[i] = NULL; - cnt++; } } - tmpp = dd->pageshadow; dd->pageshadow = NULL; vfree(tmpp); + dd->physshadow = NULL; + vfree(tmpd); } /* @@ -1311,6 +1391,7 @@ static void cleanup_device_data(struct qib_devdata *dd) } kfree(tmp); kfree(dd->boardname); + qib_cq_exit(dd); } /* @@ -1483,6 +1564,7 @@ static void qib_remove_one(struct pci_dev *pdev) int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) { unsigned amt; + int old_node_id; if (!rcd->rcvhdrq) { dma_addr_t phys_hdrqtail; @@ -1492,9 +1574,13 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) sizeof(u32), PAGE_SIZE); gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? GFP_USER : GFP_KERNEL; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvhdrq = dma_alloc_coherent( &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, gfp_flags | __GFP_COMP); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvhdrq) { qib_dev_err(dd, @@ -1510,9 +1596,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) } if (!(dd->flags & QIB_NODMA_RTAIL)) { + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; @@ -1556,6 +1644,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; size_t size; gfp_t gfp_flags; + int old_node_id; /* * GFP_USER, but without GFP_FS, so buffer cache can be @@ -1574,25 +1663,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) size = rcd->rcvegrbuf_size; if (!rcd->rcvegrbuf) { rcd->rcvegrbuf = - kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]), - GFP_KERNEL); + kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]), + GFP_KERNEL, rcd->node_id); if (!rcd->rcvegrbuf) goto bail; } if (!rcd->rcvegrbuf_phys) { rcd->rcvegrbuf_phys = - kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]), - GFP_KERNEL); + kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]), + GFP_KERNEL, rcd->node_id); if (!rcd->rcvegrbuf_phys) goto bail_rcvegrbuf; } for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { if (rcd->rcvegrbuf[e]) continue; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); rcd->rcvegrbuf[e] = dma_alloc_coherent(&dd->pcidev->dev, size, &rcd->rcvegrbuf_phys[e], gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); if (!rcd->rcvegrbuf[e]) goto bail_rcvegrbuf_phys; } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index a6a2cc2..3cca55b 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -35,6 +35,9 @@ #include <linux/err.h> #include <linux/vmalloc.h> #include <linux/jhash.h> +#ifdef CONFIG_DEBUG_FS +#include <linux/seq_file.h> +#endif #include "qib.h" @@ -222,8 +225,8 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) unsigned long flags; unsigned n = qpn_hash(dev, qp->ibqp.qp_num); - spin_lock_irqsave(&dev->qpt_lock, flags); atomic_inc(&qp->refcount); + spin_lock_irqsave(&dev->qpt_lock, flags); if (qp->ibqp.qp_num == 0) rcu_assign_pointer(ibp->qp0, qp); @@ -235,7 +238,6 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) } spin_unlock_irqrestore(&dev->qpt_lock, flags); - synchronize_rcu(); } /* @@ -247,36 +249,39 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); unsigned n = qpn_hash(dev, qp->ibqp.qp_num); unsigned long flags; + int removed = 1; spin_lock_irqsave(&dev->qpt_lock, flags); if (rcu_dereference_protected(ibp->qp0, lockdep_is_held(&dev->qpt_lock)) == qp) { - atomic_dec(&qp->refcount); rcu_assign_pointer(ibp->qp0, NULL); } else if (rcu_dereference_protected(ibp->qp1, lockdep_is_held(&dev->qpt_lock)) == qp) { - atomic_dec(&qp->refcount); rcu_assign_pointer(ibp->qp1, NULL); } else { struct qib_qp *q; struct qib_qp __rcu **qpp; + removed = 0; qpp = &dev->qp_table[n]; for (; (q = rcu_dereference_protected(*qpp, lockdep_is_held(&dev->qpt_lock))) != NULL; qpp = &q->next) if (q == qp) { - atomic_dec(&qp->refcount); rcu_assign_pointer(*qpp, rcu_dereference_protected(qp->next, lockdep_is_held(&dev->qpt_lock))); + removed = 1; break; } } spin_unlock_irqrestore(&dev->qpt_lock, flags); - synchronize_rcu(); + if (removed) { + synchronize_rcu(); + atomic_dec(&qp->refcount); + } } /** @@ -334,26 +339,25 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) { struct qib_qp *qp = NULL; + rcu_read_lock(); if (unlikely(qpn <= 1)) { - rcu_read_lock(); if (qpn == 0) qp = rcu_dereference(ibp->qp0); else qp = rcu_dereference(ibp->qp1); + if (qp) + atomic_inc(&qp->refcount); } else { struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; unsigned n = qpn_hash(dev, qpn); - rcu_read_lock(); for (qp = rcu_dereference(dev->qp_table[n]); qp; qp = rcu_dereference(qp->next)) - if (qp->ibqp.qp_num == qpn) + if (qp->ibqp.qp_num == qpn) { + atomic_inc(&qp->refcount); break; + } } - if (qp) - if (unlikely(!atomic_inc_not_zero(&qp->refcount))) - qp = NULL; - rcu_read_unlock(); return qp; } @@ -1286,3 +1290,94 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth) } } } + +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter { + struct qib_ibdev *dev; + struct qib_qp *qp; + int n; +}; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) +{ + struct qib_qp_iter *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return NULL; + + iter->dev = dev; + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +int qib_qp_iter_next(struct qib_qp_iter *iter) +{ + struct qib_ibdev *dev = iter->dev; + int n = iter->n; + int ret = 1; + struct qib_qp *pqp = iter->qp; + struct qib_qp *qp; + + rcu_read_lock(); + for (; n < dev->qp_table_size; n++) { + if (pqp) + qp = rcu_dereference(pqp->next); + else + qp = rcu_dereference(dev->qp_table[n]); + pqp = qp; + if (qp) { + if (iter->qp) + atomic_dec(&iter->qp->refcount); + atomic_inc(&qp->refcount); + rcu_read_unlock(); + iter->qp = qp; + iter->n = n; + return 0; + } + } + rcu_read_unlock(); + if (iter->qp) + atomic_dec(&iter->qp->refcount); + return ret; +} + +static const char * const qp_type_str[] = { + "SMI", "GSI", "RC", "UC", "UD", +}; + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) +{ + struct qib_swqe *wqe; + struct qib_qp *qp = iter->qp; + + wqe = get_swqe_ptr(qp, qp->s_last); + seq_printf(s, + "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n", + iter->n, + qp->ibqp.qp_num, + qp_type_str[qp->ibqp.qp_type], + qp->state, + wqe->wr.opcode, + qp->s_hdrwords, + qp->s_flags, + atomic_read(&qp->s_dma_busy), + !list_empty(&qp->iowait), + qp->timeout, + wqe->ssn, + qp->s_lsn, + qp->s_last_psn, + qp->s_psn, qp->s_next_psn, + qp->s_sending_psn, qp->s_sending_hpsn, + qp->s_last, qp->s_acked, qp->s_cur, + qp->s_tail, qp->s_head, qp->s_size, + qp->remote_qpn, + qp->remote_ah_attr.dlid); +} + +#endif diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 3fc5144..32162d3 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -708,6 +708,62 @@ unlock: return ret; } +/* + * sdma_lock should be acquired before calling this routine + */ +void dump_sdma_state(struct qib_pportdata *ppd) +{ + struct qib_sdma_desc *descq; + struct qib_sdma_txreq *txp, *txpnext; + __le64 *descqp; + u64 desc[2]; + dma_addr_t addr; + u16 gen, dwlen, dwoffset; + u16 head, tail, cnt; + + head = ppd->sdma_descq_head; + tail = ppd->sdma_descq_tail; + cnt = qib_sdma_descq_freecnt(ppd); + descq = ppd->sdma_descq; + + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA ppd->sdma_descq_head: %u\n", head); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA ppd->sdma_descq_tail: %u\n", tail); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sdma_descq_freecnt: %u\n", cnt); + + /* print info for each entry in the descriptor queue */ + while (head != tail) { + char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 }; + + descqp = &descq[head].qw[0]; + desc[0] = le64_to_cpu(descqp[0]); + desc[1] = le64_to_cpu(descqp[1]); + flags[0] = (desc[0] & 1<<15) ? 'I' : '-'; + flags[1] = (desc[0] & 1<<14) ? 'L' : 'S'; + flags[2] = (desc[0] & 1<<13) ? 'H' : '-'; + flags[3] = (desc[0] & 1<<12) ? 'F' : '-'; + flags[4] = (desc[0] & 1<<11) ? 'L' : '-'; + addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL); + gen = (desc[0] >> 30) & 3ULL; + dwlen = (desc[0] >> 14) & (0x7ffULL << 2); + dwoffset = (desc[0] & 0x7ffULL) << 2; + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n", + head, flags, addr, gen, dwlen, dwoffset); + if (++head == ppd->sdma_descq_cnt) + head = 0; + } + + /* print dma descriptor indices from the TX requests */ + list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist, + list) + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA txp->start_idx: %u txp->next_descq_idx: %u\n", + txp->start_idx, txp->next_descq_idx); +} + void qib_sdma_process_event(struct qib_pportdata *ppd, enum qib_sdma_events event) { diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 904c384..092b0bb 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -645,9 +645,11 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) } else goto drop; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - ibp->opstats[opcode & 0x7f].n_bytes += tlen; - ibp->opstats[opcode & 0x7f].n_packets++; + opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; +#ifdef CONFIG_DEBUG_FS + rcd->opstats->stats[opcode].n_bytes += tlen; + rcd->opstats->stats[opcode].n_packets++; +#endif /* Get the destination QP number. */ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index aff8b2c..012e2c7 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -41,6 +41,7 @@ #include <linux/interrupt.h> #include <linux/kref.h> #include <linux/workqueue.h> +#include <linux/kthread.h> #include <linux/completion.h> #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> @@ -267,7 +268,8 @@ struct qib_cq_wc { */ struct qib_cq { struct ib_cq ibcq; - struct work_struct comptask; + struct kthread_work comptask; + struct qib_devdata *dd; spinlock_t lock; /* protect changes in this struct */ u8 notify; u8 triggered; @@ -658,6 +660,10 @@ struct qib_opcode_stats { u64 n_bytes; /* total number of bytes */ }; +struct qib_opcode_stats_perctx { + struct qib_opcode_stats stats[128]; +}; + struct qib_ibport { struct qib_qp __rcu *qp0; struct qib_qp __rcu *qp1; @@ -724,7 +730,6 @@ struct qib_ibport { u8 vl_high_limit; u8 sl_to_vl[16]; - struct qib_opcode_stats opstats[128]; }; @@ -768,6 +773,10 @@ struct qib_ibdev { spinlock_t n_srqs_lock; u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ spinlock_t n_mcast_grps_lock; +#ifdef CONFIG_DEBUG_FS + /* per HCA debugfs */ + struct dentry *qib_ibdev_dbg; +#endif }; struct qib_verbs_counters { @@ -832,8 +841,6 @@ static inline int qib_send_ok(struct qib_qp *qp) !(qp->s_flags & QIB_S_ANY_WAIT_SEND)); } -extern struct workqueue_struct *qib_cq_wq; - /* * This must be called with s_lock held. */ @@ -910,6 +917,18 @@ void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt); void qib_free_qpn_table(struct qib_qpn_table *qpt); +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev); + +int qib_qp_iter_next(struct qib_qp_iter *iter); + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter); + +#endif + void qib_get_credit(struct qib_qp *qp, u32 aeth); unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult); @@ -972,6 +991,10 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int qib_destroy_srq(struct ib_srq *ibsrq); +int qib_cq_init(struct qib_devdata *dd); + +void qib_cq_exit(struct qib_devdata *dd); + void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 7ccf328..f93baf8 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -53,8 +53,8 @@ #define DRV_NAME "ib_srp" #define PFX DRV_NAME ": " -#define DRV_VERSION "0.2" -#define DRV_RELDATE "November 1, 2005" +#define DRV_VERSION "1.0" +#define DRV_RELDATE "July 1, 2013" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " @@ -231,14 +231,16 @@ static int srp_create_target_ib(struct srp_target_port *target) return -ENOMEM; recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); + srp_recv_completion, NULL, target, SRP_RQ_SIZE, + target->comp_vector); if (IS_ERR(recv_cq)) { ret = PTR_ERR(recv_cq); goto err; } send_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); + srp_send_completion, NULL, target, SRP_SQ_SIZE, + target->comp_vector); if (IS_ERR(send_cq)) { ret = PTR_ERR(send_cq); goto err_recv_cq; @@ -542,11 +544,11 @@ static void srp_remove_work(struct work_struct *work) WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + srp_remove_target(target); + spin_lock(&target->srp_host->target_lock); list_del(&target->list); spin_unlock(&target->srp_host->target_lock); - - srp_remove_target(target); } static void srp_rport_delete(struct srp_rport *rport) @@ -1744,18 +1746,24 @@ static int srp_abort(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_request *req = (struct srp_request *) scmnd->host_scribble; + int ret; shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); if (!req || !srp_claim_req(target, req, scmnd)) return FAILED; - srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, - SRP_TSK_ABORT_TASK); + if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, + SRP_TSK_ABORT_TASK) == 0) + ret = SUCCESS; + else if (target->transport_offline) + ret = FAST_IO_FAIL; + else + ret = FAILED; srp_free_req(target, req, scmnd, 0); scmnd->result = DID_ABORT << 16; scmnd->scsi_done(scmnd); - return SUCCESS; + return ret; } static int srp_reset_device(struct scsi_cmnd *scmnd) @@ -1891,6 +1899,14 @@ static ssize_t show_local_ib_device(struct device *dev, return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); } +static ssize_t show_comp_vector(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%d\n", target->comp_vector); +} + static ssize_t show_cmd_sg_entries(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1917,6 +1933,7 @@ static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); +static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); @@ -1931,6 +1948,7 @@ static struct device_attribute *srp_host_attrs[] = { &dev_attr_zero_req_lim, &dev_attr_local_ib_port, &dev_attr_local_ib_device, + &dev_attr_comp_vector, &dev_attr_cmd_sg_entries, &dev_attr_allow_ext_sg, NULL @@ -1946,6 +1964,7 @@ static struct scsi_host_template srp_template = { .eh_abort_handler = srp_abort, .eh_device_reset_handler = srp_reset_device, .eh_host_reset_handler = srp_reset_host, + .skip_settle_delay = true, .sg_tablesize = SRP_DEF_SG_TABLESIZE, .can_queue = SRP_CMD_SQ_SIZE, .this_id = -1, @@ -2001,6 +2020,36 @@ static struct class srp_class = { .dev_release = srp_release_dev }; +/** + * srp_conn_unique() - check whether the connection to a target is unique + */ +static bool srp_conn_unique(struct srp_host *host, + struct srp_target_port *target) +{ + struct srp_target_port *t; + bool ret = false; + + if (target->state == SRP_TARGET_REMOVED) + goto out; + + ret = true; + + spin_lock(&host->target_lock); + list_for_each_entry(t, &host->target_list, list) { + if (t != target && + target->id_ext == t->id_ext && + target->ioc_guid == t->ioc_guid && + target->initiator_ext == t->initiator_ext) { + ret = false; + break; + } + } + spin_unlock(&host->target_lock); + +out: + return ret; +} + /* * Target ports are added by writing * @@ -2023,6 +2072,7 @@ enum { SRP_OPT_CMD_SG_ENTRIES = 1 << 9, SRP_OPT_ALLOW_EXT_SG = 1 << 10, SRP_OPT_SG_TABLESIZE = 1 << 11, + SRP_OPT_COMP_VECTOR = 1 << 12, SRP_OPT_ALL = (SRP_OPT_ID_EXT | SRP_OPT_IOC_GUID | SRP_OPT_DGID | @@ -2043,6 +2093,7 @@ static const match_table_t srp_opt_tokens = { { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, + { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, { SRP_OPT_ERR, NULL } }; @@ -2198,6 +2249,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) target->sg_tablesize = token; break; + case SRP_OPT_COMP_VECTOR: + if (match_int(args, &token) || token < 0) { + pr_warn("bad comp_vector parameter '%s'\n", p); + goto out; + } + target->comp_vector = token; + break; + default: pr_warn("unknown parameter or missing value '%s' in target creation request\n", p); @@ -2257,6 +2316,16 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err; + if (!srp_conn_unique(target->srp_host, target)) { + shost_printk(KERN_INFO, target->scsi_host, + PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + be64_to_cpu(target->initiator_ext)); + ret = -EEXIST; + goto err; + } + if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && target->cmd_sg_cnt < target->sg_tablesize) { pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); @@ -2507,6 +2576,8 @@ static void srp_remove_one(struct ib_device *device) struct srp_target_port *target; srp_dev = ib_get_client_data(device, &srp_client); + if (!srp_dev) + return; list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { device_unregister(&host->dev); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 66fbedd..e641088 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -156,6 +156,7 @@ struct srp_target_port { char target_name[32]; unsigned int scsi_id; unsigned int sg_tablesize; + int comp_vector; struct ib_sa_path_rec path; __be16 orig_dgid[8]; diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig index bcdbc14..8cf7563 100644 --- a/drivers/net/ethernet/mellanox/Kconfig +++ b/drivers/net/ethernet/mellanox/Kconfig @@ -19,5 +19,6 @@ config NET_VENDOR_MELLANOX if NET_VENDOR_MELLANOX source "drivers/net/ethernet/mellanox/mlx4/Kconfig" +source "drivers/net/ethernet/mellanox/mlx5/core/Kconfig" endif # NET_VENDOR_MELLANOX diff --git a/drivers/net/ethernet/mellanox/Makefile b/drivers/net/ethernet/mellanox/Makefile index 37afb96..38fe32ef 100644 --- a/drivers/net/ethernet/mellanox/Makefile +++ b/drivers/net/ethernet/mellanox/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_MLX4_CORE) += mlx4/ +obj-$(CONFIG_MLX5_CORE) += mlx5/core/ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig new file mode 100644 index 0000000..2196282 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -0,0 +1,18 @@ +# +# Mellanox driver configuration +# + +config MLX5_CORE + tristate + depends on PCI && X86 + default n + +config MLX5_DEBUG + bool "Verbose debugging output" if (MLX5_CORE && EXPERT) + depends on MLX5_CORE + default y + ---help--- + This option causes debugging code to be compiled into the + mlx5_core driver. The output can be turned on via the + debug_mask module parameter (which can also be set after + the driver is loaded through sysfs). diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile new file mode 100644 index 0000000..105780b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_MLX5_CORE) += mlx5_core.o + +mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ + health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ + mad.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c new file mode 100644 index 0000000..b215742 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/export.h> +#include <linux/bitmap.h> +#include <linux/dma-mapping.h> +#include <linux/vmalloc.h> +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" + +/* Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. + */ + +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, + struct mlx5_buf *buf) +{ + dma_addr_t t; + + buf->size = size; + if (size <= max_direct) { + buf->nbufs = 1; + buf->npages = 1; + buf->page_shift = get_order(size) + PAGE_SHIFT; + buf->direct.buf = dma_zalloc_coherent(&dev->pdev->dev, + size, &t, GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; + + buf->direct.map = t; + + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } + } else { + int i; + + buf->direct.buf = NULL; + buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; + buf->npages = buf->nbufs; + buf->page_shift = PAGE_SHIFT; + buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), + GFP_KERNEL); + if (!buf->page_list) + return -ENOMEM; + + for (i = 0; i < buf->nbufs; i++) { + buf->page_list[i].buf = + dma_zalloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &t, GFP_KERNEL); + if (!buf->page_list[i].buf) + goto err_free; + + buf->page_list[i].map = t; + } + + if (BITS_PER_LONG == 64) { + struct page **pages; + pages = kmalloc(sizeof(*pages) * buf->nbufs, GFP_KERNEL); + if (!pages) + goto err_free; + for (i = 0; i < buf->nbufs; i++) + pages[i] = virt_to_page(buf->page_list[i].buf); + buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); + kfree(pages); + if (!buf->direct.buf) + goto err_free; + } + } + + return 0; + +err_free: + mlx5_buf_free(dev, buf); + + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(mlx5_buf_alloc); + +void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf) +{ + int i; + + if (buf->nbufs == 1) + dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf, + buf->direct.map); + else { + if (BITS_PER_LONG == 64 && buf->direct.buf) + vunmap(buf->direct.buf); + + for (i = 0; i < buf->nbufs; i++) + if (buf->page_list[i].buf) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + buf->page_list[i].buf, + buf->page_list[i].map); + kfree(buf->page_list); + } +} +EXPORT_SYMBOL_GPL(mlx5_buf_free); + +static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct device *dma_device) +{ + struct mlx5_db_pgdir *pgdir; + + pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); + if (!pgdir) + return NULL; + + bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE); + pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, + &pgdir->db_dma, GFP_KERNEL); + if (!pgdir->db_page) { + kfree(pgdir); + return NULL; + } + + return pgdir; +} + +static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir, + struct mlx5_db *db) +{ + int offset; + int i; + + i = find_first_bit(pgdir->bitmap, MLX5_DB_PER_PAGE); + if (i >= MLX5_DB_PER_PAGE) + return -ENOMEM; + + __clear_bit(i, pgdir->bitmap); + + db->u.pgdir = pgdir; + db->index = i; + offset = db->index * L1_CACHE_BYTES; + db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page); + db->dma = pgdir->db_dma + offset; + + return 0; +} + +int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + struct mlx5_db_pgdir *pgdir; + int ret = 0; + + mutex_lock(&dev->priv.pgdir_mutex); + + list_for_each_entry(pgdir, &dev->priv.pgdir_list, list) + if (!mlx5_alloc_db_from_pgdir(pgdir, db)) + goto out; + + pgdir = mlx5_alloc_db_pgdir(&(dev->pdev->dev)); + if (!pgdir) { + ret = -ENOMEM; + goto out; + } + + list_add(&pgdir->list, &dev->priv.pgdir_list); + + /* This should never fail -- we just allocated an empty page: */ + WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db)); + +out: + mutex_unlock(&dev->priv.pgdir_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx5_db_alloc); + +void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + mutex_lock(&dev->priv.pgdir_mutex); + + __set_bit(db->index, db->u.pgdir->bitmap); + + if (bitmap_full(db->u.pgdir->bitmap, MLX5_DB_PER_PAGE)) { + dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + db->u.pgdir->db_page, db->u.pgdir->db_dma); + list_del(&db->u.pgdir->list); + kfree(db->u.pgdir); + } + + mutex_unlock(&dev->priv.pgdir_mutex); +} +EXPORT_SYMBOL_GPL(mlx5_db_free); + + +void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas) +{ + u64 addr; + int i; + + for (i = 0; i < buf->npages; i++) { + if (buf->nbufs == 1) + addr = buf->direct.map + (i << buf->page_shift); + else + addr = buf->page_list[i].map; + + pas[i] = cpu_to_be64(addr); + } +} +EXPORT_SYMBOL_GPL(mlx5_fill_page_array); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c new file mode 100644 index 0000000..205753a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -0,0 +1,1515 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <asm-generic/kmap_types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/random.h> +#include <linux/io-mapping.h> +#include <linux/mlx5/driver.h> +#include <linux/debugfs.h> + +#include "mlx5_core.h" + +enum { + CMD_IF_REV = 3, +}; + +enum { + CMD_MODE_POLLING, + CMD_MODE_EVENTS +}; + +enum { + NUM_LONG_LISTS = 2, + NUM_MED_LISTS = 64, + LONG_LIST_SIZE = (2ULL * 1024 * 1024 * 1024 / PAGE_SIZE) * 8 + 16 + + MLX5_CMD_DATA_BLOCK_SIZE, + MED_LIST_SIZE = 16 + MLX5_CMD_DATA_BLOCK_SIZE, +}; + +enum { + MLX5_CMD_DELIVERY_STAT_OK = 0x0, + MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1, + MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2, + MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3, + MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4, + MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5, + MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6, + MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7, + MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8, + MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9, + MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, +}; + +enum { + MLX5_CMD_STAT_OK = 0x0, + MLX5_CMD_STAT_INT_ERR = 0x1, + MLX5_CMD_STAT_BAD_OP_ERR = 0x2, + MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, + MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, + MLX5_CMD_STAT_BAD_RES_ERR = 0x5, + MLX5_CMD_STAT_RES_BUSY = 0x6, + MLX5_CMD_STAT_LIM_ERR = 0x8, + MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, + MLX5_CMD_STAT_IX_ERR = 0xa, + MLX5_CMD_STAT_NO_RES_ERR = 0xf, + MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, + MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, + MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, + MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, + MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, +}; + +static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, + struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, + mlx5_cmd_cbk_t cbk, + void *context, int page_queue) +{ + gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL; + struct mlx5_cmd_work_ent *ent; + + ent = kzalloc(sizeof(*ent), alloc_flags); + if (!ent) + return ERR_PTR(-ENOMEM); + + ent->in = in; + ent->out = out; + ent->callback = cbk; + ent->context = context; + ent->cmd = cmd; + ent->page_queue = page_queue; + + return ent; +} + +static u8 alloc_token(struct mlx5_cmd *cmd) +{ + u8 token; + + spin_lock(&cmd->token_lock); + token = cmd->token++ % 255 + 1; + spin_unlock(&cmd->token_lock); + + return token; +} + +static int alloc_ent(struct mlx5_cmd *cmd) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&cmd->alloc_lock, flags); + ret = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds); + if (ret < cmd->max_reg_cmds) + clear_bit(ret, &cmd->bitmask); + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + + return ret < cmd->max_reg_cmds ? ret : -ENOMEM; +} + +static void free_ent(struct mlx5_cmd *cmd, int idx) +{ + unsigned long flags; + + spin_lock_irqsave(&cmd->alloc_lock, flags); + set_bit(idx, &cmd->bitmask); + spin_unlock_irqrestore(&cmd->alloc_lock, flags); +} + +static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) +{ + return cmd->cmd_buf + (idx << cmd->log_stride); +} + +static u8 xor8_buf(void *buf, int len) +{ + u8 *ptr = buf; + u8 sum = 0; + int i; + + for (i = 0; i < len; i++) + sum ^= ptr[i]; + + return sum; +} + +static int verify_block_sig(struct mlx5_cmd_prot_block *block) +{ + if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff) + return -EINVAL; + + if (xor8_buf(block, sizeof(*block)) != 0xff) + return -EINVAL; + + return 0; +} + +static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token) +{ + block->token = token; + block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 2); + block->sig = ~xor8_buf(block, sizeof(*block) - 1); +} + +static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token) +{ + struct mlx5_cmd_mailbox *next = msg->next; + + while (next) { + calc_block_sig(next->buf, token); + next = next->next; + } +} + +static void set_signature(struct mlx5_cmd_work_ent *ent) +{ + ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); + calc_chain_sig(ent->in, ent->token); + calc_chain_sig(ent->out, ent->token); +} + +static void poll_timeout(struct mlx5_cmd_work_ent *ent) +{ + unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000); + u8 own; + + do { + own = ent->lay->status_own; + if (!(own & CMD_OWNER_HW)) { + ent->ret = 0; + return; + } + usleep_range(5000, 10000); + } while (time_before(jiffies, poll_end)); + + ent->ret = -ETIMEDOUT; +} + +static void free_cmd(struct mlx5_cmd_work_ent *ent) +{ + kfree(ent); +} + + +static int verify_signature(struct mlx5_cmd_work_ent *ent) +{ + struct mlx5_cmd_mailbox *next = ent->out->next; + int err; + u8 sig; + + sig = xor8_buf(ent->lay, sizeof(*ent->lay)); + if (sig != 0xff) + return -EINVAL; + + while (next) { + err = verify_block_sig(next->buf); + if (err) + return err; + + next = next->next; + } + + return 0; +} + +static void dump_buf(void *buf, int size, int data_only, int offset) +{ + __be32 *p = buf; + int i; + + for (i = 0; i < size; i += 16) { + pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]), + be32_to_cpu(p[1]), be32_to_cpu(p[2]), + be32_to_cpu(p[3])); + p += 4; + offset += 16; + } + if (!data_only) + pr_debug("\n"); +} + +const char *mlx5_command_str(int command) +{ + switch (command) { + case MLX5_CMD_OP_QUERY_HCA_CAP: + return "QUERY_HCA_CAP"; + + case MLX5_CMD_OP_SET_HCA_CAP: + return "SET_HCA_CAP"; + + case MLX5_CMD_OP_QUERY_ADAPTER: + return "QUERY_ADAPTER"; + + case MLX5_CMD_OP_INIT_HCA: + return "INIT_HCA"; + + case MLX5_CMD_OP_TEARDOWN_HCA: + return "TEARDOWN_HCA"; + + case MLX5_CMD_OP_QUERY_PAGES: + return "QUERY_PAGES"; + + case MLX5_CMD_OP_MANAGE_PAGES: + return "MANAGE_PAGES"; + + case MLX5_CMD_OP_CREATE_MKEY: + return "CREATE_MKEY"; + + case MLX5_CMD_OP_QUERY_MKEY: + return "QUERY_MKEY"; + + case MLX5_CMD_OP_DESTROY_MKEY: + return "DESTROY_MKEY"; + + case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: + return "QUERY_SPECIAL_CONTEXTS"; + + case MLX5_CMD_OP_CREATE_EQ: + return "CREATE_EQ"; + + case MLX5_CMD_OP_DESTROY_EQ: + return "DESTROY_EQ"; + + case MLX5_CMD_OP_QUERY_EQ: + return "QUERY_EQ"; + + case MLX5_CMD_OP_CREATE_CQ: + return "CREATE_CQ"; + + case MLX5_CMD_OP_DESTROY_CQ: + return "DESTROY_CQ"; + + case MLX5_CMD_OP_QUERY_CQ: + return "QUERY_CQ"; + + case MLX5_CMD_OP_MODIFY_CQ: + return "MODIFY_CQ"; + + case MLX5_CMD_OP_CREATE_QP: + return "CREATE_QP"; + + case MLX5_CMD_OP_DESTROY_QP: + return "DESTROY_QP"; + + case MLX5_CMD_OP_RST2INIT_QP: + return "RST2INIT_QP"; + + case MLX5_CMD_OP_INIT2RTR_QP: + return "INIT2RTR_QP"; + + case MLX5_CMD_OP_RTR2RTS_QP: + return "RTR2RTS_QP"; + + case MLX5_CMD_OP_RTS2RTS_QP: + return "RTS2RTS_QP"; + + case MLX5_CMD_OP_SQERR2RTS_QP: + return "SQERR2RTS_QP"; + + case MLX5_CMD_OP_2ERR_QP: + return "2ERR_QP"; + + case MLX5_CMD_OP_RTS2SQD_QP: + return "RTS2SQD_QP"; + + case MLX5_CMD_OP_SQD2RTS_QP: + return "SQD2RTS_QP"; + + case MLX5_CMD_OP_2RST_QP: + return "2RST_QP"; + + case MLX5_CMD_OP_QUERY_QP: + return "QUERY_QP"; + + case MLX5_CMD_OP_CONF_SQP: + return "CONF_SQP"; + + case MLX5_CMD_OP_MAD_IFC: + return "MAD_IFC"; + + case MLX5_CMD_OP_INIT2INIT_QP: + return "INIT2INIT_QP"; + + case MLX5_CMD_OP_SUSPEND_QP: + return "SUSPEND_QP"; + + case MLX5_CMD_OP_UNSUSPEND_QP: + return "UNSUSPEND_QP"; + + case MLX5_CMD_OP_SQD2SQD_QP: + return "SQD2SQD_QP"; + + case MLX5_CMD_OP_ALLOC_QP_COUNTER_SET: + return "ALLOC_QP_COUNTER_SET"; + + case MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET: + return "DEALLOC_QP_COUNTER_SET"; + + case MLX5_CMD_OP_QUERY_QP_COUNTER_SET: + return "QUERY_QP_COUNTER_SET"; + + case MLX5_CMD_OP_CREATE_PSV: + return "CREATE_PSV"; + + case MLX5_CMD_OP_DESTROY_PSV: + return "DESTROY_PSV"; + + case MLX5_CMD_OP_QUERY_PSV: + return "QUERY_PSV"; + + case MLX5_CMD_OP_QUERY_SIG_RULE_TABLE: + return "QUERY_SIG_RULE_TABLE"; + + case MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE: + return "QUERY_BLOCK_SIZE_TABLE"; + + case MLX5_CMD_OP_CREATE_SRQ: + return "CREATE_SRQ"; + + case MLX5_CMD_OP_DESTROY_SRQ: + return "DESTROY_SRQ"; + + case MLX5_CMD_OP_QUERY_SRQ: + return "QUERY_SRQ"; + + case MLX5_CMD_OP_ARM_RQ: + return "ARM_RQ"; + + case MLX5_CMD_OP_RESIZE_SRQ: + return "RESIZE_SRQ"; + + case MLX5_CMD_OP_ALLOC_PD: + return "ALLOC_PD"; + + case MLX5_CMD_OP_DEALLOC_PD: + return "DEALLOC_PD"; + + case MLX5_CMD_OP_ALLOC_UAR: + return "ALLOC_UAR"; + + case MLX5_CMD_OP_DEALLOC_UAR: + return "DEALLOC_UAR"; + + case MLX5_CMD_OP_ATTACH_TO_MCG: + return "ATTACH_TO_MCG"; + + case MLX5_CMD_OP_DETACH_FROM_MCG: + return "DETACH_FROM_MCG"; + + case MLX5_CMD_OP_ALLOC_XRCD: + return "ALLOC_XRCD"; + + case MLX5_CMD_OP_DEALLOC_XRCD: + return "DEALLOC_XRCD"; + + case MLX5_CMD_OP_ACCESS_REG: + return "MLX5_CMD_OP_ACCESS_REG"; + + default: return "unknown command opcode"; + } +} + +static void dump_command(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent, int input) +{ + u16 op = be16_to_cpu(((struct mlx5_inbox_hdr *)(ent->lay->in))->opcode); + struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; + struct mlx5_cmd_mailbox *next = msg->next; + int data_only; + int offset = 0; + int dump_len; + + data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA)); + + if (data_only) + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA, + "dump command data %s(0x%x) %s\n", + mlx5_command_str(op), op, + input ? "INPUT" : "OUTPUT"); + else + mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n", + mlx5_command_str(op), op, + input ? "INPUT" : "OUTPUT"); + + if (data_only) { + if (input) { + dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset); + offset += sizeof(ent->lay->in); + } else { + dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset); + offset += sizeof(ent->lay->out); + } + } else { + dump_buf(ent->lay, sizeof(*ent->lay), 0, offset); + offset += sizeof(*ent->lay); + } + + while (next && offset < msg->len) { + if (data_only) { + dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset); + dump_buf(next->buf, dump_len, 1, offset); + offset += MLX5_CMD_DATA_BLOCK_SIZE; + } else { + mlx5_core_dbg(dev, "command block:\n"); + dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset); + offset += sizeof(struct mlx5_cmd_prot_block); + } + next = next->next; + } + + if (data_only) + pr_debug("\n"); +} + +static void cmd_work_handler(struct work_struct *work) +{ + struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); + struct mlx5_cmd *cmd = ent->cmd; + struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); + struct mlx5_cmd_layout *lay; + struct semaphore *sem; + + sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; + down(sem); + if (!ent->page_queue) { + ent->idx = alloc_ent(cmd); + if (ent->idx < 0) { + mlx5_core_err(dev, "failed to allocate command entry\n"); + up(sem); + return; + } + } else { + ent->idx = cmd->max_reg_cmds; + } + + ent->token = alloc_token(cmd); + cmd->ent_arr[ent->idx] = ent; + lay = get_inst(cmd, ent->idx); + ent->lay = lay; + memset(lay, 0, sizeof(*lay)); + memcpy(lay->in, ent->in->first.data, sizeof(lay->in)); + if (ent->in->next) + lay->in_ptr = cpu_to_be64(ent->in->next->dma); + lay->inlen = cpu_to_be32(ent->in->len); + if (ent->out->next) + lay->out_ptr = cpu_to_be64(ent->out->next->dma); + lay->outlen = cpu_to_be32(ent->out->len); + lay->type = MLX5_PCI_CMD_XPORT; + lay->token = ent->token; + lay->status_own = CMD_OWNER_HW; + if (!cmd->checksum_disabled) + set_signature(ent); + dump_command(dev, ent, 1); + ktime_get_ts(&ent->ts1); + + /* ring doorbell after the descriptor is valid */ + wmb(); + iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); + mlx5_core_dbg(dev, "write 0x%x to command doorbell\n", 1 << ent->idx); + mmiowb(); + if (cmd->mode == CMD_MODE_POLLING) { + poll_timeout(ent); + /* make sure we read the descriptor after ownership is SW */ + rmb(); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx); + } +} + +static const char *deliv_status_to_str(u8 status) +{ + switch (status) { + case MLX5_CMD_DELIVERY_STAT_OK: + return "no errors"; + case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: + return "signature error"; + case MLX5_CMD_DELIVERY_STAT_TOK_ERR: + return "token error"; + case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: + return "bad block number"; + case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: + return "output pointer not aligned to block size"; + case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: + return "input pointer not aligned to block size"; + case MLX5_CMD_DELIVERY_STAT_FW_ERR: + return "firmware internal error"; + case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: + return "command input length error"; + case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: + return "command ouput length error"; + case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: + return "reserved fields not cleared"; + case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: + return "bad command descriptor type"; + default: + return "unknown status code"; + } +} + +static u16 msg_to_opcode(struct mlx5_cmd_msg *in) +{ + struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); + + return be16_to_cpu(hdr->opcode); +} + +static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); + struct mlx5_cmd *cmd = &dev->cmd; + int err; + + if (cmd->mode == CMD_MODE_POLLING) { + wait_for_completion(&ent->done); + err = ent->ret; + } else { + if (!wait_for_completion_timeout(&ent->done, timeout)) + err = -ETIMEDOUT; + else + err = 0; + } + if (err == -ETIMEDOUT) { + mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); + } + mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err, + deliv_status_to_str(ent->status), ent->status); + + return err; +} + +/* Notes: + * 1. Callback functions may not sleep + * 2. page queue commands do not support asynchrous completion + */ +static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback, + void *context, int page_queue, u8 *status) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; + ktime_t t1, t2, delta; + struct mlx5_cmd_stats *stats; + int err = 0; + s64 ds; + u16 op; + + if (callback && page_queue) + return -EINVAL; + + ent = alloc_cmd(cmd, in, out, callback, context, page_queue); + if (IS_ERR(ent)) + return PTR_ERR(ent); + + if (!callback) + init_completion(&ent->done); + + INIT_WORK(&ent->work, cmd_work_handler); + if (page_queue) { + cmd_work_handler(&ent->work); + } else if (!queue_work(cmd->wq, &ent->work)) { + mlx5_core_warn(dev, "failed to queue work\n"); + err = -ENOMEM; + goto out_free; + } + + if (!callback) { + err = wait_func(dev, ent); + if (err == -ETIMEDOUT) + goto out; + + t1 = timespec_to_ktime(ent->ts1); + t2 = timespec_to_ktime(ent->ts2); + delta = ktime_sub(t2, t1); + ds = ktime_to_ns(delta); + op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); + if (op < ARRAY_SIZE(cmd->stats)) { + stats = &cmd->stats[op]; + spin_lock(&stats->lock); + stats->sum += ds; + ++stats->n; + spin_unlock(&stats->lock); + } + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, + "fw exec time for %s is %lld nsec\n", + mlx5_command_str(op), ds); + *status = ent->status; + free_cmd(ent); + } + + return err; + +out_free: + free_cmd(ent); +out: + return err; +} + +static ssize_t dbg_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char lbuf[3]; + int err; + + if (!dbg->in_msg || !dbg->out_msg) + return -ENOMEM; + + if (copy_from_user(lbuf, buf, sizeof(lbuf))) + return -EFAULT; + + lbuf[sizeof(lbuf) - 1] = 0; + + if (strcmp(lbuf, "go")) + return -EINVAL; + + err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen); + + return err ? err : count; +} + + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = dbg_write, +}; + +static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; + int copy; + + if (!to || !from) + return -ENOMEM; + + copy = min_t(int, size, sizeof(to->first.data)); + memcpy(to->first.data, from, copy); + size -= copy; + from += copy; + + next = to->next; + while (size) { + if (!next) { + /* this is a BUG */ + return -ENOMEM; + } + + copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); + block = next->buf; + memcpy(block->data, from, copy); + from += copy; + size -= copy; + next = next->next; + } + + return 0; +} + +static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; + int copy; + + if (!to || !from) + return -ENOMEM; + + copy = min_t(int, size, sizeof(from->first.data)); + memcpy(to, from->first.data, copy); + size -= copy; + to += copy; + + next = from->next; + while (size) { + if (!next) { + /* this is a BUG */ + return -ENOMEM; + } + + copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); + block = next->buf; + if (xor8_buf(block, sizeof(*block)) != 0xff) + return -EINVAL; + + memcpy(to, block->data, copy); + to += copy; + size -= copy; + next = next->next; + } + + return 0; +} + +static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev, + gfp_t flags) +{ + struct mlx5_cmd_mailbox *mailbox; + + mailbox = kmalloc(sizeof(*mailbox), flags); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = pci_pool_alloc(dev->cmd.pool, flags, + &mailbox->dma); + if (!mailbox->buf) { + mlx5_core_dbg(dev, "failed allocation\n"); + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + memset(mailbox->buf, 0, sizeof(struct mlx5_cmd_prot_block)); + mailbox->next = NULL; + + return mailbox; +} + +static void free_cmd_box(struct mlx5_core_dev *dev, + struct mlx5_cmd_mailbox *mailbox) +{ + pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} + +static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, + gfp_t flags, int size) +{ + struct mlx5_cmd_mailbox *tmp, *head = NULL; + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_msg *msg; + int blen; + int err; + int n; + int i; + + msg = kzalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return ERR_PTR(-ENOMEM); + + blen = size - min_t(int, sizeof(msg->first.data), size); + n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) / MLX5_CMD_DATA_BLOCK_SIZE; + + for (i = 0; i < n; i++) { + tmp = alloc_cmd_box(dev, flags); + if (IS_ERR(tmp)) { + mlx5_core_warn(dev, "failed allocating block\n"); + err = PTR_ERR(tmp); + goto err_alloc; + } + + block = tmp->buf; + tmp->next = head; + block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0); + block->block_num = cpu_to_be32(n - i - 1); + head = tmp; + } + msg->next = head; + msg->len = size; + return msg; + +err_alloc: + while (head) { + tmp = head->next; + free_cmd_box(dev, head); + head = tmp; + } + kfree(msg); + + return ERR_PTR(err); +} + +static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, + struct mlx5_cmd_msg *msg) +{ + struct mlx5_cmd_mailbox *head = msg->next; + struct mlx5_cmd_mailbox *next; + + while (head) { + next = head->next; + free_cmd_box(dev, head); + head = next; + } + kfree(msg); +} + +static ssize_t data_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + void *ptr; + int err; + + if (*pos != 0) + return -EINVAL; + + kfree(dbg->in_msg); + dbg->in_msg = NULL; + dbg->inlen = 0; + + ptr = kzalloc(count, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + if (copy_from_user(ptr, buf, count)) { + err = -EFAULT; + goto out; + } + dbg->in_msg = ptr; + dbg->inlen = count; + + *pos = count; + + return count; + +out: + kfree(ptr); + return err; +} + +static ssize_t data_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + int copy; + + if (*pos) + return 0; + + if (!dbg->out_msg) + return -ENOMEM; + + copy = min_t(int, count, dbg->outlen); + if (copy_to_user(buf, dbg->out_msg, copy)) + return -EFAULT; + + *pos += copy; + + return copy; +} + +static const struct file_operations dfops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = data_write, + .read = data_read, +}; + +static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char outlen[8]; + int err; + + if (*pos) + return 0; + + err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen); + if (err < 0) + return err; + + if (copy_to_user(buf, &outlen, err)) + return -EFAULT; + + *pos += err; + + return err; +} + +static ssize_t outlen_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + char outlen_str[8]; + int outlen; + void *ptr; + int err; + + if (*pos != 0 || count > 6) + return -EINVAL; + + kfree(dbg->out_msg); + dbg->out_msg = NULL; + dbg->outlen = 0; + + if (copy_from_user(outlen_str, buf, count)) + return -EFAULT; + + outlen_str[7] = 0; + + err = sscanf(outlen_str, "%d", &outlen); + if (err < 0) + return err; + + ptr = kzalloc(outlen, GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + dbg->out_msg = ptr; + dbg->outlen = outlen; + + *pos = count; + + return count; +} + +static const struct file_operations olfops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = outlen_write, + .read = outlen_read, +}; + +static void set_wqname(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s", + dev_name(&dev->pdev->dev)); +} + +static void clean_debug_files(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + + if (!mlx5_debugfs_root) + return; + + mlx5_cmdif_debugfs_cleanup(dev); + debugfs_remove_recursive(dbg->dbg_root); +} + +static int create_debugfs_files(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; + int err = -ENOMEM; + + if (!mlx5_debugfs_root) + return 0; + + dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root); + if (!dbg->dbg_root) + return err; + + dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root, + dev, &dfops); + if (!dbg->dbg_in) + goto err_dbg; + + dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root, + dev, &dfops); + if (!dbg->dbg_out) + goto err_dbg; + + dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root, + dev, &olfops); + if (!dbg->dbg_outlen) + goto err_dbg; + + dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root, + &dbg->status); + if (!dbg->dbg_status) + goto err_dbg; + + dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops); + if (!dbg->dbg_run) + goto err_dbg; + + mlx5_cmdif_debugfs_init(dev); + + return 0; + +err_dbg: + clean_debug_files(dev); + return err; +} + +void mlx5_cmd_use_events(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + down(&cmd->sem); + + down(&cmd->pages_sem); + + flush_workqueue(cmd->wq); + + cmd->mode = CMD_MODE_EVENTS; + + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} + +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + down(&cmd->sem); + + down(&cmd->pages_sem); + + flush_workqueue(cmd->wq); + cmd->mode = CMD_MODE_POLLING; + + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} + +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent *ent; + mlx5_cmd_cbk_t callback; + void *context; + int err; + int i; + + for (i = 0; i < (1 << cmd->log_sz); i++) { + if (test_bit(i, &vector)) { + ent = cmd->ent_arr[i]; + ktime_get_ts(&ent->ts2); + memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); + dump_command(dev, ent, 0); + if (!ent->ret) { + if (!cmd->checksum_disabled) + ent->ret = verify_signature(ent); + else + ent->ret = 0; + ent->status = ent->lay->status_own >> 1; + mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n", + ent->ret, deliv_status_to_str(ent->status), ent->status); + } + free_ent(cmd, ent->idx); + if (ent->callback) { + callback = ent->callback; + context = ent->context; + err = ent->ret; + free_cmd(ent); + callback(err, context); + } else { + complete(&ent->done); + } + if (ent->page_queue) + up(&cmd->pages_sem); + else + up(&cmd->sem); + } + } +} +EXPORT_SYMBOL(mlx5_cmd_comp_handler); + +static int status_to_err(u8 status) +{ + return status ? -1 : 0; /* TBD more meaningful codes */ +} + +static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size) +{ + struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM); + struct mlx5_cmd *cmd = &dev->cmd; + struct cache_ent *ent = NULL; + + if (in_size > MED_LIST_SIZE && in_size <= LONG_LIST_SIZE) + ent = &cmd->cache.large; + else if (in_size > 16 && in_size <= MED_LIST_SIZE) + ent = &cmd->cache.med; + + if (ent) { + spin_lock(&ent->lock); + if (!list_empty(&ent->head)) { + msg = list_entry(ent->head.next, typeof(*msg), list); + /* For cached lists, we must explicitly state what is + * the real size + */ + msg->len = in_size; + list_del(&msg->list); + } + spin_unlock(&ent->lock); + } + + if (IS_ERR(msg)) + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size); + + return msg; +} + +static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) +{ + if (msg->cache) { + spin_lock(&msg->cache->lock); + list_add_tail(&msg->list, &msg->cache->head); + spin_unlock(&msg->cache->lock); + } else { + mlx5_free_cmd_msg(dev, msg); + } +} + +static int is_manage_pages(struct mlx5_inbox_hdr *in) +{ + return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES; +} + +int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size) +{ + struct mlx5_cmd_msg *inb; + struct mlx5_cmd_msg *outb; + int pages_queue; + int err; + u8 status = 0; + + pages_queue = is_manage_pages(in); + + inb = alloc_msg(dev, in_size); + if (IS_ERR(inb)) { + err = PTR_ERR(inb); + return err; + } + + err = mlx5_copy_to_msg(inb, in, in_size); + if (err) { + mlx5_core_warn(dev, "err %d\n", err); + goto out_in; + } + + outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size); + if (IS_ERR(outb)) { + err = PTR_ERR(outb); + goto out_in; + } + + err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status); + if (err) + goto out_out; + + mlx5_core_dbg(dev, "err %d, status %d\n", err, status); + if (status) { + err = status_to_err(status); + goto out_out; + } + + err = mlx5_copy_from_msg(out, outb, out_size); + +out_out: + mlx5_free_cmd_msg(dev, outb); + +out_in: + free_msg(dev, inb); + return err; +} +EXPORT_SYMBOL(mlx5_cmd_exec); + +static void destroy_msg_cache(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_msg *msg; + struct mlx5_cmd_msg *n; + + list_for_each_entry_safe(msg, n, &cmd->cache.large.head, list) { + list_del(&msg->list); + mlx5_free_cmd_msg(dev, msg); + } + + list_for_each_entry_safe(msg, n, &cmd->cache.med.head, list) { + list_del(&msg->list); + mlx5_free_cmd_msg(dev, msg); + } +} + +static int create_msg_cache(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_msg *msg; + int err; + int i; + + spin_lock_init(&cmd->cache.large.lock); + INIT_LIST_HEAD(&cmd->cache.large.head); + spin_lock_init(&cmd->cache.med.lock); + INIT_LIST_HEAD(&cmd->cache.med.head); + + for (i = 0; i < NUM_LONG_LISTS; i++) { + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE); + if (IS_ERR(msg)) { + err = PTR_ERR(msg); + goto ex_err; + } + msg->cache = &cmd->cache.large; + list_add_tail(&msg->list, &cmd->cache.large.head); + } + + for (i = 0; i < NUM_MED_LISTS; i++) { + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE); + if (IS_ERR(msg)) { + err = PTR_ERR(msg); + goto ex_err; + } + msg->cache = &cmd->cache.med; + list_add_tail(&msg->list, &cmd->cache.med.head); + } + + return 0; + +ex_err: + destroy_msg_cache(dev); + return err; +} + +int mlx5_cmd_init(struct mlx5_core_dev *dev) +{ + int size = sizeof(struct mlx5_cmd_prot_block); + int align = roundup_pow_of_two(size); + struct mlx5_cmd *cmd = &dev->cmd; + u32 cmd_h, cmd_l; + u16 cmd_if_rev; + int err; + int i; + + cmd_if_rev = cmdif_rev(dev); + if (cmd_if_rev != CMD_IF_REV) { + dev_err(&dev->pdev->dev, + "Driver cmdif rev(%d) differs from firmware's(%d)\n", + CMD_IF_REV, cmd_if_rev); + return -EINVAL; + } + + cmd->pool = pci_pool_create("mlx5_cmd", dev->pdev, size, align, 0); + if (!cmd->pool) + return -ENOMEM; + + cmd->cmd_buf = (void *)__get_free_pages(GFP_ATOMIC, 0); + if (!cmd->cmd_buf) { + err = -ENOMEM; + goto err_free_pool; + } + cmd->dma = dma_map_single(&dev->pdev->dev, cmd->cmd_buf, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&dev->pdev->dev, cmd->dma)) { + err = -ENOMEM; + goto err_free; + } + + cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff; + cmd->log_sz = cmd_l >> 4 & 0xf; + cmd->log_stride = cmd_l & 0xf; + if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) { + dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n", + 1 << cmd->log_sz); + err = -EINVAL; + goto err_map; + } + + if (cmd->log_sz + cmd->log_stride > PAGE_SHIFT) { + dev_err(&dev->pdev->dev, "command queue size overflow\n"); + err = -EINVAL; + goto err_map; + } + + cmd->max_reg_cmds = (1 << cmd->log_sz) - 1; + cmd->bitmask = (1 << cmd->max_reg_cmds) - 1; + + cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; + if (cmd->cmdif_rev > CMD_IF_REV) { + dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n", + CMD_IF_REV, cmd->cmdif_rev); + err = -ENOTSUPP; + goto err_map; + } + + spin_lock_init(&cmd->alloc_lock); + spin_lock_init(&cmd->token_lock); + for (i = 0; i < ARRAY_SIZE(cmd->stats); i++) + spin_lock_init(&cmd->stats[i].lock); + + sema_init(&cmd->sem, cmd->max_reg_cmds); + sema_init(&cmd->pages_sem, 1); + + cmd_h = (u32)((u64)(cmd->dma) >> 32); + cmd_l = (u32)(cmd->dma); + if (cmd_l & 0xfff) { + dev_err(&dev->pdev->dev, "invalid command queue address\n"); + err = -ENOMEM; + goto err_map; + } + + iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); + iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz); + + /* Make sure firmware sees the complete address before we proceed */ + wmb(); + + mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma)); + + cmd->mode = CMD_MODE_POLLING; + + err = create_msg_cache(dev); + if (err) { + dev_err(&dev->pdev->dev, "failed to create command cache\n"); + goto err_map; + } + + set_wqname(dev); + cmd->wq = create_singlethread_workqueue(cmd->wq_name); + if (!cmd->wq) { + dev_err(&dev->pdev->dev, "failed to create command workqueue\n"); + err = -ENOMEM; + goto err_cache; + } + + err = create_debugfs_files(dev); + if (err) { + err = -ENOMEM; + goto err_wq; + } + + return 0; + +err_wq: + destroy_workqueue(cmd->wq); + +err_cache: + destroy_msg_cache(dev); + +err_map: + dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); +err_free: + free_pages((unsigned long)cmd->cmd_buf, 0); + +err_free_pool: + pci_pool_destroy(cmd->pool); + + return err; +} +EXPORT_SYMBOL(mlx5_cmd_init); + +void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + + clean_debug_files(dev); + destroy_workqueue(cmd->wq); + destroy_msg_cache(dev); + dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + free_pages((unsigned long)cmd->cmd_buf, 0); + pci_pool_destroy(cmd->pool); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup); + +static const char *cmd_status_str(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: + return "OK"; + case MLX5_CMD_STAT_INT_ERR: + return "internal error"; + case MLX5_CMD_STAT_BAD_OP_ERR: + return "bad operation"; + case MLX5_CMD_STAT_BAD_PARAM_ERR: + return "bad parameter"; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: + return "bad system state"; + case MLX5_CMD_STAT_BAD_RES_ERR: + return "bad resource"; + case MLX5_CMD_STAT_RES_BUSY: + return "resource busy"; + case MLX5_CMD_STAT_LIM_ERR: + return "limits exceeded"; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: + return "bad resource state"; + case MLX5_CMD_STAT_IX_ERR: + return "bad index"; + case MLX5_CMD_STAT_NO_RES_ERR: + return "no resources"; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: + return "bad input length"; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: + return "bad output length"; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: + return "bad QP state"; + case MLX5_CMD_STAT_BAD_PKT_ERR: + return "bad packet (discarded)"; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: + return "bad size too many outstanding CQEs"; + default: + return "unknown status"; + } +} + +int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr) +{ + if (!hdr->status) + return 0; + + pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n", + cmd_status_str(hdr->status), hdr->status, + be32_to_cpu(hdr->syndrome)); + + switch (hdr->status) { + case MLX5_CMD_STAT_OK: return 0; + case MLX5_CMD_STAT_INT_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; + case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; + case MLX5_CMD_STAT_LIM_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_IX_ERR: return -EINVAL; + case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL; + default: return -EIO; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c new file mode 100644 index 0000000..c2d660b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/hardirq.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include <rdma/ib_verbs.h> +#include <linux/mlx5/cq.h> +#include "mlx5_core.h" + +void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) +{ + struct mlx5_core_cq *cq; + struct mlx5_cq_table *table = &dev->priv.cq_table; + + spin_lock(&table->lock); + cq = radix_tree_lookup(&table->tree, cqn); + if (likely(cq)) + atomic_inc(&cq->refcount); + spin_unlock(&table->lock); + + if (!cq) { + mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn); + return; + } + + ++cq->arm_sn; + + cq->comp(cq); + + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); +} + +void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) +{ + struct mlx5_cq_table *table = &dev->priv.cq_table; + struct mlx5_core_cq *cq; + + spin_lock(&table->lock); + + cq = radix_tree_lookup(&table->tree, cqn); + if (cq) + atomic_inc(&cq->refcount); + + spin_unlock(&table->lock); + + if (!cq) { + mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn); + return; + } + + cq->event(cq, event_type); + + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); +} + + +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + struct mlx5_create_cq_mbox_in *in, int inlen) +{ + int err; + struct mlx5_cq_table *table = &dev->priv.cq_table; + struct mlx5_create_cq_mbox_out out; + struct mlx5_destroy_cq_mbox_in din; + struct mlx5_destroy_cq_mbox_out dout; + + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ); + memset(&out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + cq->cqn = be32_to_cpu(out.cqn) & 0xffffff; + cq->cons_index = 0; + cq->arm_sn = 0; + atomic_set(&cq->refcount, 1); + init_completion(&cq->free); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, cq->cqn, cq); + spin_unlock_irq(&table->lock); + if (err) + goto err_cmd; + + cq->pid = current->pid; + err = mlx5_debug_cq_add(dev, cq); + if (err) + mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n", + cq->cqn); + + return 0; + +err_cmd: + memset(&din, 0, sizeof(din)); + memset(&dout, 0, sizeof(dout)); + din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ); + mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout)); + return err; +} +EXPORT_SYMBOL(mlx5_core_create_cq); + +int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + struct mlx5_cq_table *table = &dev->priv.cq_table; + struct mlx5_destroy_cq_mbox_in in; + struct mlx5_destroy_cq_mbox_out out; + struct mlx5_core_cq *tmp; + int err; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, cq->cqn); + spin_unlock_irq(&table->lock); + if (!tmp) { + mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn); + return -EINVAL; + } + if (tmp != cq) { + mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn); + return -EINVAL; + } + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ); + in.cqn = cpu_to_be32(cq->cqn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + synchronize_irq(cq->irqn); + + mlx5_debug_cq_remove(dev, cq); + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); + wait_for_completion(&cq->free); + + return 0; +} +EXPORT_SYMBOL(mlx5_core_destroy_cq); + +int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + struct mlx5_query_cq_mbox_out *out) +{ + struct mlx5_query_cq_mbox_in in; + int err; + + memset(&in, 0, sizeof(in)); + memset(out, 0, sizeof(*out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_CQ); + in.cqn = cpu_to_be32(cq->cqn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + if (err) + return err; + + if (out->hdr.status) + return mlx5_cmd_status_to_err(&out->hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_query_cq); + + +int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + int type, struct mlx5_cq_modify_params *params) +{ + return -ENOSYS; +} + +int mlx5_init_cq_table(struct mlx5_core_dev *dev) +{ + struct mlx5_cq_table *table = &dev->priv.cq_table; + int err; + + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + err = mlx5_cq_debugfs_init(dev); + + return err; +} + +void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev) +{ + mlx5_cq_debugfs_cleanup(dev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c new file mode 100644 index 0000000..4273c06 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -0,0 +1,583 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +enum { + QP_PID, + QP_STATE, + QP_XPORT, + QP_MTU, + QP_N_RECV, + QP_RECV_SZ, + QP_N_SEND, + QP_LOG_PG_SZ, + QP_RQPN, +}; + +static char *qp_fields[] = { + [QP_PID] = "pid", + [QP_STATE] = "state", + [QP_XPORT] = "transport", + [QP_MTU] = "mtu", + [QP_N_RECV] = "num_recv", + [QP_RECV_SZ] = "rcv_wqe_sz", + [QP_N_SEND] = "num_send", + [QP_LOG_PG_SZ] = "log2_page_sz", + [QP_RQPN] = "remote_qpn", +}; + +enum { + EQ_NUM_EQES, + EQ_INTR, + EQ_LOG_PG_SZ, +}; + +static char *eq_fields[] = { + [EQ_NUM_EQES] = "num_eqes", + [EQ_INTR] = "intr", + [EQ_LOG_PG_SZ] = "log_page_size", +}; + +enum { + CQ_PID, + CQ_NUM_CQES, + CQ_LOG_PG_SZ, +}; + +static char *cq_fields[] = { + [CQ_PID] = "pid", + [CQ_NUM_CQES] = "num_cqes", + [CQ_LOG_PG_SZ] = "log_page_size", +}; + +struct dentry *mlx5_debugfs_root; +EXPORT_SYMBOL(mlx5_debugfs_root); + +void mlx5_register_debugfs(void) +{ + mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL); + if (IS_ERR_OR_NULL(mlx5_debugfs_root)) + mlx5_debugfs_root = NULL; +} + +void mlx5_unregister_debugfs(void) +{ + debugfs_remove(mlx5_debugfs_root); +} + +int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return 0; + + atomic_set(&dev->num_qps, 0); + + dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root); + if (!dev->priv.qp_debugfs) + return -ENOMEM; + + return 0; +} + +void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.qp_debugfs); +} + +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return 0; + + dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root); + if (!dev->priv.eq_debugfs) + return -ENOMEM; + + return 0; +} + +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.eq_debugfs); +} + +static ssize_t average_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cmd_stats *stats; + u64 field = 0; + int ret; + char tbuf[22]; + + if (*pos) + return 0; + + stats = filp->private_data; + spin_lock(&stats->lock); + if (stats->n) + field = stats->sum / stats->n; + spin_unlock(&stats->lock); + ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field); + if (ret > 0) { + if (copy_to_user(buf, tbuf, ret)) + return -EFAULT; + } + + *pos += ret; + return ret; +} + + +static ssize_t average_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cmd_stats *stats; + + stats = filp->private_data; + spin_lock(&stats->lock); + stats->sum = 0; + stats->n = 0; + spin_unlock(&stats->lock); + + *pos += count; + + return count; +} + +static const struct file_operations stats_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = average_read, + .write = average_write, +}; + +int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_stats *stats; + struct dentry **cmd; + const char *namep; + int err; + int i; + + if (!mlx5_debugfs_root) + return 0; + + cmd = &dev->priv.cmdif_debugfs; + *cmd = debugfs_create_dir("commands", dev->priv.dbg_root); + if (!*cmd) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) { + stats = &dev->cmd.stats[i]; + namep = mlx5_command_str(i); + if (strcmp(namep, "unknown command opcode")) { + stats->root = debugfs_create_dir(namep, *cmd); + if (!stats->root) { + mlx5_core_warn(dev, "failed adding command %d\n", + i); + err = -ENOMEM; + goto out; + } + + stats->avg = debugfs_create_file("average", 0400, + stats->root, stats, + &stats_fops); + if (!stats->avg) { + mlx5_core_warn(dev, "failed creating debugfs file\n"); + err = -ENOMEM; + goto out; + } + + stats->count = debugfs_create_u64("n", 0400, + stats->root, + &stats->n); + if (!stats->count) { + mlx5_core_warn(dev, "failed creating debugfs file\n"); + err = -ENOMEM; + goto out; + } + } + } + + return 0; +out: + debugfs_remove_recursive(dev->priv.cmdif_debugfs); + return err; +} + +void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.cmdif_debugfs); +} + +int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return 0; + + dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root); + if (!dev->priv.cq_debugfs) + return -ENOMEM; + + return 0; +} + +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->priv.cq_debugfs); +} + +static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + int index) +{ + struct mlx5_query_qp_mbox_out *out; + struct mlx5_qp_context *ctx; + u64 param = 0; + int err; + int no_sq; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return param; + + err = mlx5_core_qp_query(dev, qp, out, sizeof(*out)); + if (err) { + mlx5_core_warn(dev, "failed to query qp\n"); + goto out; + } + + ctx = &out->ctx; + switch (index) { + case QP_PID: + param = qp->pid; + break; + case QP_STATE: + param = be32_to_cpu(ctx->flags) >> 28; + break; + case QP_XPORT: + param = (be32_to_cpu(ctx->flags) >> 16) & 0xff; + break; + case QP_MTU: + param = ctx->mtu_msgmax >> 5; + break; + case QP_N_RECV: + param = 1 << ((ctx->rq_size_stride >> 3) & 0xf); + break; + case QP_RECV_SZ: + param = 1 << ((ctx->rq_size_stride & 7) + 4); + break; + case QP_N_SEND: + no_sq = be16_to_cpu(ctx->sq_crq_size) >> 15; + if (!no_sq) + param = 1 << (be16_to_cpu(ctx->sq_crq_size) >> 11); + else + param = 0; + break; + case QP_LOG_PG_SZ: + param = (be32_to_cpu(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f; + param += 12; + break; + case QP_RQPN: + param = be32_to_cpu(ctx->log_pg_sz_remote_qpn) & 0xffffff; + break; + } + +out: + kfree(out); + return param; +} + +static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + int index) +{ + struct mlx5_query_eq_mbox_out *out; + struct mlx5_eq_context *ctx; + u64 param = 0; + int err; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return param; + + ctx = &out->ctx; + + err = mlx5_core_eq_query(dev, eq, out, sizeof(*out)); + if (err) { + mlx5_core_warn(dev, "failed to query eq\n"); + goto out; + } + + switch (index) { + case EQ_NUM_EQES: + param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f); + break; + case EQ_INTR: + param = ctx->intr; + break; + case EQ_LOG_PG_SZ: + param = (ctx->log_page_size & 0x1f) + 12; + break; + } + +out: + kfree(out); + return param; +} + +static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + int index) +{ + struct mlx5_query_cq_mbox_out *out; + struct mlx5_cq_context *ctx; + u64 param = 0; + int err; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return param; + + ctx = &out->ctx; + + err = mlx5_core_query_cq(dev, cq, out); + if (err) { + mlx5_core_warn(dev, "failed to query cq\n"); + goto out; + } + + switch (index) { + case CQ_PID: + param = cq->pid; + break; + case CQ_NUM_CQES: + param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f); + break; + case CQ_LOG_PG_SZ: + param = (ctx->log_pg_sz & 0x1f) + 12; + break; + } + +out: + kfree(out); + return param; +} + +static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_field_desc *desc; + struct mlx5_rsc_debug *d; + char tbuf[18]; + u64 field; + int ret; + + if (*pos) + return 0; + + desc = filp->private_data; + d = (void *)(desc - desc->i) - sizeof(*d); + switch (d->type) { + case MLX5_DBG_RSC_QP: + field = qp_read_field(d->dev, d->object, desc->i); + break; + + case MLX5_DBG_RSC_EQ: + field = eq_read_field(d->dev, d->object, desc->i); + break; + + case MLX5_DBG_RSC_CQ: + field = cq_read_field(d->dev, d->object, desc->i); + break; + + default: + mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type); + return -EINVAL; + } + + ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field); + if (ret > 0) { + if (copy_to_user(buf, tbuf, ret)) + return -EFAULT; + } + + *pos += ret; + return ret; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = dbg_read, +}; + +static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type, + struct dentry *root, struct mlx5_rsc_debug **dbg, + int rsn, char **field, int nfile, void *data) +{ + struct mlx5_rsc_debug *d; + char resn[32]; + int err; + int i; + + d = kzalloc(sizeof(*d) + nfile * sizeof(d->fields[0]), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->dev = dev; + d->object = data; + d->type = type; + sprintf(resn, "0x%x", rsn); + d->root = debugfs_create_dir(resn, root); + if (!d->root) { + err = -ENOMEM; + goto out_free; + } + + for (i = 0; i < nfile; i++) { + d->fields[i].i = i; + d->fields[i].dent = debugfs_create_file(field[i], 0400, + d->root, &d->fields[i], + &fops); + if (!d->fields[i].dent) { + err = -ENOMEM; + goto out_rem; + } + } + *dbg = d; + + return 0; +out_rem: + debugfs_remove_recursive(d->root); + +out_free: + kfree(d); + return err; +} + +static void rem_res_tree(struct mlx5_rsc_debug *d) +{ + debugfs_remove_recursive(d->root); + kfree(d); +} + +int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs, + &qp->dbg, qp->qpn, qp_fields, + ARRAY_SIZE(qp_fields), qp); + if (err) + qp->dbg = NULL; + + return err; +} + +void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) +{ + if (!mlx5_debugfs_root) + return; + + if (qp->dbg) + rem_res_tree(qp->dbg); +} + + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs, + &eq->dbg, eq->eqn, eq_fields, + ARRAY_SIZE(eq_fields), eq); + if (err) + eq->dbg = NULL; + + return err; +} + +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + if (!mlx5_debugfs_root) + return; + + if (eq->dbg) + rem_res_tree(eq->dbg); +} + +int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + int err; + + if (!mlx5_debugfs_root) + return 0; + + err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs, + &cq->dbg, cq->cqn, cq_fields, + ARRAY_SIZE(cq_fields), cq); + if (err) + cq->dbg = NULL; + + return err; +} + +void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) +{ + if (!mlx5_debugfs_root) + return; + + if (cq->dbg) + rem_res_tree(cq->dbg); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c new file mode 100644 index 0000000..c02cbcf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -0,0 +1,521 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +enum { + MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), + MLX5_EQE_OWNER_INIT_VAL = 0x1, +}; + +enum { + MLX5_EQ_STATE_ARMED = 0x9, + MLX5_EQ_STATE_FIRED = 0xa, + MLX5_EQ_STATE_ALWAYS_ARMED = 0xb, +}; + +enum { + MLX5_NUM_SPARE_EQE = 0x80, + MLX5_NUM_ASYNC_EQE = 0x100, + MLX5_NUM_CMD_EQE = 32, +}; + +enum { + MLX5_EQ_DOORBEL_OFFSET = 0x40, +}; + +#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ + (1ull << MLX5_EVENT_TYPE_COMM_EST) | \ + (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \ + (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ + (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) + +struct map_eq_in { + u64 mask; + u32 reserved; + u32 unmap_eqn; +}; + +struct cre_des_eq { + u8 reserved[15]; + u8 eqn; +}; + +static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) +{ + struct mlx5_destroy_eq_mbox_in in; + struct mlx5_destroy_eq_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ); + in.eqn = eqn; + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (!err) + goto ex; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + +ex: + return err; +} + +static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) +{ + return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); +} + +static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); + + return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; +} + +static const char *eqe_type_str(u8 type) +{ + switch (type) { + case MLX5_EVENT_TYPE_COMP: + return "MLX5_EVENT_TYPE_COMP"; + case MLX5_EVENT_TYPE_PATH_MIG: + return "MLX5_EVENT_TYPE_PATH_MIG"; + case MLX5_EVENT_TYPE_COMM_EST: + return "MLX5_EVENT_TYPE_COMM_EST"; + case MLX5_EVENT_TYPE_SQ_DRAINED: + return "MLX5_EVENT_TYPE_SQ_DRAINED"; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; + case MLX5_EVENT_TYPE_CQ_ERROR: + return "MLX5_EVENT_TYPE_CQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_INTERNAL_ERROR: + return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; + case MLX5_EVENT_TYPE_PORT_CHANGE: + return "MLX5_EVENT_TYPE_PORT_CHANGE"; + case MLX5_EVENT_TYPE_GPIO_EVENT: + return "MLX5_EVENT_TYPE_GPIO_EVENT"; + case MLX5_EVENT_TYPE_REMOTE_CONFIG: + return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; + case MLX5_EVENT_TYPE_DB_BF_CONGESTION: + return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; + case MLX5_EVENT_TYPE_STALL_EVENT: + return "MLX5_EVENT_TYPE_STALL_EVENT"; + case MLX5_EVENT_TYPE_CMD: + return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_PAGE_REQUEST: + return "MLX5_EVENT_TYPE_PAGE_REQUEST"; + default: + return "Unrecognized event"; + } +} + +static enum mlx5_dev_event port_subtype_event(u8 subtype) +{ + switch (subtype) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + return MLX5_DEV_EVENT_PORT_DOWN; + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + return MLX5_DEV_EVENT_PORT_UP; + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + return MLX5_DEV_EVENT_PORT_INITIALIZED; + case MLX5_PORT_CHANGE_SUBTYPE_LID: + return MLX5_DEV_EVENT_LID_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + return MLX5_DEV_EVENT_PKEY_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + return MLX5_DEV_EVENT_GUID_CHANGE; + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + return MLX5_DEV_EVENT_CLIENT_REREG; + } + return -1; +} + +static void eq_update_ci(struct mlx5_eq *eq, int arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); + u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + __raw_writel((__force u32) cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe; + int eqes_found = 0; + int set_ci = 0; + u32 cqn; + u32 srqn; + u8 port; + + while ((eqe = next_eqe_sw(eq))) { + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + rmb(); + + mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type)); + switch (eqe->type) { + case MLX5_EVENT_TYPE_COMP: + cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; + mlx5_cq_completion(dev, cqn); + break; + + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + mlx5_core_dbg(dev, "event %s(%d) arrived\n", + eqe_type_str(eqe->type), eqe->type); + mlx5_qp_event(dev, be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff, + eqe->type); + break; + + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n", + eqe_type_str(eqe->type), eqe->type, srqn); + mlx5_srq_event(dev, srqn, eqe->type); + break; + + case MLX5_EVENT_TYPE_CMD: + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector)); + break; + + case MLX5_EVENT_TYPE_PORT_CHANGE: + port = (eqe->data.port.port >> 4) & 0xf; + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_LID: + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + dev->event(dev, port_subtype_event(eqe->sub_type), &port); + break; + default: + mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", + port, eqe->sub_type); + } + break; + case MLX5_EVENT_TYPE_CQ_ERROR: + cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n", + cqn, eqe->data.cq_err.syndrome); + mlx5_cq_event(dev, cqn, eqe->type); + break; + + case MLX5_EVENT_TYPE_PAGE_REQUEST: + { + u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id); + s16 npages = be16_to_cpu(eqe->data.req_pages.num_pages); + + mlx5_core_dbg(dev, "page request for func 0x%x, napges %d\n", func_id, npages); + mlx5_core_req_pages_handler(dev, func_id, npages); + } + break; + + + default: + mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); + break; + } + + ++eq->cons_index; + eqes_found = 1; + ++set_ci; + + /* The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MLX5_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { + eq_update_ci(eq, 0); + set_ci = 0; + } + } + + eq_update_ci(eq, 1); + + return eqes_found; +} + +static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr) +{ + struct mlx5_eq *eq = eq_ptr; + struct mlx5_core_dev *dev = eq->dev; + + mlx5_eq_int(dev, eq); + + /* MSI-X vectors always belong to us */ + return IRQ_HANDLED; +} + +static void init_eq_buf(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe; + int i; + + for (i = 0; i < eq->nent; i++) { + eqe = get_eqe(eq, i); + eqe->owner = MLX5_EQE_OWNER_INIT_VAL; + } +} + +int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, struct mlx5_uar *uar) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_create_eq_mbox_in *in; + struct mlx5_create_eq_mbox_out out; + int err; + int inlen; + + eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); + err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE, + &eq->buf); + if (err) + return err; + + init_eq_buf(eq); + + inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages; + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto err_buf; + } + memset(&out, 0, sizeof(out)); + + mlx5_fill_page_array(&eq->buf, in->pas); + + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ); + in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index); + in->ctx.intr = vecidx; + in->ctx.log_page_size = PAGE_SHIFT - 12; + in->events_mask = cpu_to_be64(mask); + + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) + goto err_in; + + if (out.hdr.status) { + err = mlx5_cmd_status_to_err(&out.hdr); + goto err_in; + } + + eq->eqn = out.eq_number; + err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0, + name, eq); + if (err) + goto err_eq; + + eq->irqn = vecidx; + eq->dev = dev; + eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; + + err = mlx5_debug_eq_add(dev, eq); + if (err) + goto err_irq; + + /* EQs are created in ARMED state + */ + eq_update_ci(eq, 1); + + mlx5_vfree(in); + return 0; + +err_irq: + free_irq(table->msix_arr[vecidx].vector, eq); + +err_eq: + mlx5_cmd_destroy_eq(dev, eq->eqn); + +err_in: + mlx5_vfree(in); + +err_buf: + mlx5_buf_free(dev, &eq->buf); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_create_map_eq); + +int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + int err; + + mlx5_debug_eq_remove(dev, eq); + free_irq(table->msix_arr[eq->irqn].vector, eq); + err = mlx5_cmd_destroy_eq(dev, eq->eqn); + if (err) + mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", + eq->eqn); + mlx5_buf_free(dev, &eq->buf); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq); + +int mlx5_eq_init(struct mlx5_core_dev *dev) +{ + int err; + + spin_lock_init(&dev->priv.eq_table.lock); + + err = mlx5_eq_debugfs_init(dev); + + return err; +} + + +void mlx5_eq_cleanup(struct mlx5_core_dev *dev) +{ + mlx5_eq_debugfs_cleanup(dev); +} + +int mlx5_start_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + int err; + + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, + MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, + "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); + if (err) { + mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); + return err; + } + + mlx5_cmd_use_events(dev); + + err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, + MLX5_NUM_ASYNC_EQE, MLX5_ASYNC_EVENT_MASK, + "mlx5_async_eq", &dev->priv.uuari.uars[0]); + if (err) { + mlx5_core_warn(dev, "failed to create async EQ %d\n", err); + goto err1; + } + + err = mlx5_create_map_eq(dev, &table->pages_eq, + MLX5_EQ_VEC_PAGES, + dev->caps.max_vf + 1, + 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", + &dev->priv.uuari.uars[0]); + if (err) { + mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); + goto err2; + } + + return err; + +err2: + mlx5_destroy_unmap_eq(dev, &table->async_eq); + +err1: + mlx5_cmd_use_polling(dev); + mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + return err; +} + +int mlx5_stop_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + int err; + + err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); + if (err) + return err; + + mlx5_destroy_unmap_eq(dev, &table->async_eq); + mlx5_cmd_use_polling(dev); + + err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + if (err) + mlx5_cmd_use_events(dev); + + return err; +} + +int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct mlx5_query_eq_mbox_out *out, int outlen) +{ + struct mlx5_query_eq_mbox_in in; + int err; + + memset(&in, 0, sizeof(in)); + memset(out, 0, outlen); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_EQ); + in.eqn = eq->eqn; + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + if (err) + return err; + + if (out->hdr.status) + err = mlx5_cmd_status_to_err(&out->hdr); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_eq_query); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c new file mode 100644 index 0000000..72a5222 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include <linux/module.h> +#include "mlx5_core.h" + +int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_query_adapter_mbox_out *out; + struct mlx5_cmd_query_adapter_mbox_in in; + int err; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return -ENOMEM; + + memset(&in, 0, sizeof(in)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_ADAPTER); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + if (err) + goto out_out; + + if (out->hdr.status) { + err = mlx5_cmd_status_to_err(&out->hdr); + goto out_out; + } + + memcpy(dev->board_id, out->vsd_psid, sizeof(out->vsd_psid)); + +out_out: + kfree(out); + + return err; +} + +int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, + struct mlx5_caps *caps) +{ + struct mlx5_cmd_query_hca_cap_mbox_out *out; + struct mlx5_cmd_query_hca_cap_mbox_in in; + struct mlx5_query_special_ctxs_mbox_out ctx_out; + struct mlx5_query_special_ctxs_mbox_in ctx_in; + int err; + u16 t16; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return -ENOMEM; + + memset(&in, 0, sizeof(in)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP); + in.hdr.opmod = cpu_to_be16(0x1); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + if (err) + goto out_out; + + if (out->hdr.status) { + err = mlx5_cmd_status_to_err(&out->hdr); + goto out_out; + } + + + caps->log_max_eq = out->hca_cap.log_max_eq & 0xf; + caps->max_cqes = 1 << out->hca_cap.log_max_cq_sz; + caps->max_wqes = 1 << out->hca_cap.log_max_qp_sz; + caps->max_sq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_sq); + caps->max_rq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_rq); + caps->flags = be64_to_cpu(out->hca_cap.flags); + caps->stat_rate_support = be16_to_cpu(out->hca_cap.stat_rate_support); + caps->log_max_msg = out->hca_cap.log_max_msg & 0x1f; + caps->num_ports = out->hca_cap.num_ports & 0xf; + caps->log_max_cq = out->hca_cap.log_max_cq & 0x1f; + if (caps->num_ports > MLX5_MAX_PORTS) { + mlx5_core_err(dev, "device has %d ports while the driver supports max %d ports\n", + caps->num_ports, MLX5_MAX_PORTS); + err = -EINVAL; + goto out_out; + } + caps->log_max_qp = out->hca_cap.log_max_qp & 0x1f; + caps->log_max_mkey = out->hca_cap.log_max_mkey & 0x3f; + caps->log_max_pd = out->hca_cap.log_max_pd & 0x1f; + caps->log_max_srq = out->hca_cap.log_max_srqs & 0x1f; + caps->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f; + caps->log_max_mcg = out->hca_cap.log_max_mcg; + caps->max_qp_mcg = be16_to_cpu(out->hca_cap.max_qp_mcg); + caps->max_ra_res_qp = 1 << (out->hca_cap.log_max_ra_res_qp & 0x3f); + caps->max_ra_req_qp = 1 << (out->hca_cap.log_max_ra_req_qp & 0x3f); + caps->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz; + t16 = be16_to_cpu(out->hca_cap.bf_log_bf_reg_size); + if (t16 & 0x8000) { + caps->bf_reg_size = 1 << (t16 & 0x1f); + caps->bf_regs_per_page = MLX5_BF_REGS_PER_PAGE; + } else { + caps->bf_reg_size = 0; + caps->bf_regs_per_page = 0; + } + caps->min_page_sz = ~(u32)((1 << out->hca_cap.log_pg_sz) - 1); + + memset(&ctx_in, 0, sizeof(ctx_in)); + memset(&ctx_out, 0, sizeof(ctx_out)); + ctx_in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + err = mlx5_cmd_exec(dev, &ctx_in, sizeof(ctx_in), + &ctx_out, sizeof(ctx_out)); + if (err) + goto out_out; + + if (ctx_out.hdr.status) + err = mlx5_cmd_status_to_err(&ctx_out.hdr); + + caps->reserved_lkey = be32_to_cpu(ctx_out.reserved_lkey); + +out_out: + kfree(out); + + return err; +} + +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_init_hca_mbox_in in; + struct mlx5_cmd_init_hca_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + + return err; +} + +int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_teardown_hca_mbox_in in; + struct mlx5_cmd_teardown_hca_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c new file mode 100644 index 0000000..748f10a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/vmalloc.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +enum { + MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, + MAX_MISSES = 3, +}; + +enum { + MLX5_HEALTH_SYNDR_FW_ERR = 0x1, + MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7, + MLX5_HEALTH_SYNDR_CRC_ERR = 0x9, + MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa, + MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb, + MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc, + MLX5_HEALTH_SYNDR_EQ_ERR = 0xd, + MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf, +}; + +static DEFINE_SPINLOCK(health_lock); + +static LIST_HEAD(health_list); +static struct work_struct health_work; + +static health_handler_t reg_handler; +int mlx5_register_health_report_handler(health_handler_t handler) +{ + spin_lock_irq(&health_lock); + if (reg_handler) { + spin_unlock_irq(&health_lock); + return -EEXIST; + } + reg_handler = handler; + spin_unlock_irq(&health_lock); + + return 0; +} +EXPORT_SYMBOL(mlx5_register_health_report_handler); + +void mlx5_unregister_health_report_handler(void) +{ + spin_lock_irq(&health_lock); + reg_handler = NULL; + spin_unlock_irq(&health_lock); +} +EXPORT_SYMBOL(mlx5_unregister_health_report_handler); + +static void health_care(struct work_struct *work) +{ + struct mlx5_core_health *health, *n; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + LIST_HEAD(tlist); + + spin_lock_irq(&health_lock); + list_splice_init(&health_list, &tlist); + + spin_unlock_irq(&health_lock); + + list_for_each_entry_safe(health, n, &tlist, list) { + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + mlx5_core_warn(dev, "handling bad device here\n"); + spin_lock_irq(&health_lock); + if (reg_handler) + reg_handler(dev->pdev, health->health, + sizeof(health->health)); + + list_del_init(&health->list); + spin_unlock_irq(&health_lock); + } +} + +static const char *hsynd_str(u8 synd) +{ + switch (synd) { + case MLX5_HEALTH_SYNDR_FW_ERR: + return "firmware internal error"; + case MLX5_HEALTH_SYNDR_IRISC_ERR: + return "irisc not responding"; + case MLX5_HEALTH_SYNDR_CRC_ERR: + return "firmware CRC error"; + case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR: + return "ICM fetch PCI error"; + case MLX5_HEALTH_SYNDR_HW_FTL_ERR: + return "HW fatal error\n"; + case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR: + return "async EQ buffer overrun"; + case MLX5_HEALTH_SYNDR_EQ_ERR: + return "EQ error"; + case MLX5_HEALTH_SYNDR_FFSER_ERR: + return "FFSER error"; + default: + return "unrecognized error"; + } +} + +static u16 read_be16(__be16 __iomem *p) +{ + return swab16(readl((__force u16 __iomem *) p)); +} + +static u32 read_be32(__be32 __iomem *p) +{ + return swab32(readl((__force u32 __iomem *) p)); +} + +static void print_health_info(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + int i; + + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) + pr_info("assert_var[%d] 0x%08x\n", i, read_be32(h->assert_var + i)); + + pr_info("assert_exit_ptr 0x%08x\n", read_be32(&h->assert_exit_ptr)); + pr_info("assert_callra 0x%08x\n", read_be32(&h->assert_callra)); + pr_info("fw_ver 0x%08x\n", read_be32(&h->fw_ver)); + pr_info("hw_id 0x%08x\n", read_be32(&h->hw_id)); + pr_info("irisc_index %d\n", readb(&h->irisc_index)); + pr_info("synd 0x%x: %s\n", readb(&h->synd), hsynd_str(readb(&h->synd))); + pr_info("ext_sync 0x%04x\n", read_be16(&h->ext_sync)); +} + +static void poll_health(unsigned long data) +{ + struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; + struct mlx5_core_health *health = &dev->priv.health; + unsigned long next; + u32 count; + + count = ioread32be(health->health_counter); + if (count == health->prev) + ++health->miss_counter; + else + health->miss_counter = 0; + + health->prev = count; + if (health->miss_counter == MAX_MISSES) { + mlx5_core_err(dev, "device's health compromised\n"); + print_health_info(dev); + spin_lock_irq(&health_lock); + list_add_tail(&health->list, &health_list); + spin_unlock_irq(&health_lock); + + queue_work(mlx5_core_wq, &health_work); + } else { + get_random_bytes(&next, sizeof(next)); + next %= HZ; + next += jiffies + MLX5_HEALTH_POLL_INTERVAL; + mod_timer(&health->timer, next); + } +} + +void mlx5_start_health_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + INIT_LIST_HEAD(&health->list); + init_timer(&health->timer); + health->health = &dev->iseg->health; + health->health_counter = &dev->iseg->health_counter; + + health->timer.data = (unsigned long)dev; + health->timer.function = poll_health; + health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL); + add_timer(&health->timer); +} + +void mlx5_stop_health_poll(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + del_timer_sync(&health->timer); + + spin_lock_irq(&health_lock); + if (!list_empty(&health->list)) + list_del_init(&health->list); + spin_unlock_irq(&health_lock); +} + +void mlx5_health_cleanup(void) +{ +} + +void __init mlx5_health_init(void) +{ + INIT_WORK(&health_work, health_care); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c new file mode 100644 index 0000000..18d6fd5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, + u16 opmod, int port) +{ + struct mlx5_mad_ifc_mbox_in *in = NULL; + struct mlx5_mad_ifc_mbox_out *out = NULL; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) { + err = -ENOMEM; + goto out; + } + + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC); + in->hdr.opmod = cpu_to_be16(opmod); + in->port = port; + + memcpy(in->data, inb, sizeof(in->data)); + + err = mlx5_cmd_exec(dev, in, sizeof(*in), out, sizeof(*out)); + if (err) + goto out; + + if (out->hdr.status) { + err = mlx5_cmd_status_to_err(&out->hdr); + goto out; + } + + memcpy(outb, out->data, sizeof(out->data)); + +out: + kfree(out); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c new file mode 100644 index 0000000..12242de --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <asm-generic/kmap_types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/io-mapping.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/srq.h> +#include <linux/debugfs.h> +#include "mlx5_core.h" + +#define DRIVER_NAME "mlx5_core" +#define DRIVER_VERSION "1.0" +#define DRIVER_RELDATE "June 2013" + +MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox ConnectX-IB HCA core library"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRIVER_VERSION); + +int mlx5_core_debug_mask; +module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644); +MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); + +struct workqueue_struct *mlx5_core_wq; + +static int set_dma_caps(struct pci_dev *pdev) +{ + int err; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); + return err; + } + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + dev_warn(&pdev->dev, + "Warning: couldn't set 64-bit consistent PCI DMA mask.\n"); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, + "Can't set consistent PCI DMA mask, aborting.\n"); + return err; + } + } + + dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024); + return err; +} + +static int request_bar(struct pci_dev *pdev) +{ + int err = 0; + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + dev_err(&pdev->dev, "Missing registers BAR, aborting.\n"); + return -ENODEV; + } + + err = pci_request_regions(pdev, DRIVER_NAME); + if (err) + dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); + + return err; +} + +static void release_bar(struct pci_dev *pdev) +{ + pci_release_regions(pdev); +} + +static int mlx5_enable_msix(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + int num_eqs = 1 << dev->caps.log_max_eq; + int nvec; + int err; + int i; + + nvec = dev->caps.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_EQ_VEC_COMP_BASE) + return -ENOMEM; + + table->msix_arr = kzalloc(nvec * sizeof(*table->msix_arr), GFP_KERNEL); + if (!table->msix_arr) + return -ENOMEM; + + for (i = 0; i < nvec; i++) + table->msix_arr[i].entry = i; + +retry: + table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + err = pci_enable_msix(dev->pdev, table->msix_arr, nvec); + if (err <= 0) { + return err; + } else if (err > 2) { + nvec = err; + goto retry; + } + + mlx5_core_dbg(dev, "received %d MSI vectors out of %d requested\n", err, nvec); + + return 0; +} + +static void mlx5_disable_msix(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + + pci_disable_msix(dev->pdev); + kfree(table->msix_arr); +} + +struct mlx5_reg_host_endianess { + u8 he; + u8 rsvd[15]; +}; + +static int handle_hca_cap(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL; + struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL; + struct mlx5_cmd_query_hca_cap_mbox_in query_ctx; + struct mlx5_cmd_set_hca_cap_mbox_out set_out; + struct mlx5_profile *prof = dev->profile; + u64 flags; + int csum = 1; + int err; + + memset(&query_ctx, 0, sizeof(query_ctx)); + query_out = kzalloc(sizeof(*query_out), GFP_KERNEL); + if (!query_out) + return -ENOMEM; + + set_ctx = kzalloc(sizeof(*set_ctx), GFP_KERNEL); + if (!set_ctx) { + err = -ENOMEM; + goto query_ex; + } + + query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP); + query_ctx.hdr.opmod = cpu_to_be16(0x1); + err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx), + query_out, sizeof(*query_out)); + if (err) + goto query_ex; + + err = mlx5_cmd_status_to_err(&query_out->hdr); + if (err) { + mlx5_core_warn(dev, "query hca cap failed, %d\n", err); + goto query_ex; + } + + memcpy(&set_ctx->hca_cap, &query_out->hca_cap, + sizeof(set_ctx->hca_cap)); + + if (prof->mask & MLX5_PROF_MASK_CMDIF_CSUM) { + csum = !!prof->cmdif_csum; + flags = be64_to_cpu(set_ctx->hca_cap.flags); + if (csum) + flags |= MLX5_DEV_CAP_FLAG_CMDIF_CSUM; + else + flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; + + set_ctx->hca_cap.flags = cpu_to_be64(flags); + } + + if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE) + set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp; + + memset(&set_out, 0, sizeof(set_out)); + set_ctx->hca_cap.log_uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12); + set_ctx->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP); + err = mlx5_cmd_exec(dev, set_ctx, sizeof(*set_ctx), + &set_out, sizeof(set_out)); + if (err) { + mlx5_core_warn(dev, "set hca cap failed, %d\n", err); + goto query_ex; + } + + err = mlx5_cmd_status_to_err(&set_out.hdr); + if (err) + goto query_ex; + + if (!csum) + dev->cmd.checksum_disabled = 1; + +query_ex: + kfree(query_out); + kfree(set_ctx); + + return err; +} + +static int set_hca_ctrl(struct mlx5_core_dev *dev) +{ + struct mlx5_reg_host_endianess he_in; + struct mlx5_reg_host_endianess he_out; + int err; + + memset(&he_in, 0, sizeof(he_in)); + he_in.he = MLX5_SET_HOST_ENDIANNESS; + err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), + &he_out, sizeof(he_out), + MLX5_REG_HOST_ENDIANNESS, 0, 1); + return err; +} + +int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) +{ + struct mlx5_priv *priv = &dev->priv; + int err; + + dev->pdev = pdev; + pci_set_drvdata(dev->pdev, dev); + strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN); + priv->name[MLX5_MAX_NAME_LEN - 1] = 0; + + mutex_init(&priv->pgdir_mutex); + INIT_LIST_HEAD(&priv->pgdir_list); + spin_lock_init(&priv->mkey_lock); + + priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); + if (!priv->dbg_root) + return -ENOMEM; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device, aborting.\n"); + goto err_dbg; + } + + err = request_bar(pdev); + if (err) { + dev_err(&pdev->dev, "error requesting BARs, aborting.\n"); + goto err_disable; + } + + pci_set_master(pdev); + + err = set_dma_caps(pdev); + if (err) { + dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n"); + goto err_clr_master; + } + + dev->iseg_base = pci_resource_start(dev->pdev, 0); + dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); + if (!dev->iseg) { + err = -ENOMEM; + dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n"); + goto err_clr_master; + } + dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), + fw_rev_min(dev), fw_rev_sub(dev)); + + err = mlx5_cmd_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed initializing command interface, aborting\n"); + goto err_unmap; + } + + mlx5_pagealloc_init(dev); + err = set_hca_ctrl(dev); + if (err) { + dev_err(&pdev->dev, "set_hca_ctrl failed\n"); + goto err_pagealloc_cleanup; + } + + err = handle_hca_cap(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap failed\n"); + goto err_pagealloc_cleanup; + } + + err = mlx5_satisfy_startup_pages(dev); + if (err) { + dev_err(&pdev->dev, "failed to allocate startup pages\n"); + goto err_pagealloc_cleanup; + } + + err = mlx5_pagealloc_start(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n"); + goto err_reclaim_pages; + } + + err = mlx5_cmd_init_hca(dev); + if (err) { + dev_err(&pdev->dev, "init hca failed\n"); + goto err_pagealloc_stop; + } + + mlx5_start_health_poll(dev); + + err = mlx5_cmd_query_hca_cap(dev, &dev->caps); + if (err) { + dev_err(&pdev->dev, "query hca failed\n"); + goto err_stop_poll; + } + + err = mlx5_cmd_query_adapter(dev); + if (err) { + dev_err(&pdev->dev, "query adapter failed\n"); + goto err_stop_poll; + } + + err = mlx5_enable_msix(dev); + if (err) { + dev_err(&pdev->dev, "enable msix failed\n"); + goto err_stop_poll; + } + + err = mlx5_eq_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize eq\n"); + goto disable_msix; + } + + err = mlx5_alloc_uuars(dev, &priv->uuari); + if (err) { + dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); + goto err_eq_cleanup; + } + + err = mlx5_start_eqs(dev); + if (err) { + dev_err(&pdev->dev, "Failed to start pages and async EQs\n"); + goto err_free_uar; + } + + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); + + mlx5_init_cq_table(dev); + mlx5_init_qp_table(dev); + mlx5_init_srq_table(dev); + + return 0; + +err_free_uar: + mlx5_free_uuars(dev, &priv->uuari); + +err_eq_cleanup: + mlx5_eq_cleanup(dev); + +disable_msix: + mlx5_disable_msix(dev); + +err_stop_poll: + mlx5_stop_health_poll(dev); + mlx5_cmd_teardown_hca(dev); + +err_pagealloc_stop: + mlx5_pagealloc_stop(dev); + +err_reclaim_pages: + mlx5_reclaim_startup_pages(dev); + +err_pagealloc_cleanup: + mlx5_pagealloc_cleanup(dev); + mlx5_cmd_cleanup(dev); + +err_unmap: + iounmap(dev->iseg); + +err_clr_master: + pci_clear_master(dev->pdev); + release_bar(dev->pdev); + +err_disable: + pci_disable_device(dev->pdev); + +err_dbg: + debugfs_remove(priv->dbg_root); + return err; +} +EXPORT_SYMBOL(mlx5_dev_init); + +void mlx5_dev_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + mlx5_stop_eqs(dev); + mlx5_free_uuars(dev, &priv->uuari); + mlx5_eq_cleanup(dev); + mlx5_disable_msix(dev); + mlx5_stop_health_poll(dev); + mlx5_cmd_teardown_hca(dev); + mlx5_pagealloc_stop(dev); + mlx5_reclaim_startup_pages(dev); + mlx5_pagealloc_cleanup(dev); + mlx5_cmd_cleanup(dev); + iounmap(dev->iseg); + pci_clear_master(dev->pdev); + release_bar(dev->pdev); + pci_disable_device(dev->pdev); + debugfs_remove(priv->dbg_root); +} +EXPORT_SYMBOL(mlx5_dev_cleanup); + +static int __init init(void) +{ + int err; + + mlx5_register_debugfs(); + mlx5_core_wq = create_singlethread_workqueue("mlx5_core_wq"); + if (!mlx5_core_wq) { + err = -ENOMEM; + goto err_debug; + } + mlx5_health_init(); + + return 0; + + mlx5_health_cleanup(); +err_debug: + mlx5_unregister_debugfs(); + return err; +} + +static void __exit cleanup(void) +{ + mlx5_health_cleanup(); + destroy_workqueue(mlx5_core_wq); + mlx5_unregister_debugfs(); +} + +module_init(init); +module_exit(cleanup); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c new file mode 100644 index 0000000..4483764 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include <rdma/ib_verbs.h> +#include "mlx5_core.h" + +struct mlx5_attach_mcg_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + __be32 rsvd; + u8 gid[16]; +}; + +struct mlx5_attach_mcg_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvf[8]; +}; + +struct mlx5_detach_mcg_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + __be32 rsvd; + u8 gid[16]; +}; + +struct mlx5_detach_mcg_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvf[8]; +}; + +int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) +{ + struct mlx5_attach_mcg_mbox_in in; + struct mlx5_attach_mcg_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG); + memcpy(in.gid, mgid, sizeof(*mgid)); + in.qpn = cpu_to_be32(qpn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_attach_mcg); + +int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) +{ + struct mlx5_detach_mcg_mbox_in in; + struct mlx5_detach_mcg_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETACH_FROM_MCG); + memcpy(in.gid, mgid, sizeof(*mgid)); + in.qpn = cpu_to_be32(qpn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_detach_mcg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h new file mode 100644 index 0000000..68b74e1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_CORE_H__ +#define __MLX5_CORE_H__ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> + +extern int mlx5_core_debug_mask; + +#define mlx5_core_dbg(dev, format, arg...) \ +pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__, \ + current->pid, ##arg) + +#define mlx5_core_dbg_mask(dev, mask, format, arg...) \ +do { \ + if ((mask) & mlx5_core_debug_mask) \ + pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name, \ + __func__, __LINE__, current->pid, ##arg); \ +} while (0) + +#define mlx5_core_err(dev, format, arg...) \ +pr_err("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__, \ + current->pid, ##arg) + +#define mlx5_core_warn(dev, format, arg...) \ +pr_warn("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__, \ + current->pid, ##arg) + +enum { + MLX5_CMD_DATA, /* print command payload only */ + MLX5_CMD_TIME, /* print command execution time */ +}; + + +int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, + struct mlx5_caps *caps); +int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev); +int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); + +#endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c new file mode 100644 index 0000000..5b44e2e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + struct mlx5_create_mkey_mbox_in *in, int inlen) +{ + struct mlx5_create_mkey_mbox_out out; + int err; + u8 key; + + memset(&out, 0, sizeof(out)); + spin_lock(&dev->priv.mkey_lock); + key = dev->priv.mkey_key++; + spin_unlock(&dev->priv.mkey_lock); + in->seg.qpn_mkey7_0 |= cpu_to_be32(key); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY); + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) { + mlx5_core_dbg(dev, "cmd exec faile %d\n", err); + return err; + } + + if (out.hdr.status) { + mlx5_core_dbg(dev, "status %d\n", out.hdr.status); + return mlx5_cmd_status_to_err(&out.hdr); + } + + mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key; + mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key); + + return err; +} +EXPORT_SYMBOL(mlx5_core_create_mkey); + +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) +{ + struct mlx5_destroy_mkey_mbox_in in; + struct mlx5_destroy_mkey_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_destroy_mkey); + +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + struct mlx5_query_mkey_mbox_out *out, int outlen) +{ + struct mlx5_destroy_mkey_mbox_in in; + int err; + + memset(&in, 0, sizeof(in)); + memset(out, 0, outlen); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY); + in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + if (err) + return err; + + if (out->hdr.status) + return mlx5_cmd_status_to_err(&out->hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_query_mkey); + +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + u32 *mkey) +{ + struct mlx5_query_special_ctxs_mbox_in in; + struct mlx5_query_special_ctxs_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + *mkey = be32_to_cpu(out.dump_fill_mkey); + + return err; +} +EXPORT_SYMBOL(mlx5_core_dump_fill_mkey); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c new file mode 100644 index 0000000..f0bf463 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <asm-generic/kmap_types.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +enum { + MLX5_PAGES_CANT_GIVE = 0, + MLX5_PAGES_GIVE = 1, + MLX5_PAGES_TAKE = 2 +}; + +struct mlx5_pages_req { + struct mlx5_core_dev *dev; + u32 func_id; + s16 npages; + struct work_struct work; +}; + +struct fw_page { + struct rb_node rb_node; + u64 addr; + struct page *page; + u16 func_id; +}; + +struct mlx5_query_pages_inbox { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_pages_outbox { + struct mlx5_outbox_hdr hdr; + u8 reserved[2]; + __be16 func_id; + __be16 init_pages; + __be16 num_pages; +}; + +struct mlx5_manage_pages_inbox { + struct mlx5_inbox_hdr hdr; + __be16 rsvd0; + __be16 func_id; + __be16 rsvd1; + __be16 num_entries; + u8 rsvd2[16]; + __be64 pas[0]; +}; + +struct mlx5_manage_pages_outbox { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[2]; + __be16 num_entries; + u8 rsvd1[20]; + __be64 pas[0]; +}; + +static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id) +{ + struct rb_root *root = &dev->priv.page_root; + struct rb_node **new = &root->rb_node; + struct rb_node *parent = NULL; + struct fw_page *nfp; + struct fw_page *tfp; + + while (*new) { + parent = *new; + tfp = rb_entry(parent, struct fw_page, rb_node); + if (tfp->addr < addr) + new = &parent->rb_left; + else if (tfp->addr > addr) + new = &parent->rb_right; + else + return -EEXIST; + } + + nfp = kmalloc(sizeof(*nfp), GFP_KERNEL); + if (!nfp) + return -ENOMEM; + + nfp->addr = addr; + nfp->page = page; + nfp->func_id = func_id; + + rb_link_node(&nfp->rb_node, parent, new); + rb_insert_color(&nfp->rb_node, root); + + return 0; +} + +static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr) +{ + struct rb_root *root = &dev->priv.page_root; + struct rb_node *tmp = root->rb_node; + struct page *result = NULL; + struct fw_page *tfp; + + while (tmp) { + tfp = rb_entry(tmp, struct fw_page, rb_node); + if (tfp->addr < addr) { + tmp = tmp->rb_left; + } else if (tfp->addr > addr) { + tmp = tmp->rb_right; + } else { + rb_erase(&tfp->rb_node, root); + result = tfp->page; + kfree(tfp); + break; + } + } + + return result; +} + +static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, + s16 *pages, s16 *init_pages) +{ + struct mlx5_query_pages_inbox in; + struct mlx5_query_pages_outbox out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + if (pages) + *pages = be16_to_cpu(out.num_pages); + if (init_pages) + *init_pages = be16_to_cpu(out.init_pages); + *func_id = be16_to_cpu(out.func_id); + + return err; +} + +static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, + int notify_fail) +{ + struct mlx5_manage_pages_inbox *in; + struct mlx5_manage_pages_outbox out; + struct page *page; + int inlen; + u64 addr; + int err; + int i; + + inlen = sizeof(*in) + npages * sizeof(in->pas[0]); + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(dev, "vzalloc failed %d\n", inlen); + return -ENOMEM; + } + memset(&out, 0, sizeof(out)); + + for (i = 0; i < npages; i++) { + page = alloc_page(GFP_HIGHUSER); + if (!page) { + err = -ENOMEM; + mlx5_core_warn(dev, "failed to allocate page\n"); + goto out_alloc; + } + addr = dma_map_page(&dev->pdev->dev, page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&dev->pdev->dev, addr)) { + mlx5_core_warn(dev, "failed dma mapping page\n"); + __free_page(page); + err = -ENOMEM; + goto out_alloc; + } + err = insert_page(dev, addr, page, func_id); + if (err) { + mlx5_core_err(dev, "failed to track allocated page\n"); + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(page); + err = -ENOMEM; + goto out_alloc; + } + in->pas[i] = cpu_to_be64(addr); + } + + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); + in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE); + in->func_id = cpu_to_be16(func_id); + in->num_entries = cpu_to_be16(npages); + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + mlx5_core_dbg(dev, "err %d\n", err); + if (err) { + mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); + goto out_alloc; + } + dev->priv.fw_pages += npages; + + if (out.hdr.status) { + err = mlx5_cmd_status_to_err(&out.hdr); + if (err) { + mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n", func_id, npages, out.hdr.status); + goto out_alloc; + } + } + + mlx5_core_dbg(dev, "err %d\n", err); + + goto out_free; + +out_alloc: + if (notify_fail) { + memset(in, 0, inlen); + memset(&out, 0, sizeof(out)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); + in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE); + if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out))) + mlx5_core_warn(dev, "\n"); + } + for (i--; i >= 0; i--) { + addr = be64_to_cpu(in->pas[i]); + page = remove_page(dev, addr); + if (!page) { + mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n", + addr); + continue; + } + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(page); + } + +out_free: + mlx5_vfree(in); + return err; +} + +static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, + int *nclaimed) +{ + struct mlx5_manage_pages_inbox in; + struct mlx5_manage_pages_outbox *out; + struct page *page; + int num_claimed; + int outlen; + u64 addr; + int err; + int i; + + memset(&in, 0, sizeof(in)); + outlen = sizeof(*out) + npages * sizeof(out->pas[0]); + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); + in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE); + in.func_id = cpu_to_be16(func_id); + in.num_entries = cpu_to_be16(npages); + mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + if (err) { + mlx5_core_err(dev, "failed recliaming pages\n"); + goto out_free; + } + dev->priv.fw_pages -= npages; + + if (out->hdr.status) { + err = mlx5_cmd_status_to_err(&out->hdr); + goto out_free; + } + + num_claimed = be16_to_cpu(out->num_entries); + if (nclaimed) + *nclaimed = num_claimed; + + for (i = 0; i < num_claimed; i++) { + addr = be64_to_cpu(out->pas[i]); + page = remove_page(dev, addr); + if (!page) { + mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr); + } else { + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(page); + } + } + +out_free: + mlx5_vfree(out); + return err; +} + +static void pages_work_handler(struct work_struct *work) +{ + struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work); + struct mlx5_core_dev *dev = req->dev; + int err = 0; + + if (req->npages < 0) + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL); + else if (req->npages > 0) + err = give_pages(dev, req->func_id, req->npages, 1); + + if (err) + mlx5_core_warn(dev, "%s fail %d\n", req->npages < 0 ? + "reclaim" : "give", err); + + kfree(req); +} + +void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, + s16 npages) +{ + struct mlx5_pages_req *req; + + req = kzalloc(sizeof(*req), GFP_ATOMIC); + if (!req) { + mlx5_core_warn(dev, "failed to allocate pages request\n"); + return; + } + + req->dev = dev; + req->func_id = func_id; + req->npages = npages; + INIT_WORK(&req->work, pages_work_handler); + queue_work(dev->priv.pg_wq, &req->work); +} + +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev) +{ + s16 uninitialized_var(init_pages); + u16 uninitialized_var(func_id); + int err; + + err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages); + if (err) + return err; + + mlx5_core_dbg(dev, "requested %d init pages for func_id 0x%x\n", init_pages, func_id); + + return give_pages(dev, func_id, init_pages, 0); +} + +static int optimal_reclaimed_pages(void) +{ + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_layout *lay; + int ret; + + ret = (sizeof(lay->in) + sizeof(block->data) - + sizeof(struct mlx5_manage_pages_outbox)) / 8; + + return ret; +} + +int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) +{ + unsigned long end = jiffies + msecs_to_jiffies(5000); + struct fw_page *fwp; + struct rb_node *p; + int err; + + do { + p = rb_first(&dev->priv.page_root); + if (p) { + fwp = rb_entry(p, struct fw_page, rb_node); + err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), NULL); + if (err) { + mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err); + return err; + } + } + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "FW did not return all pages. giving up...\n"); + break; + } + } while (p); + + return 0; +} + +void mlx5_pagealloc_init(struct mlx5_core_dev *dev) +{ + dev->priv.page_root = RB_ROOT; +} + +void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) +{ + /* nothing */ +} + +int mlx5_pagealloc_start(struct mlx5_core_dev *dev) +{ + dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); + if (!dev->priv.pg_wq) + return -ENOMEM; + + return 0; +} + +void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) +{ + destroy_workqueue(dev->priv.pg_wq); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c new file mode 100644 index 0000000..790da5c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +struct mlx5_alloc_pd_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_alloc_pd_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 pdn; + u8 rsvd[4]; +}; + +struct mlx5_dealloc_pd_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 pdn; + u8 rsvd[4]; +}; + +struct mlx5_dealloc_pd_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn) +{ + struct mlx5_alloc_pd_mbox_in in; + struct mlx5_alloc_pd_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + *pdn = be32_to_cpu(out.pdn) & 0xffffff; + return err; +} +EXPORT_SYMBOL(mlx5_core_alloc_pd); + +int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn) +{ + struct mlx5_dealloc_pd_mbox_in in; + struct mlx5_dealloc_pd_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD); + in.pdn = cpu_to_be32(pdn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_dealloc_pd); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c new file mode 100644 index 0000000..f6afe7b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_num, int arg, int write) +{ + struct mlx5_access_reg_mbox_in *in = NULL; + struct mlx5_access_reg_mbox_out *out = NULL; + int err = -ENOMEM; + + in = mlx5_vzalloc(sizeof(*in) + size_in); + if (!in) + return -ENOMEM; + + out = mlx5_vzalloc(sizeof(*out) + size_out); + if (!out) + goto ex1; + + memcpy(in->data, data_in, size_in); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG); + in->hdr.opmod = cpu_to_be16(!write); + in->arg = cpu_to_be32(arg); + in->register_id = cpu_to_be16(reg_num); + err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out, + sizeof(out) + size_out); + if (err) + goto ex2; + + if (out->hdr.status) + err = mlx5_cmd_status_to_err(&out->hdr); + + if (!err) + memcpy(data_out, out->data, size_out); + +ex2: + mlx5_vfree(out); +ex1: + mlx5_vfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_access_reg); + + +struct mlx5_reg_pcap { + u8 rsvd0; + u8 port_num; + u8 rsvd1[2]; + __be32 caps_127_96; + __be32 caps_95_64; + __be32 caps_63_32; + __be32 caps_31_0; +}; + +int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps) +{ + struct mlx5_reg_pcap in; + struct mlx5_reg_pcap out; + int err; + + memset(&in, 0, sizeof(in)); + in.caps_127_96 = cpu_to_be32(caps); + in.port_num = port_num; + + err = mlx5_core_access_reg(dev, &in, sizeof(in), &out, + sizeof(out), MLX5_REG_PCAP, 0, 1); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_set_port_caps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c new file mode 100644 index 0000000..54faf8b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include <linux/gfp.h> +#include <linux/export.h> +#include <linux/mlx5/cmd.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" + +void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + struct mlx5_core_qp *qp; + + spin_lock(&table->lock); + + qp = radix_tree_lookup(&table->tree, qpn); + if (qp) + atomic_inc(&qp->refcount); + + spin_unlock(&table->lock); + + if (!qp) { + mlx5_core_warn(dev, "Async event for bogus QP 0x%x\n", qpn); + return; + } + + qp->event(qp, event_type); + + if (atomic_dec_and_test(&qp->refcount)) + complete(&qp->free); +} + +int mlx5_core_create_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp, + struct mlx5_create_qp_mbox_in *in, + int inlen) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + struct mlx5_create_qp_mbox_out out; + struct mlx5_destroy_qp_mbox_in din; + struct mlx5_destroy_qp_mbox_out dout; + int err; + + memset(&dout, 0, sizeof(dout)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP); + + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) { + mlx5_core_warn(dev, "ret %d", err); + return err; + } + + if (out.hdr.status) { + pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps)); + return mlx5_cmd_status_to_err(&out.hdr); + } + + qp->qpn = be32_to_cpu(out.qpn) & 0xffffff; + mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, qp->qpn, qp); + spin_unlock_irq(&table->lock); + if (err) { + mlx5_core_warn(dev, "err %d", err); + goto err_cmd; + } + + err = mlx5_debug_qp_add(dev, qp); + if (err) + mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n", + qp->qpn); + + qp->pid = current->pid; + atomic_set(&qp->refcount, 1); + atomic_inc(&dev->num_qps); + init_completion(&qp->free); + + return 0; + +err_cmd: + memset(&din, 0, sizeof(din)); + memset(&dout, 0, sizeof(dout)); + din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP); + din.qpn = cpu_to_be32(qp->qpn); + mlx5_cmd_exec(dev, &din, sizeof(din), &out, sizeof(dout)); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_create_qp); + +int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp) +{ + struct mlx5_destroy_qp_mbox_in in; + struct mlx5_destroy_qp_mbox_out out; + struct mlx5_qp_table *table = &dev->priv.qp_table; + unsigned long flags; + int err; + + mlx5_debug_qp_remove(dev, qp); + + spin_lock_irqsave(&table->lock, flags); + radix_tree_delete(&table->tree, qp->qpn); + spin_unlock_irqrestore(&table->lock, flags); + + if (atomic_dec_and_test(&qp->refcount)) + complete(&qp->free); + wait_for_completion(&qp->free); + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP); + in.qpn = cpu_to_be32(qp->qpn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + atomic_dec(&dev->num_qps); + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp); + +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, + enum mlx5_qp_state new_state, + struct mlx5_modify_qp_mbox_in *in, int sqd_event, + struct mlx5_core_qp *qp) +{ + static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = { + [MLX5_QP_STATE_RST] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP, + }, + [MLX5_QP_STATE_INIT] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP, + [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP, + }, + [MLX5_QP_STATE_RTR] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP, + }, + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP, + [MLX5_QP_STATE_SQD] = MLX5_CMD_OP_RTS2SQD_QP, + }, + [MLX5_QP_STATE_SQD] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQD2RTS_QP, + [MLX5_QP_STATE_SQD] = MLX5_CMD_OP_SQD2SQD_QP, + }, + [MLX5_QP_STATE_SQER] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP, + }, + [MLX5_QP_STATE_ERR] = { + [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP, + [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP, + } + }; + + struct mlx5_modify_qp_mbox_out out; + int err = 0; + u16 op; + + if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE || + !optab[cur_state][new_state]) + return -EINVAL; + + memset(&out, 0, sizeof(out)); + op = optab[cur_state][new_state]; + in->hdr.opcode = cpu_to_be16(op); + in->qpn = cpu_to_be32(qp->qpn); + err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); + if (err) + return err; + + return mlx5_cmd_status_to_err(&out.hdr); +} +EXPORT_SYMBOL_GPL(mlx5_core_qp_modify); + +void mlx5_init_qp_table(struct mlx5_core_dev *dev) +{ + struct mlx5_qp_table *table = &dev->priv.qp_table; + + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + mlx5_qp_debugfs_init(dev); +} + +void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) +{ + mlx5_qp_debugfs_cleanup(dev); +} + +int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + struct mlx5_query_qp_mbox_out *out, int outlen) +{ + struct mlx5_query_qp_mbox_in in; + int err; + + memset(&in, 0, sizeof(in)); + memset(out, 0, outlen); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_QP); + in.qpn = cpu_to_be32(qp->qpn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + if (err) + return err; + + if (out->hdr.status) + return mlx5_cmd_status_to_err(&out->hdr); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_qp_query); + +int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) +{ + struct mlx5_alloc_xrcd_mbox_in in; + struct mlx5_alloc_xrcd_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_XRCD); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + else + *xrcdn = be32_to_cpu(out.xrcdn); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc); + +int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) +{ + struct mlx5_dealloc_xrcd_mbox_in in; + struct mlx5_dealloc_xrcd_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_XRCD); + in.xrcdn = cpu_to_be32(xrcdn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c new file mode 100644 index 0000000..38bce93 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include <linux/mlx5/srq.h> +#include <rdma/ib_verbs.h> +#include "mlx5_core.h" + +void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + if (!srq) { + mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn); + return; + } + + srq->event(srq, event_type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); +} + +struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + return srq; +} +EXPORT_SYMBOL(mlx5_core_get_srq); + +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen) +{ + struct mlx5_create_srq_mbox_out out; + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_destroy_srq_mbox_in din; + struct mlx5_destroy_srq_mbox_out dout; + int err; + + memset(&out, 0, sizeof(out)); + in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ); + err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + srq->srqn = be32_to_cpu(out.srqn) & 0xffffff; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, srq->srqn, srq); + spin_unlock_irq(&table->lock); + if (err) { + mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn); + goto err_cmd; + } + + return 0; + +err_cmd: + memset(&din, 0, sizeof(din)); + memset(&dout, 0, sizeof(dout)); + din.srqn = cpu_to_be32(srq->srqn); + din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); + mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout)); + return err; +} +EXPORT_SYMBOL(mlx5_core_create_srq); + +int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) +{ + struct mlx5_destroy_srq_mbox_in in; + struct mlx5_destroy_srq_mbox_out out; + struct mlx5_srq_table *table = &dev->priv.srq_table; + struct mlx5_core_srq *tmp; + int err; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, srq->srqn); + spin_unlock_irq(&table->lock); + if (!tmp) { + mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn); + return -EINVAL; + } + if (tmp != srq) { + mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn); + return -EINVAL; + } + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); + in.srqn = cpu_to_be32(srq->srqn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + return 0; +} +EXPORT_SYMBOL(mlx5_core_destroy_srq); + +int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out) +{ + struct mlx5_query_srq_mbox_in in; + int err; + + memset(&in, 0, sizeof(in)); + memset(out, 0, sizeof(*out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); + in.srqn = cpu_to_be32(srq->srqn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); + if (err) + return err; + + if (out->hdr.status) + return mlx5_cmd_status_to_err(&out->hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_query_srq); + +int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + struct mlx5_arm_srq_mbox_in in; + struct mlx5_arm_srq_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); + in.hdr.opmod = cpu_to_be16(!!is_srq); + in.srqn = cpu_to_be32(srq->srqn); + in.lwm = cpu_to_be16(lwm); + + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + return err; + + if (out.hdr.status) + return mlx5_cmd_status_to_err(&out.hdr); + + return err; +} +EXPORT_SYMBOL(mlx5_core_arm_srq); + +void mlx5_init_srq_table(struct mlx5_core_dev *dev) +{ + struct mlx5_srq_table *table = &dev->priv.srq_table; + + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); +} + +void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) +{ + /* nothing */ +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c new file mode 100644 index 0000000..71d4a39 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +enum { + NUM_DRIVER_UARS = 4, + NUM_LOW_LAT_UUARS = 4, +}; + + +struct mlx5_alloc_uar_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_alloc_uar_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 uarn; + u8 rsvd[4]; +}; + +struct mlx5_free_uar_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 uarn; + u8 rsvd[4]; +}; + +struct mlx5_free_uar_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) +{ + struct mlx5_alloc_uar_mbox_in in; + struct mlx5_alloc_uar_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + goto ex; + + if (out.hdr.status) { + err = mlx5_cmd_status_to_err(&out.hdr); + goto ex; + } + + *uarn = be32_to_cpu(out.uarn) & 0xffffff; + +ex: + return err; +} +EXPORT_SYMBOL(mlx5_cmd_alloc_uar); + +int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) +{ + struct mlx5_free_uar_mbox_in in; + struct mlx5_free_uar_mbox_out out; + int err; + + memset(&in, 0, sizeof(in)); + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR); + in.uarn = cpu_to_be32(uarn); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + if (err) + goto ex; + + if (out.hdr.status) + err = mlx5_cmd_status_to_err(&out.hdr); + +ex: + return err; +} +EXPORT_SYMBOL(mlx5_cmd_free_uar); + +static int need_uuar_lock(int uuarn) +{ + int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE; + + if (uuarn == 0 || tot_uuars - NUM_LOW_LAT_UUARS) + return 0; + + return 1; +} + +int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) +{ + int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE; + struct mlx5_bf *bf; + phys_addr_t addr; + int err; + int i; + + uuari->num_uars = NUM_DRIVER_UARS; + uuari->num_low_latency_uuars = NUM_LOW_LAT_UUARS; + + mutex_init(&uuari->lock); + uuari->uars = kcalloc(uuari->num_uars, sizeof(*uuari->uars), GFP_KERNEL); + if (!uuari->uars) + return -ENOMEM; + + uuari->bfs = kcalloc(tot_uuars, sizeof(*uuari->bfs), GFP_KERNEL); + if (!uuari->bfs) { + err = -ENOMEM; + goto out_uars; + } + + uuari->bitmap = kcalloc(BITS_TO_LONGS(tot_uuars), sizeof(*uuari->bitmap), + GFP_KERNEL); + if (!uuari->bitmap) { + err = -ENOMEM; + goto out_bfs; + } + + uuari->count = kcalloc(tot_uuars, sizeof(*uuari->count), GFP_KERNEL); + if (!uuari->count) { + err = -ENOMEM; + goto out_bitmap; + } + + for (i = 0; i < uuari->num_uars; i++) { + err = mlx5_cmd_alloc_uar(dev, &uuari->uars[i].index); + if (err) + goto out_count; + + addr = dev->iseg_base + ((phys_addr_t)(uuari->uars[i].index) << PAGE_SHIFT); + uuari->uars[i].map = ioremap(addr, PAGE_SIZE); + if (!uuari->uars[i].map) { + mlx5_cmd_free_uar(dev, uuari->uars[i].index); + goto out_count; + } + mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n", + uuari->uars[i].index, uuari->uars[i].map); + } + + for (i = 0; i < tot_uuars; i++) { + bf = &uuari->bfs[i]; + + bf->buf_size = dev->caps.bf_reg_size / 2; + bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE]; + bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map; + bf->reg = NULL; /* Add WC support */ + bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * dev->caps.bf_reg_size + + MLX5_BF_OFFSET; + bf->need_lock = need_uuar_lock(i); + spin_lock_init(&bf->lock); + spin_lock_init(&bf->lock32); + bf->uuarn = i; + } + + return 0; + +out_count: + for (i--; i >= 0; i--) { + iounmap(uuari->uars[i].map); + mlx5_cmd_free_uar(dev, uuari->uars[i].index); + } + kfree(uuari->count); + +out_bitmap: + kfree(uuari->bitmap); + +out_bfs: + kfree(uuari->bfs); + +out_uars: + kfree(uuari->uars); + return err; +} + +int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) +{ + int i = uuari->num_uars; + + for (i--; i >= 0; i--) { + iounmap(uuari->uars[i].map); + mlx5_cmd_free_uar(dev, uuari->uars[i].index); + } + + kfree(uuari->count); + kfree(uuari->bitmap); + kfree(uuari->bfs); + kfree(uuari->uars); + + return 0; +} diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h new file mode 100644 index 0000000..2826a4b --- /dev/null +++ b/include/linux/mlx5/cmd.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_CMD_H +#define MLX5_CMD_H + +#include <linux/types.h> + +struct manage_pages_layout { + u64 ptr; + u32 reserved; + u16 num_entries; + u16 func_id; +}; + + +struct mlx5_cmd_alloc_uar_imm_out { + u32 rsvd[3]; + u32 uarn; +}; + +#endif /* MLX5_CMD_H */ diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h new file mode 100644 index 0000000..3db67f7 --- /dev/null +++ b/include/linux/mlx5/cq.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_CORE_CQ_H +#define MLX5_CORE_CQ_H + +#include <rdma/ib_verbs.h> +#include <linux/mlx5/driver.h> + + +struct mlx5_core_cq { + u32 cqn; + int cqe_sz; + __be32 *set_ci_db; + __be32 *arm_db; + atomic_t refcount; + struct completion free; + unsigned vector; + int irqn; + void (*comp) (struct mlx5_core_cq *); + void (*event) (struct mlx5_core_cq *, enum mlx5_event); + struct mlx5_uar *uar; + u32 cons_index; + unsigned arm_sn; + struct mlx5_rsc_debug *dbg; + int pid; +}; + + +enum { + MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01, + MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02, + MLX5_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04, + MLX5_CQE_SYNDROME_WR_FLUSH_ERR = 0x05, + MLX5_CQE_SYNDROME_MW_BIND_ERR = 0x06, + MLX5_CQE_SYNDROME_BAD_RESP_ERR = 0x10, + MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11, + MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13, + MLX5_CQE_SYNDROME_REMOTE_OP_ERR = 0x14, + MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15, + MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22, +}; + +enum { + MLX5_CQE_OWNER_MASK = 1, + MLX5_CQE_REQ = 0, + MLX5_CQE_RESP_WR_IMM = 1, + MLX5_CQE_RESP_SEND = 2, + MLX5_CQE_RESP_SEND_IMM = 3, + MLX5_CQE_RESP_SEND_INV = 4, + MLX5_CQE_RESIZE_CQ = 0xff, /* TBD */ + MLX5_CQE_REQ_ERR = 13, + MLX5_CQE_RESP_ERR = 14, +}; + +enum { + MLX5_CQ_MODIFY_RESEIZE = 0, + MLX5_CQ_MODIFY_MODER = 1, + MLX5_CQ_MODIFY_MAPPING = 2, +}; + +struct mlx5_cq_modify_params { + int type; + union { + struct { + u32 page_offset; + u8 log_cq_size; + } resize; + + struct { + } moder; + + struct { + } mapping; + } params; +}; + +enum { + CQE_SIZE_64 = 0, + CQE_SIZE_128 = 1, +}; + +static inline int cqe_sz_to_mlx_sz(u8 size) +{ + return size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; +} + +static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq) +{ + *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff); +} + +enum { + MLX5_CQ_DB_REQ_NOT_SOL = 1 << 24, + MLX5_CQ_DB_REQ_NOT = 0 << 24 +}; + +static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, + void __iomem *uar_page, + spinlock_t *doorbell_lock) +{ + __be32 doorbell[2]; + u32 sn; + u32 ci; + + sn = cq->arm_sn & 3; + ci = cq->cons_index & 0xffffff; + + *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci); + + /* Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI MMIO. + */ + wmb(); + + doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci); + doorbell[1] = cpu_to_be32(cq->cqn); + + mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, doorbell_lock); +} + +int mlx5_init_cq_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev); +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + struct mlx5_create_cq_mbox_in *in, int inlen); +int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); +int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + struct mlx5_query_cq_mbox_out *out); +int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + int type, struct mlx5_cq_modify_params *params); +int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); +void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); + +#endif /* MLX5_CORE_CQ_H */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h new file mode 100644 index 0000000..8de8d8f --- /dev/null +++ b/include/linux/mlx5/device.h @@ -0,0 +1,893 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_DEVICE_H +#define MLX5_DEVICE_H + +#include <linux/types.h> +#include <rdma/ib_verbs.h> + +#if defined(__LITTLE_ENDIAN) +#define MLX5_SET_HOST_ENDIANNESS 0 +#elif defined(__BIG_ENDIAN) +#define MLX5_SET_HOST_ENDIANNESS 0x80 +#else +#error Host endianness not defined +#endif + +enum { + MLX5_MAX_COMMANDS = 32, + MLX5_CMD_DATA_BLOCK_SIZE = 512, + MLX5_PCI_CMD_XPORT = 7, +}; + +enum { + MLX5_EXTENDED_UD_AV = 0x80000000, +}; + +enum { + MLX5_CQ_STATE_ARMED = 9, + MLX5_CQ_STATE_ALWAYS_ARMED = 0xb, + MLX5_CQ_STATE_FIRED = 0xa, +}; + +enum { + MLX5_STAT_RATE_OFFSET = 5, +}; + +enum { + MLX5_INLINE_SEG = 0x80000000, +}; + +enum { + MLX5_PERM_LOCAL_READ = 1 << 2, + MLX5_PERM_LOCAL_WRITE = 1 << 3, + MLX5_PERM_REMOTE_READ = 1 << 4, + MLX5_PERM_REMOTE_WRITE = 1 << 5, + MLX5_PERM_ATOMIC = 1 << 6, + MLX5_PERM_UMR_EN = 1 << 7, +}; + +enum { + MLX5_PCIE_CTRL_SMALL_FENCE = 1 << 0, + MLX5_PCIE_CTRL_RELAXED_ORDERING = 1 << 2, + MLX5_PCIE_CTRL_NO_SNOOP = 1 << 3, + MLX5_PCIE_CTRL_TLP_PROCE_EN = 1 << 6, + MLX5_PCIE_CTRL_TPH_MASK = 3 << 4, +}; + +enum { + MLX5_ACCESS_MODE_PA = 0, + MLX5_ACCESS_MODE_MTT = 1, + MLX5_ACCESS_MODE_KLM = 2 +}; + +enum { + MLX5_MKEY_REMOTE_INVAL = 1 << 24, + MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, + MLX5_MKEY_BSF_EN = 1 << 30, + MLX5_MKEY_LEN64 = 1 << 31, +}; + +enum { + MLX5_EN_RD = (u64)1, + MLX5_EN_WR = (u64)2 +}; + +enum { + MLX5_BF_REGS_PER_PAGE = 4, + MLX5_MAX_UAR_PAGES = 1 << 8, + MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE, +}; + +enum { + MLX5_MKEY_MASK_LEN = 1ull << 0, + MLX5_MKEY_MASK_PAGE_SIZE = 1ull << 1, + MLX5_MKEY_MASK_START_ADDR = 1ull << 6, + MLX5_MKEY_MASK_PD = 1ull << 7, + MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8, + MLX5_MKEY_MASK_BSF_EN = 1ull << 12, + MLX5_MKEY_MASK_KEY = 1ull << 13, + MLX5_MKEY_MASK_QPN = 1ull << 14, + MLX5_MKEY_MASK_LR = 1ull << 17, + MLX5_MKEY_MASK_LW = 1ull << 18, + MLX5_MKEY_MASK_RR = 1ull << 19, + MLX5_MKEY_MASK_RW = 1ull << 20, + MLX5_MKEY_MASK_A = 1ull << 21, + MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23, + MLX5_MKEY_MASK_FREE = 1ull << 29, +}; + +enum mlx5_event { + MLX5_EVENT_TYPE_COMP = 0x0, + + MLX5_EVENT_TYPE_PATH_MIG = 0x01, + MLX5_EVENT_TYPE_COMM_EST = 0x02, + MLX5_EVENT_TYPE_SQ_DRAINED = 0x03, + MLX5_EVENT_TYPE_SRQ_LAST_WQE = 0x13, + MLX5_EVENT_TYPE_SRQ_RQ_LIMIT = 0x14, + + MLX5_EVENT_TYPE_CQ_ERROR = 0x04, + MLX5_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + MLX5_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, + MLX5_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, + MLX5_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + + MLX5_EVENT_TYPE_INTERNAL_ERROR = 0x08, + MLX5_EVENT_TYPE_PORT_CHANGE = 0x09, + MLX5_EVENT_TYPE_GPIO_EVENT = 0x15, + MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, + + MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a, + MLX5_EVENT_TYPE_STALL_EVENT = 0x1b, + + MLX5_EVENT_TYPE_CMD = 0x0a, + MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb, +}; + +enum { + MLX5_PORT_CHANGE_SUBTYPE_DOWN = 1, + MLX5_PORT_CHANGE_SUBTYPE_ACTIVE = 4, + MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED = 5, + MLX5_PORT_CHANGE_SUBTYPE_LID = 6, + MLX5_PORT_CHANGE_SUBTYPE_PKEY = 7, + MLX5_PORT_CHANGE_SUBTYPE_GUID = 8, + MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG = 9, +}; + +enum { + MLX5_DEV_CAP_FLAG_RC = 1LL << 0, + MLX5_DEV_CAP_FLAG_UC = 1LL << 1, + MLX5_DEV_CAP_FLAG_UD = 1LL << 2, + MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, + MLX5_DEV_CAP_FLAG_SRQ = 1LL << 6, + MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, + MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, + MLX5_DEV_CAP_FLAG_APM = 1LL << 17, + MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, + MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, + MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32, + MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38, + MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, + MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, + MLX5_DEV_CAP_FLAG_DCT = 1LL << 41, + MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 1LL << 46, +}; + +enum { + MLX5_OPCODE_NOP = 0x00, + MLX5_OPCODE_SEND_INVAL = 0x01, + MLX5_OPCODE_RDMA_WRITE = 0x08, + MLX5_OPCODE_RDMA_WRITE_IMM = 0x09, + MLX5_OPCODE_SEND = 0x0a, + MLX5_OPCODE_SEND_IMM = 0x0b, + MLX5_OPCODE_RDMA_READ = 0x10, + MLX5_OPCODE_ATOMIC_CS = 0x11, + MLX5_OPCODE_ATOMIC_FA = 0x12, + MLX5_OPCODE_ATOMIC_MASKED_CS = 0x14, + MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15, + MLX5_OPCODE_BIND_MW = 0x18, + MLX5_OPCODE_CONFIG_CMD = 0x1f, + + MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, + MLX5_RECV_OPCODE_SEND = 0x01, + MLX5_RECV_OPCODE_SEND_IMM = 0x02, + MLX5_RECV_OPCODE_SEND_INVAL = 0x03, + + MLX5_CQE_OPCODE_ERROR = 0x1e, + MLX5_CQE_OPCODE_RESIZE = 0x16, + + MLX5_OPCODE_SET_PSV = 0x20, + MLX5_OPCODE_GET_PSV = 0x21, + MLX5_OPCODE_CHECK_PSV = 0x22, + MLX5_OPCODE_RGET_PSV = 0x26, + MLX5_OPCODE_RCHECK_PSV = 0x27, + + MLX5_OPCODE_UMR = 0x25, + +}; + +enum { + MLX5_SET_PORT_RESET_QKEY = 0, + MLX5_SET_PORT_GUID0 = 16, + MLX5_SET_PORT_NODE_GUID = 17, + MLX5_SET_PORT_SYS_GUID = 18, + MLX5_SET_PORT_GID_TABLE = 19, + MLX5_SET_PORT_PKEY_TABLE = 20, +}; + +enum { + MLX5_MAX_PAGE_SHIFT = 31 +}; + +struct mlx5_inbox_hdr { + __be16 opcode; + u8 rsvd[4]; + __be16 opmod; +}; + +struct mlx5_outbox_hdr { + u8 status; + u8 rsvd[3]; + __be32 syndrome; +}; + +struct mlx5_cmd_query_adapter_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cmd_query_adapter_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[24]; + u8 intapin; + u8 rsvd1[13]; + __be16 vsd_vendor_id; + u8 vsd[208]; + u8 vsd_psid[16]; +}; + +struct mlx5_hca_cap { + u8 rsvd1[16]; + u8 log_max_srq_sz; + u8 log_max_qp_sz; + u8 rsvd2; + u8 log_max_qp; + u8 log_max_strq_sz; + u8 log_max_srqs; + u8 rsvd4[2]; + u8 rsvd5; + u8 log_max_cq_sz; + u8 rsvd6; + u8 log_max_cq; + u8 log_max_eq_sz; + u8 log_max_mkey; + u8 rsvd7; + u8 log_max_eq; + u8 max_indirection; + u8 log_max_mrw_sz; + u8 log_max_bsf_list_sz; + u8 log_max_klm_list_sz; + u8 rsvd_8_0; + u8 log_max_ra_req_dc; + u8 rsvd_8_1; + u8 log_max_ra_res_dc; + u8 rsvd9; + u8 log_max_ra_req_qp; + u8 rsvd10; + u8 log_max_ra_res_qp; + u8 rsvd11[4]; + __be16 max_qp_count; + __be16 rsvd12; + u8 rsvd13; + u8 local_ca_ack_delay; + u8 rsvd14; + u8 num_ports; + u8 log_max_msg; + u8 rsvd15[3]; + __be16 stat_rate_support; + u8 rsvd16[2]; + __be64 flags; + u8 rsvd17; + u8 uar_sz; + u8 rsvd18; + u8 log_pg_sz; + __be16 bf_log_bf_reg_size; + u8 rsvd19[4]; + __be16 max_desc_sz_sq; + u8 rsvd20[2]; + __be16 max_desc_sz_rq; + u8 rsvd21[2]; + __be16 max_desc_sz_sq_dc; + u8 rsvd22[4]; + __be16 max_qp_mcg; + u8 rsvd23; + u8 log_max_mcg; + u8 rsvd24; + u8 log_max_pd; + u8 rsvd25; + u8 log_max_xrcd; + u8 rsvd26[42]; + __be16 log_uar_page_sz; + u8 rsvd27[28]; + u8 log_msx_atomic_size_qp; + u8 rsvd28[2]; + u8 log_msx_atomic_size_dc; + u8 rsvd29[76]; +}; + + +struct mlx5_cmd_query_hca_cap_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + + +struct mlx5_cmd_query_hca_cap_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; + struct mlx5_hca_cap hca_cap; +}; + + +struct mlx5_cmd_set_hca_cap_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; + struct mlx5_hca_cap hca_cap; +}; + + +struct mlx5_cmd_set_hca_cap_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + + +struct mlx5_cmd_init_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[2]; + __be16 profile; + u8 rsvd1[4]; +}; + +struct mlx5_cmd_init_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cmd_teardown_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[2]; + __be16 profile; + u8 rsvd1[4]; +}; + +struct mlx5_cmd_teardown_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cmd_layout { + u8 type; + u8 rsvd0[3]; + __be32 inlen; + __be64 in_ptr; + __be32 in[4]; + __be32 out[4]; + __be64 out_ptr; + __be32 outlen; + u8 token; + u8 sig; + u8 rsvd1; + u8 status_own; +}; + + +struct health_buffer { + __be32 assert_var[5]; + __be32 rsvd0[3]; + __be32 assert_exit_ptr; + __be32 assert_callra; + __be32 rsvd1[2]; + __be32 fw_ver; + __be32 hw_id; + __be32 rsvd2; + u8 irisc_index; + u8 synd; + __be16 ext_sync; +}; + +struct mlx5_init_seg { + __be32 fw_rev; + __be32 cmdif_rev_fw_sub; + __be32 rsvd0[2]; + __be32 cmdq_addr_h; + __be32 cmdq_addr_l_sz; + __be32 cmd_dbell; + __be32 rsvd1[121]; + struct health_buffer health; + __be32 rsvd2[884]; + __be32 health_counter; + __be32 rsvd3[1023]; + __be64 ieee1588_clk; + __be32 ieee1588_clk_type; + __be32 clr_intx; +}; + +struct mlx5_eqe_comp { + __be32 reserved[6]; + __be32 cqn; +}; + +struct mlx5_eqe_qp_srq { + __be32 reserved[6]; + __be32 qp_srq_n; +}; + +struct mlx5_eqe_cq_err { + __be32 cqn; + u8 reserved1[7]; + u8 syndrome; +}; + +struct mlx5_eqe_dropped_packet { +}; + +struct mlx5_eqe_port_state { + u8 reserved0[8]; + u8 port; +}; + +struct mlx5_eqe_gpio { + __be32 reserved0[2]; + __be64 gpio_event; +}; + +struct mlx5_eqe_congestion { + u8 type; + u8 rsvd0; + u8 congestion_level; +}; + +struct mlx5_eqe_stall_vl { + u8 rsvd0[3]; + u8 port_vl; +}; + +struct mlx5_eqe_cmd { + __be32 vector; + __be32 rsvd[6]; +}; + +struct mlx5_eqe_page_req { + u8 rsvd0[2]; + __be16 func_id; + u8 rsvd1[2]; + __be16 num_pages; + __be32 rsvd2[5]; +}; + +union ev_data { + __be32 raw[7]; + struct mlx5_eqe_cmd cmd; + struct mlx5_eqe_comp comp; + struct mlx5_eqe_qp_srq qp_srq; + struct mlx5_eqe_cq_err cq_err; + struct mlx5_eqe_dropped_packet dp; + struct mlx5_eqe_port_state port; + struct mlx5_eqe_gpio gpio; + struct mlx5_eqe_congestion cong; + struct mlx5_eqe_stall_vl stall_vl; + struct mlx5_eqe_page_req req_pages; +} __packed; + +struct mlx5_eqe { + u8 rsvd0; + u8 type; + u8 rsvd1; + u8 sub_type; + __be32 rsvd2[7]; + union ev_data data; + __be16 rsvd3; + u8 signature; + u8 owner; +} __packed; + +struct mlx5_cmd_prot_block { + u8 data[MLX5_CMD_DATA_BLOCK_SIZE]; + u8 rsvd0[48]; + __be64 next; + __be32 block_num; + u8 rsvd1; + u8 token; + u8 ctrl_sig; + u8 sig; +}; + +struct mlx5_err_cqe { + u8 rsvd0[32]; + __be32 srqn; + u8 rsvd1[18]; + u8 vendor_err_synd; + u8 syndrome; + __be32 s_wqe_opcode_qpn; + __be16 wqe_counter; + u8 signature; + u8 op_own; +}; + +struct mlx5_cqe64 { + u8 rsvd0[17]; + u8 ml_path; + u8 rsvd20[4]; + __be16 slid; + __be32 flags_rqpn; + u8 rsvd28[4]; + __be32 srqn; + __be32 imm_inval_pkey; + u8 rsvd40[4]; + __be32 byte_cnt; + __be64 timestamp; + __be32 sop_drop_qpn; + __be16 wqe_counter; + u8 signature; + u8 op_own; +}; + +struct mlx5_wqe_srq_next_seg { + u8 rsvd0[2]; + __be16 next_wqe_index; + u8 signature; + u8 rsvd1[11]; +}; + +union mlx5_ext_cqe { + struct ib_grh grh; + u8 inl[64]; +}; + +struct mlx5_cqe128 { + union mlx5_ext_cqe inl_grh; + struct mlx5_cqe64 cqe64; +}; + +struct mlx5_srq_ctx { + u8 state_log_sz; + u8 rsvd0[3]; + __be32 flags_xrcd; + __be32 pgoff_cqn; + u8 rsvd1[4]; + u8 log_pg_sz; + u8 rsvd2[7]; + __be32 pd; + __be16 lwm; + __be16 wqe_cnt; + u8 rsvd3[8]; + __be64 db_record; +}; + +struct mlx5_create_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_srqn; + u8 rsvd0[4]; + struct mlx5_srq_ctx ctx; + u8 rsvd1[208]; + __be64 pas[0]; +}; + +struct mlx5_create_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 srqn; + u8 rsvd[4]; +}; + +struct mlx5_destroy_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 srqn; + u8 rsvd[4]; +}; + +struct mlx5_destroy_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 srqn; + u8 rsvd0[4]; +}; + +struct mlx5_query_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; + struct mlx5_srq_ctx ctx; + u8 rsvd1[32]; + __be64 pas[0]; +}; + +struct mlx5_arm_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 srqn; + __be16 rsvd; + __be16 lwm; +}; + +struct mlx5_arm_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cq_context { + u8 status; + u8 cqe_sz_flags; + u8 st; + u8 rsvd3; + u8 rsvd4[6]; + __be16 page_offset; + __be32 log_sz_usr_page; + __be16 cq_period; + __be16 cq_max_count; + __be16 rsvd20; + __be16 c_eqn; + u8 log_pg_sz; + u8 rsvd25[7]; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_counter; + __be32 producer_counter; + u8 rsvd48[8]; + __be64 db_record_addr; +}; + +struct mlx5_create_cq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_cqn; + u8 rsvdx[4]; + struct mlx5_cq_context ctx; + u8 rsvd6[192]; + __be64 pas[0]; +}; + +struct mlx5_create_cq_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 cqn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_cq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 cqn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_cq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + +struct mlx5_query_cq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 cqn; + u8 rsvd0[4]; +}; + +struct mlx5_query_cq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; + struct mlx5_cq_context ctx; + u8 rsvd6[16]; + __be64 pas[0]; +}; + +struct mlx5_eq_context { + u8 status; + u8 ec_oi; + u8 st; + u8 rsvd2[7]; + __be16 page_pffset; + __be32 log_sz_usr_page; + u8 rsvd3[7]; + u8 intr; + u8 log_page_size; + u8 rsvd4[15]; + __be32 consumer_counter; + __be32 produser_counter; + u8 rsvd5[16]; +}; + +struct mlx5_create_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[3]; + u8 input_eqn; + u8 rsvd1[4]; + struct mlx5_eq_context ctx; + u8 rsvd2[8]; + __be64 events_mask; + u8 rsvd3[176]; + __be64 pas[0]; +}; + +struct mlx5_create_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[3]; + u8 eq_number; + u8 rsvd1[4]; +}; + +struct mlx5_destroy_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[3]; + u8 eqn; + u8 rsvd1[4]; +}; + +struct mlx5_destroy_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_map_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be64 mask; + u8 mu; + u8 rsvd0[2]; + u8 eqn; + u8 rsvd1[24]; +}; + +struct mlx5_map_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[3]; + u8 eqn; + u8 rsvd1[4]; +}; + +struct mlx5_query_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + struct mlx5_eq_context ctx; +}; + +struct mlx5_mkey_seg { + /* This is a two bit field occupying bits 31-30. + * bit 31 is always 0, + * bit 30 is zero for regular MRs and 1 (e.g free) for UMRs that do not have tanslation + */ + u8 status; + u8 pcie_control; + u8 flags; + u8 version; + __be32 qpn_mkey7_0; + u8 rsvd1[4]; + __be32 flags_pd; + __be64 start_addr; + __be64 len; + __be32 bsfs_octo_size; + u8 rsvd2[16]; + __be32 xlt_oct_size; + u8 rsvd3[3]; + u8 log2_page_size; + u8 rsvd4[4]; +}; + +struct mlx5_query_special_ctxs_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_special_ctxs_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 dump_fill_mkey; + __be32 reserved_lkey; +}; + +struct mlx5_create_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_mkey_index; + u8 rsvd0[4]; + struct mlx5_mkey_seg seg; + u8 rsvd1[16]; + __be32 xlat_oct_act_size; + __be32 bsf_coto_act_size; + u8 rsvd2[168]; + __be64 pas[0]; +}; + +struct mlx5_create_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 mkey; + u8 rsvd[4]; +}; + +struct mlx5_destroy_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 mkey; + u8 rsvd[4]; +}; + +struct mlx5_destroy_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 mkey; +}; + +struct mlx5_query_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + __be64 pas[0]; +}; + +struct mlx5_modify_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 mkey; + __be64 pas[0]; +}; + +struct mlx5_modify_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; +}; + +struct mlx5_dump_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; +}; + +struct mlx5_dump_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 mkey; +}; + +struct mlx5_mad_ifc_mbox_in { + struct mlx5_inbox_hdr hdr; + __be16 remote_lid; + u8 rsvd0; + u8 port; + u8 rsvd1[4]; + u8 data[256]; +}; + +struct mlx5_mad_ifc_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + u8 data[256]; +}; + +struct mlx5_access_reg_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[2]; + __be16 register_id; + __be32 arg; + __be32 data[0]; +}; + +struct mlx5_access_reg_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + __be32 data[0]; +}; + +#define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) + +enum { + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 +}; + +#endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h new file mode 100644 index 0000000..163a818 --- /dev/null +++ b/include/linux/mlx5/doorbell.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_DOORBELL_H +#define MLX5_DOORBELL_H + +#define MLX5_BF_OFFSET 0x800 +#define MLX5_CQ_DOORBELL 0x20 + +#if BITS_PER_LONG == 64 +/* Assume that we can just write a 64-bit doorbell atomically. s390 + * actually doesn't have writeq() but S/390 systems don't even have + * PCI so we won't worry about it. + */ + +#define MLX5_DECLARE_DOORBELL_LOCK(name) +#define MLX5_INIT_DOORBELL_LOCK(ptr) do { } while (0) +#define MLX5_GET_DOORBELL_LOCK(ptr) (NULL) + +static inline void mlx5_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + __raw_writeq(*(u64 *)val, dest); +} + +#else + +/* Just fall back to a spinlock to protect the doorbell if + * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit + * MMIO writes. + */ + +#define MLX5_DECLARE_DOORBELL_LOCK(name) spinlock_t name; +#define MLX5_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) +#define MLX5_GET_DOORBELL_LOCK(ptr) (ptr) + +static inline void mlx5_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + unsigned long flags; + + spin_lock_irqsave(doorbell_lock, flags); + __raw_writel((__force u32) val[0], dest); + __raw_writel((__force u32) val[1], dest + 4); + spin_unlock_irqrestore(doorbell_lock, flags); +} + +#endif + +#endif /* MLX5_DOORBELL_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h new file mode 100644 index 0000000..f22e441 --- /dev/null +++ b/include/linux/mlx5/driver.h @@ -0,0 +1,769 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_DRIVER_H +#define MLX5_DRIVER_H + +#include <linux/kernel.h> +#include <linux/completion.h> +#include <linux/pci.h> +#include <linux/spinlock_types.h> +#include <linux/semaphore.h> +#include <linux/vmalloc.h> +#include <linux/radix-tree.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/doorbell.h> + +enum { + MLX5_BOARD_ID_LEN = 64, + MLX5_MAX_NAME_LEN = 16, +}; + +enum { + /* one minute for the sake of bringup. Generally, commands must always + * complete and we may need to increase this timeout value + */ + MLX5_CMD_TIMEOUT_MSEC = 7200 * 1000, + MLX5_CMD_WQ_MAX_NAME = 32, +}; + +enum { + CMD_OWNER_SW = 0x0, + CMD_OWNER_HW = 0x1, + CMD_STATUS_SUCCESS = 0, +}; + +enum mlx5_sqp_t { + MLX5_SQP_SMI = 0, + MLX5_SQP_GSI = 1, + MLX5_SQP_IEEE_1588 = 2, + MLX5_SQP_SNIFFER = 3, + MLX5_SQP_SYNC_UMR = 4, +}; + +enum { + MLX5_MAX_PORTS = 2, +}; + +enum { + MLX5_EQ_VEC_PAGES = 0, + MLX5_EQ_VEC_CMD = 1, + MLX5_EQ_VEC_ASYNC = 2, + MLX5_EQ_VEC_COMP_BASE, +}; + +enum { + MLX5_MAX_EQ_NAME = 20 +}; + +enum { + MLX5_ATOMIC_MODE_IB_COMP = 1 << 16, + MLX5_ATOMIC_MODE_CX = 2 << 16, + MLX5_ATOMIC_MODE_8B = 3 << 16, + MLX5_ATOMIC_MODE_16B = 4 << 16, + MLX5_ATOMIC_MODE_32B = 5 << 16, + MLX5_ATOMIC_MODE_64B = 6 << 16, + MLX5_ATOMIC_MODE_128B = 7 << 16, + MLX5_ATOMIC_MODE_256B = 8 << 16, +}; + +enum { + MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, + MLX5_CMD_OP_QUERY_ADAPTER = 0x101, + MLX5_CMD_OP_INIT_HCA = 0x102, + MLX5_CMD_OP_TEARDOWN_HCA = 0x103, + MLX5_CMD_OP_QUERY_PAGES = 0x107, + MLX5_CMD_OP_MANAGE_PAGES = 0x108, + MLX5_CMD_OP_SET_HCA_CAP = 0x109, + + MLX5_CMD_OP_CREATE_MKEY = 0x200, + MLX5_CMD_OP_QUERY_MKEY = 0x201, + MLX5_CMD_OP_DESTROY_MKEY = 0x202, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS = 0x203, + + MLX5_CMD_OP_CREATE_EQ = 0x301, + MLX5_CMD_OP_DESTROY_EQ = 0x302, + MLX5_CMD_OP_QUERY_EQ = 0x303, + + MLX5_CMD_OP_CREATE_CQ = 0x400, + MLX5_CMD_OP_DESTROY_CQ = 0x401, + MLX5_CMD_OP_QUERY_CQ = 0x402, + MLX5_CMD_OP_MODIFY_CQ = 0x403, + + MLX5_CMD_OP_CREATE_QP = 0x500, + MLX5_CMD_OP_DESTROY_QP = 0x501, + MLX5_CMD_OP_RST2INIT_QP = 0x502, + MLX5_CMD_OP_INIT2RTR_QP = 0x503, + MLX5_CMD_OP_RTR2RTS_QP = 0x504, + MLX5_CMD_OP_RTS2RTS_QP = 0x505, + MLX5_CMD_OP_SQERR2RTS_QP = 0x506, + MLX5_CMD_OP_2ERR_QP = 0x507, + MLX5_CMD_OP_RTS2SQD_QP = 0x508, + MLX5_CMD_OP_SQD2RTS_QP = 0x509, + MLX5_CMD_OP_2RST_QP = 0x50a, + MLX5_CMD_OP_QUERY_QP = 0x50b, + MLX5_CMD_OP_CONF_SQP = 0x50c, + MLX5_CMD_OP_MAD_IFC = 0x50d, + MLX5_CMD_OP_INIT2INIT_QP = 0x50e, + MLX5_CMD_OP_SUSPEND_QP = 0x50f, + MLX5_CMD_OP_UNSUSPEND_QP = 0x510, + MLX5_CMD_OP_SQD2SQD_QP = 0x511, + MLX5_CMD_OP_ALLOC_QP_COUNTER_SET = 0x512, + MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET = 0x513, + MLX5_CMD_OP_QUERY_QP_COUNTER_SET = 0x514, + + MLX5_CMD_OP_CREATE_PSV = 0x600, + MLX5_CMD_OP_DESTROY_PSV = 0x601, + MLX5_CMD_OP_QUERY_PSV = 0x602, + MLX5_CMD_OP_QUERY_SIG_RULE_TABLE = 0x603, + MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE = 0x604, + + MLX5_CMD_OP_CREATE_SRQ = 0x700, + MLX5_CMD_OP_DESTROY_SRQ = 0x701, + MLX5_CMD_OP_QUERY_SRQ = 0x702, + MLX5_CMD_OP_ARM_RQ = 0x703, + MLX5_CMD_OP_RESIZE_SRQ = 0x704, + + MLX5_CMD_OP_ALLOC_PD = 0x800, + MLX5_CMD_OP_DEALLOC_PD = 0x801, + MLX5_CMD_OP_ALLOC_UAR = 0x802, + MLX5_CMD_OP_DEALLOC_UAR = 0x803, + + MLX5_CMD_OP_ATTACH_TO_MCG = 0x806, + MLX5_CMD_OP_DETACH_FROM_MCG = 0x807, + + + MLX5_CMD_OP_ALLOC_XRCD = 0x80e, + MLX5_CMD_OP_DEALLOC_XRCD = 0x80f, + + MLX5_CMD_OP_ACCESS_REG = 0x805, + MLX5_CMD_OP_MAX = 0x810, +}; + +enum { + MLX5_REG_PCAP = 0x5001, + MLX5_REG_PMTU = 0x5003, + MLX5_REG_PTYS = 0x5004, + MLX5_REG_PAOS = 0x5006, + MLX5_REG_PMAOS = 0x5012, + MLX5_REG_PUDE = 0x5009, + MLX5_REG_PMPE = 0x5010, + MLX5_REG_PELC = 0x500e, + MLX5_REG_PMLP = 0, /* TBD */ + MLX5_REG_NODE_DESC = 0x6001, + MLX5_REG_HOST_ENDIANNESS = 0x7004, +}; + +enum dbg_rsc_type { + MLX5_DBG_RSC_QP, + MLX5_DBG_RSC_EQ, + MLX5_DBG_RSC_CQ, +}; + +struct mlx5_field_desc { + struct dentry *dent; + int i; +}; + +struct mlx5_rsc_debug { + struct mlx5_core_dev *dev; + void *object; + enum dbg_rsc_type type; + struct dentry *root; + struct mlx5_field_desc fields[0]; +}; + +enum mlx5_dev_event { + MLX5_DEV_EVENT_SYS_ERROR, + MLX5_DEV_EVENT_PORT_UP, + MLX5_DEV_EVENT_PORT_DOWN, + MLX5_DEV_EVENT_PORT_INITIALIZED, + MLX5_DEV_EVENT_LID_CHANGE, + MLX5_DEV_EVENT_PKEY_CHANGE, + MLX5_DEV_EVENT_GUID_CHANGE, + MLX5_DEV_EVENT_CLIENT_REREG, +}; + +struct mlx5_uuar_info { + struct mlx5_uar *uars; + int num_uars; + int num_low_latency_uuars; + unsigned long *bitmap; + unsigned int *count; + struct mlx5_bf *bfs; + + /* + * protect uuar allocation data structs + */ + struct mutex lock; +}; + +struct mlx5_bf { + void __iomem *reg; + void __iomem *regreg; + int buf_size; + struct mlx5_uar *uar; + unsigned long offset; + int need_lock; + /* protect blue flame buffer selection when needed + */ + spinlock_t lock; + + /* serialize 64 bit writes when done as two 32 bit accesses + */ + spinlock_t lock32; + int uuarn; +}; + +struct mlx5_cmd_first { + __be32 data[4]; +}; + +struct mlx5_cmd_msg { + struct list_head list; + struct cache_ent *cache; + u32 len; + struct mlx5_cmd_first first; + struct mlx5_cmd_mailbox *next; +}; + +struct mlx5_cmd_debug { + struct dentry *dbg_root; + struct dentry *dbg_in; + struct dentry *dbg_out; + struct dentry *dbg_outlen; + struct dentry *dbg_status; + struct dentry *dbg_run; + void *in_msg; + void *out_msg; + u8 status; + u16 inlen; + u16 outlen; +}; + +struct cache_ent { + /* protect block chain allocations + */ + spinlock_t lock; + struct list_head head; +}; + +struct cmd_msg_cache { + struct cache_ent large; + struct cache_ent med; + +}; + +struct mlx5_cmd_stats { + u64 sum; + u64 n; + struct dentry *root; + struct dentry *avg; + struct dentry *count; + /* protect command average calculations */ + spinlock_t lock; +}; + +struct mlx5_cmd { + void *cmd_buf; + dma_addr_t dma; + u16 cmdif_rev; + u8 log_sz; + u8 log_stride; + int max_reg_cmds; + int events; + u32 __iomem *vector; + + /* protect command queue allocations + */ + spinlock_t alloc_lock; + + /* protect token allocations + */ + spinlock_t token_lock; + u8 token; + unsigned long bitmask; + char wq_name[MLX5_CMD_WQ_MAX_NAME]; + struct workqueue_struct *wq; + struct semaphore sem; + struct semaphore pages_sem; + int mode; + struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; + struct pci_pool *pool; + struct mlx5_cmd_debug dbg; + struct cmd_msg_cache cache; + int checksum_disabled; + struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; +}; + +struct mlx5_port_caps { + int gid_table_len; + int pkey_table_len; +}; + +struct mlx5_caps { + u8 log_max_eq; + u8 log_max_cq; + u8 log_max_qp; + u8 log_max_mkey; + u8 log_max_pd; + u8 log_max_srq; + u32 max_cqes; + int max_wqes; + int max_sq_desc_sz; + int max_rq_desc_sz; + u64 flags; + u16 stat_rate_support; + int log_max_msg; + int num_ports; + int max_ra_res_qp; + int max_ra_req_qp; + int max_srq_wqes; + int bf_reg_size; + int bf_regs_per_page; + struct mlx5_port_caps port[MLX5_MAX_PORTS]; + u8 ext_port_cap[MLX5_MAX_PORTS]; + int max_vf; + u32 reserved_lkey; + u8 local_ca_ack_delay; + u8 log_max_mcg; + u16 max_qp_mcg; + int min_page_sz; +}; + +struct mlx5_cmd_mailbox { + void *buf; + dma_addr_t dma; + struct mlx5_cmd_mailbox *next; +}; + +struct mlx5_buf_list { + void *buf; + dma_addr_t map; +}; + +struct mlx5_buf { + struct mlx5_buf_list direct; + struct mlx5_buf_list *page_list; + int nbufs; + int npages; + int page_shift; + int size; +}; + +struct mlx5_eq { + struct mlx5_core_dev *dev; + __be32 __iomem *doorbell; + u32 cons_index; + struct mlx5_buf buf; + int size; + u8 irqn; + u8 eqn; + int nent; + u64 mask; + char name[MLX5_MAX_EQ_NAME]; + struct list_head list; + int index; + struct mlx5_rsc_debug *dbg; +}; + + +struct mlx5_core_mr { + u64 iova; + u64 size; + u32 key; + u32 pd; + u32 access; +}; + +struct mlx5_core_srq { + u32 srqn; + int max; + int max_gs; + int max_avail_gather; + int wqe_shift; + void (*event) (struct mlx5_core_srq *, enum mlx5_event); + + atomic_t refcount; + struct completion free; +}; + +struct mlx5_eq_table { + void __iomem *update_ci; + void __iomem *update_arm_ci; + struct list_head *comp_eq_head; + struct mlx5_eq pages_eq; + struct mlx5_eq async_eq; + struct mlx5_eq cmd_eq; + struct msix_entry *msix_arr; + int num_comp_vectors; + /* protect EQs list + */ + spinlock_t lock; +}; + +struct mlx5_uar { + u32 index; + struct list_head bf_list; + unsigned free_bf_bmap; + void __iomem *wc_map; + void __iomem *map; +}; + + +struct mlx5_core_health { + struct health_buffer __iomem *health; + __be32 __iomem *health_counter; + struct timer_list timer; + struct list_head list; + u32 prev; + int miss_counter; +}; + +struct mlx5_cq_table { + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +struct mlx5_qp_table { + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +struct mlx5_srq_table { + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +struct mlx5_priv { + char name[MLX5_MAX_NAME_LEN]; + struct mlx5_eq_table eq_table; + struct mlx5_uuar_info uuari; + MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); + + /* pages stuff */ + struct workqueue_struct *pg_wq; + struct rb_root page_root; + int fw_pages; + int reg_pages; + + struct mlx5_core_health health; + + struct mlx5_srq_table srq_table; + + /* start: qp staff */ + struct mlx5_qp_table qp_table; + struct dentry *qp_debugfs; + struct dentry *eq_debugfs; + struct dentry *cq_debugfs; + struct dentry *cmdif_debugfs; + /* end: qp staff */ + + /* start: cq staff */ + struct mlx5_cq_table cq_table; + /* end: cq staff */ + + /* start: alloc staff */ + struct mutex pgdir_mutex; + struct list_head pgdir_list; + /* end: alloc staff */ + struct dentry *dbg_root; + + /* protect mkey key part */ + spinlock_t mkey_lock; + u8 mkey_key; +}; + +struct mlx5_core_dev { + struct pci_dev *pdev; + u8 rev_id; + char board_id[MLX5_BOARD_ID_LEN]; + struct mlx5_cmd cmd; + struct mlx5_caps caps; + phys_addr_t iseg_base; + struct mlx5_init_seg __iomem *iseg; + void (*event) (struct mlx5_core_dev *dev, + enum mlx5_dev_event event, + void *data); + struct mlx5_priv priv; + struct mlx5_profile *profile; + atomic_t num_qps; +}; + +struct mlx5_db { + __be32 *db; + union { + struct mlx5_db_pgdir *pgdir; + struct mlx5_ib_user_db_page *user_page; + } u; + dma_addr_t dma; + int index; +}; + +enum { + MLX5_DB_PER_PAGE = PAGE_SIZE / L1_CACHE_BYTES, +}; + +enum { + MLX5_COMP_EQ_SIZE = 1024, +}; + +struct mlx5_db_pgdir { + struct list_head list; + DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE); + __be32 *db_page; + dma_addr_t db_dma; +}; + +typedef void (*mlx5_cmd_cbk_t)(int status, void *context); + +struct mlx5_cmd_work_ent { + struct mlx5_cmd_msg *in; + struct mlx5_cmd_msg *out; + mlx5_cmd_cbk_t callback; + void *context; + int idx; + struct completion done; + struct mlx5_cmd *cmd; + struct work_struct work; + struct mlx5_cmd_layout *lay; + int ret; + int page_queue; + u8 status; + u8 token; + struct timespec ts1; + struct timespec ts2; +}; + +struct mlx5_pas { + u64 pa; + u8 log_sz; +}; + +static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) +{ + if (likely(BITS_PER_LONG == 64 || buf->nbufs == 1)) + return buf->direct.buf + offset; + else + return buf->page_list[offset >> PAGE_SHIFT].buf + + (offset & (PAGE_SIZE - 1)); +} + +extern struct workqueue_struct *mlx5_core_wq; + +#define STRUCT_FIELD(header, field) \ + .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ + .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field + +struct ib_field { + size_t struct_offset_bytes; + size_t struct_size_bytes; + int offset_bits; + int size_bits; +}; + +static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) +{ + return pci_get_drvdata(pdev); +} + +extern struct dentry *mlx5_debugfs_root; + +static inline u16 fw_rev_maj(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->fw_rev) & 0xffff; +} + +static inline u16 fw_rev_min(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->fw_rev) >> 16; +} + +static inline u16 fw_rev_sub(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->cmdif_rev_fw_sub) & 0xffff; +} + +static inline u16 cmdif_rev(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; +} + +static inline void *mlx5_vzalloc(unsigned long size) +{ + void *rtn; + + rtn = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (!rtn) + rtn = vzalloc(size); + return rtn; +} + +static inline void mlx5_vfree(const void *addr) +{ + if (addr && is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + +int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev); +void mlx5_dev_cleanup(struct mlx5_core_dev *dev); +int mlx5_cmd_init(struct mlx5_core_dev *dev); +void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); +void mlx5_cmd_use_events(struct mlx5_core_dev *dev); +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); +int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size); +int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); +int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); +int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); +int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); +void mlx5_health_cleanup(void); +void __init mlx5_health_init(void); +void mlx5_start_health_poll(struct mlx5_core_dev *dev); +void mlx5_stop_health_poll(struct mlx5_core_dev *dev); +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, + struct mlx5_buf *buf); +void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); +struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, + gfp_t flags, int npages); +void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, + struct mlx5_cmd_mailbox *head); +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen); +int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); +int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out); +int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq); +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + struct mlx5_create_mkey_mbox_in *in, int inlen); +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr); +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + struct mlx5_query_mkey_mbox_out *out, int outlen); +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + u32 *mkey); +int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); +int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); +int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, + u16 opmod, int port); +void mlx5_pagealloc_init(struct mlx5_core_dev *dev); +void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); +int mlx5_pagealloc_start(struct mlx5_core_dev *dev); +void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); +void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, + s16 npages); +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev); +int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); +void mlx5_register_debugfs(void); +void mlx5_unregister_debugfs(void); +int mlx5_eq_init(struct mlx5_core_dev *dev); +void mlx5_eq_cleanup(struct mlx5_core_dev *dev); +void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); +void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); +void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type); +void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); +struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector); +void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); +int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, struct mlx5_uar *uar); +int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_start_eqs(struct mlx5_core_dev *dev); +int mlx5_stop_eqs(struct mlx5_core_dev *dev); +int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); +int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); + +int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_num, int arg, int write); +int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct mlx5_query_eq_mbox_out *out, int outlen); +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); +void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); + +typedef void (*health_handler_t)(struct pci_dev *pdev, struct health_buffer __iomem *buf, int size); +int mlx5_register_health_report_handler(health_handler_t handler); +void mlx5_unregister_health_report_handler(void); +const char *mlx5_command_str(int command); +int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); + +static inline u32 mlx5_mkey_to_idx(u32 mkey) +{ + return mkey >> 8; +} + +static inline u32 mlx5_idx_to_mkey(u32 mkey_idx) +{ + return mkey_idx << 8; +} + +enum { + MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, + MLX5_PROF_MASK_CMDIF_CSUM = (u64)1 << 1, + MLX5_PROF_MASK_MR_CACHE = (u64)1 << 2, +}; + +enum { + MAX_MR_CACHE_ENTRIES = 16, +}; + +struct mlx5_profile { + u64 mask; + u32 log_max_qp; + int cmdif_csum; + struct { + int size; + int limit; + } mr_cache[MAX_MR_CACHE_ENTRIES]; +}; + +#endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h new file mode 100644 index 0000000..d9e3eac --- /dev/null +++ b/include/linux/mlx5/qp.h @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_QP_H +#define MLX5_QP_H + +#include <linux/mlx5/device.h> +#include <linux/mlx5/driver.h> + +#define MLX5_INVALID_LKEY 0x100 + +enum mlx5_qp_optpar { + MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, + MLX5_QP_OPTPAR_RRE = 1 << 1, + MLX5_QP_OPTPAR_RAE = 1 << 2, + MLX5_QP_OPTPAR_RWE = 1 << 3, + MLX5_QP_OPTPAR_PKEY_INDEX = 1 << 4, + MLX5_QP_OPTPAR_Q_KEY = 1 << 5, + MLX5_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, + MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7, + MLX5_QP_OPTPAR_SRA_MAX = 1 << 8, + MLX5_QP_OPTPAR_RRA_MAX = 1 << 9, + MLX5_QP_OPTPAR_PM_STATE = 1 << 10, + MLX5_QP_OPTPAR_RETRY_COUNT = 1 << 12, + MLX5_QP_OPTPAR_RNR_RETRY = 1 << 13, + MLX5_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, + MLX5_QP_OPTPAR_PRI_PORT = 1 << 16, + MLX5_QP_OPTPAR_SRQN = 1 << 18, + MLX5_QP_OPTPAR_CQN_RCV = 1 << 19, + MLX5_QP_OPTPAR_DC_HS = 1 << 20, + MLX5_QP_OPTPAR_DC_KEY = 1 << 21, +}; + +enum mlx5_qp_state { + MLX5_QP_STATE_RST = 0, + MLX5_QP_STATE_INIT = 1, + MLX5_QP_STATE_RTR = 2, + MLX5_QP_STATE_RTS = 3, + MLX5_QP_STATE_SQER = 4, + MLX5_QP_STATE_SQD = 5, + MLX5_QP_STATE_ERR = 6, + MLX5_QP_STATE_SQ_DRAINING = 7, + MLX5_QP_STATE_SUSPENDED = 9, + MLX5_QP_NUM_STATE +}; + +enum { + MLX5_QP_ST_RC = 0x0, + MLX5_QP_ST_UC = 0x1, + MLX5_QP_ST_UD = 0x2, + MLX5_QP_ST_XRC = 0x3, + MLX5_QP_ST_MLX = 0x4, + MLX5_QP_ST_DCI = 0x5, + MLX5_QP_ST_DCT = 0x6, + MLX5_QP_ST_QP0 = 0x7, + MLX5_QP_ST_QP1 = 0x8, + MLX5_QP_ST_RAW_ETHERTYPE = 0x9, + MLX5_QP_ST_RAW_IPV6 = 0xa, + MLX5_QP_ST_SNIFFER = 0xb, + MLX5_QP_ST_SYNC_UMR = 0xe, + MLX5_QP_ST_PTP_1588 = 0xd, + MLX5_QP_ST_REG_UMR = 0xc, + MLX5_QP_ST_MAX +}; + +enum { + MLX5_QP_PM_MIGRATED = 0x3, + MLX5_QP_PM_ARMED = 0x0, + MLX5_QP_PM_REARM = 0x1 +}; + +enum { + MLX5_NON_ZERO_RQ = 0 << 24, + MLX5_SRQ_RQ = 1 << 24, + MLX5_CRQ_RQ = 2 << 24, + MLX5_ZERO_LEN_RQ = 3 << 24 +}; + +enum { + /* params1 */ + MLX5_QP_BIT_SRE = 1 << 15, + MLX5_QP_BIT_SWE = 1 << 14, + MLX5_QP_BIT_SAE = 1 << 13, + /* params2 */ + MLX5_QP_BIT_RRE = 1 << 15, + MLX5_QP_BIT_RWE = 1 << 14, + MLX5_QP_BIT_RAE = 1 << 13, + MLX5_QP_BIT_RIC = 1 << 4, +}; + +enum { + MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, + MLX5_WQE_CTRL_SOLICITED = 1 << 1, +}; + +enum { + MLX5_SEND_WQE_BB = 64, +}; + +enum { + MLX5_WQE_FMR_PERM_LOCAL_READ = 1 << 27, + MLX5_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, + MLX5_WQE_FMR_PERM_REMOTE_READ = 1 << 29, + MLX5_WQE_FMR_PERM_REMOTE_WRITE = 1 << 30, + MLX5_WQE_FMR_PERM_ATOMIC = 1 << 31 +}; + +enum { + MLX5_FENCE_MODE_NONE = 0 << 5, + MLX5_FENCE_MODE_INITIATOR_SMALL = 1 << 5, + MLX5_FENCE_MODE_STRONG_ORDERING = 3 << 5, + MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, +}; + +enum { + MLX5_QP_LAT_SENSITIVE = 1 << 28, + MLX5_QP_ENABLE_SIG = 1 << 31, +}; + +enum { + MLX5_RCV_DBR = 0, + MLX5_SND_DBR = 1, +}; + +struct mlx5_wqe_fmr_seg { + __be32 flags; + __be32 mem_key; + __be64 buf_list; + __be64 start_addr; + __be64 reg_len; + __be32 offset; + __be32 page_size; + u32 reserved[2]; +}; + +struct mlx5_wqe_ctrl_seg { + __be32 opmod_idx_opcode; + __be32 qpn_ds; + u8 signature; + u8 rsvd[2]; + u8 fm_ce_se; + __be32 imm; +}; + +struct mlx5_wqe_xrc_seg { + __be32 xrc_srqn; + u8 rsvd[12]; +}; + +struct mlx5_wqe_masked_atomic_seg { + __be64 swap_add; + __be64 compare; + __be64 swap_add_mask; + __be64 compare_mask; +}; + +struct mlx5_av { + union { + struct { + __be32 qkey; + __be32 reserved; + } qkey; + __be64 dc_key; + } key; + __be32 dqp_dct; + u8 stat_rate_sl; + u8 fl_mlid; + __be16 rlid; + u8 reserved0[10]; + u8 tclass; + u8 hop_limit; + __be32 grh_gid_fl; + u8 rgid[16]; +}; + +struct mlx5_wqe_datagram_seg { + struct mlx5_av av; +}; + +struct mlx5_wqe_raddr_seg { + __be64 raddr; + __be32 rkey; + u32 reserved; +}; + +struct mlx5_wqe_atomic_seg { + __be64 swap_add; + __be64 compare; +}; + +struct mlx5_wqe_data_seg { + __be32 byte_count; + __be32 lkey; + __be64 addr; +}; + +struct mlx5_wqe_umr_ctrl_seg { + u8 flags; + u8 rsvd0[3]; + __be16 klm_octowords; + __be16 bsf_octowords; + __be64 mkey_mask; + u8 rsvd1[32]; +}; + +struct mlx5_seg_set_psv { + __be32 psv_num; + __be16 syndrome; + __be16 status; + __be32 transient_sig; + __be32 ref_tag; +}; + +struct mlx5_seg_get_psv { + u8 rsvd[19]; + u8 num_psv; + __be32 l_key; + __be64 va; + __be32 psv_index[4]; +}; + +struct mlx5_seg_check_psv { + u8 rsvd0[2]; + __be16 err_coalescing_op; + u8 rsvd1[2]; + __be16 xport_err_op; + u8 rsvd2[2]; + __be16 xport_err_mask; + u8 rsvd3[7]; + u8 num_psv; + __be32 l_key; + __be64 va; + __be32 psv_index[4]; +}; + +struct mlx5_rwqe_sig { + u8 rsvd0[4]; + u8 signature; + u8 rsvd1[11]; +}; + +struct mlx5_wqe_signature_seg { + u8 rsvd0[4]; + u8 signature; + u8 rsvd1[11]; +}; + +struct mlx5_wqe_inline_seg { + __be32 byte_count; +}; + +struct mlx5_core_qp { + void (*event) (struct mlx5_core_qp *, int); + int qpn; + atomic_t refcount; + struct completion free; + struct mlx5_rsc_debug *dbg; + int pid; +}; + +struct mlx5_qp_path { + u8 fl; + u8 rsvd3; + u8 free_ar; + u8 pkey_index; + u8 rsvd0; + u8 grh_mlid; + __be16 rlid; + u8 ackto_lt; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + __be32 tclass_flowlabel; + u8 rgid[16]; + u8 rsvd1[4]; + u8 sl; + u8 port; + u8 rsvd2[6]; +}; + +struct mlx5_qp_context { + __be32 flags; + __be32 flags_pd; + u8 mtu_msgmax; + u8 rq_size_stride; + __be16 sq_crq_size; + __be32 qp_counter_set_usr_page; + __be32 wire_qpn; + __be32 log_pg_sz_remote_qpn; + struct mlx5_qp_path pri_path; + struct mlx5_qp_path alt_path; + __be32 params1; + u8 reserved2[4]; + __be32 next_send_psn; + __be32 cqn_send; + u8 reserved3[8]; + __be32 last_acked_psn; + __be32 ssn; + __be32 params2; + __be32 rnr_nextrecvpsn; + __be32 xrcd; + __be32 cqn_recv; + __be64 db_rec_addr; + __be32 qkey; + __be32 rq_type_srqn; + __be32 rmsn; + __be16 hw_sq_wqe_counter; + __be16 sw_sq_wqe_counter; + __be16 hw_rcyclic_byte_counter; + __be16 hw_rq_counter; + __be16 sw_rcyclic_byte_counter; + __be16 sw_rq_counter; + u8 rsvd0[5]; + u8 cgs; + u8 cs_req; + u8 cs_res; + __be64 dc_access_key; + u8 rsvd1[24]; +}; + +struct mlx5_create_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_qpn; + u8 rsvd0[4]; + __be32 opt_param_mask; + u8 rsvd1[4]; + struct mlx5_qp_context ctx; + u8 rsvd3[16]; + __be64 pas[0]; +}; + +struct mlx5_create_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 qpn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + +struct mlx5_modify_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd1[4]; + __be32 optparam; + u8 rsvd0[4]; + struct mlx5_qp_context ctx; +}; + +struct mlx5_modify_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + +struct mlx5_query_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd[4]; +}; + +struct mlx5_query_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd1[8]; + __be32 optparam; + u8 rsvd0[4]; + struct mlx5_qp_context ctx; + u8 rsvd2[16]; + __be64 pas[0]; +}; + +struct mlx5_conf_sqp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd[3]; + u8 type; +}; + +struct mlx5_conf_sqp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_alloc_xrcd_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_alloc_xrcd_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 xrcdn; + u8 rsvd[4]; +}; + +struct mlx5_dealloc_xrcd_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 xrcdn; + u8 rsvd[4]; +}; + +struct mlx5_dealloc_xrcd_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn) +{ + return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); +} + +int mlx5_core_create_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp, + struct mlx5_create_qp_mbox_in *in, + int inlen); +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, + enum mlx5_qp_state new_state, + struct mlx5_modify_qp_mbox_in *in, int sqd_event, + struct mlx5_core_qp *qp); +int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp); +int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + struct mlx5_query_qp_mbox_out *out, int outlen); + +int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn); +int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn); +void mlx5_init_qp_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev); +int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); +void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); + +#endif /* MLX5_QP_H */ diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h new file mode 100644 index 0000000..e1a363a --- /dev/null +++ b/include/linux/mlx5/srq.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_SRQ_H +#define MLX5_SRQ_H + +#include <linux/mlx5/driver.h> + +void mlx5_init_srq_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev); + +#endif /* MLX5_SRQ_H */ diff --git a/include/linux/socket.h b/include/linux/socket.h index b10ce4b..230c04b 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -167,6 +167,7 @@ struct ucred { #define AF_PPPOX 24 /* PPPoX sockets */ #define AF_WANPIPE 25 /* Wanpipe API Sockets */ #define AF_LLC 26 /* Linux LLC */ +#define AF_IB 27 /* Native InfiniBand address */ #define AF_CAN 29 /* Controller Area Network */ #define AF_TIPC 30 /* TIPC sockets */ #define AF_BLUETOOTH 31 /* Bluetooth sockets */ @@ -211,6 +212,7 @@ struct ucred { #define PF_PPPOX AF_PPPOX #define PF_WANPIPE AF_WANPIPE #define PF_LLC AF_LLC +#define PF_IB AF_IB #define PF_CAN AF_CAN #define PF_TIPC AF_TIPC #define PF_BLUETOOTH AF_BLUETOOTH diff --git a/include/rdma/ib.h b/include/rdma/ib.h new file mode 100644 index 0000000..cf8f9e7 --- /dev/null +++ b/include/rdma/ib.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2010 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(_RDMA_IB_H) +#define _RDMA_IB_H + +#include <linux/types.h> + +struct ib_addr { + union { + __u8 uib_addr8[16]; + __be16 uib_addr16[8]; + __be32 uib_addr32[4]; + __be64 uib_addr64[2]; + } ib_u; +#define sib_addr8 ib_u.uib_addr8 +#define sib_addr16 ib_u.uib_addr16 +#define sib_addr32 ib_u.uib_addr32 +#define sib_addr64 ib_u.uib_addr64 +#define sib_raw ib_u.uib_addr8 +#define sib_subnet_prefix ib_u.uib_addr64[0] +#define sib_interface_id ib_u.uib_addr64[1] +}; + +static inline int ib_addr_any(const struct ib_addr *a) +{ + return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0); +} + +static inline int ib_addr_loopback(const struct ib_addr *a) +{ + return ((a->sib_addr32[0] | a->sib_addr32[1] | + a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0); +} + +static inline void ib_addr_set(struct ib_addr *addr, + __be32 w1, __be32 w2, __be32 w3, __be32 w4) +{ + addr->sib_addr32[0] = w1; + addr->sib_addr32[1] = w2; + addr->sib_addr32[2] = w3; + addr->sib_addr32[3] = w4; +} + +static inline int ib_addr_cmp(const struct ib_addr *a1, const struct ib_addr *a2) +{ + return memcmp(a1, a2, sizeof(struct ib_addr)); +} + +struct sockaddr_ib { + unsigned short int sib_family; /* AF_IB */ + __be16 sib_pkey; + __be32 sib_flowinfo; + struct ib_addr sib_addr; + __be64 sib_sid; + __be64 sib_sid_mask; + __u64 sib_scope_id; +}; + +#endif /* _RDMA_IB_H */ diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 9996539..f3ac0f2 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -102,11 +102,7 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr); -static inline int ip_addr_size(struct sockaddr *addr) -{ - return addr->sa_family == AF_INET6 ? - sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in); -} +int rdma_addr_size(struct sockaddr *addr); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 8275e53..125f871 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -402,6 +402,12 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, struct ib_ah_attr *ah_attr); /** + * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec + * to IB MAD wire format. + */ +void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute); + +/** * ib_sa_unpack_path - Convert a path record from MAD format to struct * ib_sa_path_rec. */ @@ -418,4 +424,5 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, void *context), void *context, struct ib_sa_query **sa_query); + #endif /* IB_SA_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 98cc4b2..645c3ce 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -610,7 +610,21 @@ enum ib_qp_type { IB_QPT_RAW_PACKET = 8, IB_QPT_XRC_INI = 9, IB_QPT_XRC_TGT, - IB_QPT_MAX + IB_QPT_MAX, + /* Reserve a range for qp types internal to the low level driver. + * These qp types will not be visible at the IB core layer, so the + * IB_QPT_MAX usages should not be affected in the core layer + */ + IB_QPT_RESERVED1 = 0x1000, + IB_QPT_RESERVED2, + IB_QPT_RESERVED3, + IB_QPT_RESERVED4, + IB_QPT_RESERVED5, + IB_QPT_RESERVED6, + IB_QPT_RESERVED7, + IB_QPT_RESERVED8, + IB_QPT_RESERVED9, + IB_QPT_RESERVED10, }; enum ib_qp_create_flags { @@ -766,6 +780,19 @@ enum ib_wr_opcode { IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, IB_WR_BIND_MW, + /* reserve values for low level drivers' internal use. + * These values will not be used at all in the ib core layer. + */ + IB_WR_RESERVED1 = 0xf0, + IB_WR_RESERVED2, + IB_WR_RESERVED3, + IB_WR_RESERVED4, + IB_WR_RESERVED5, + IB_WR_RESERVED6, + IB_WR_RESERVED7, + IB_WR_RESERVED8, + IB_WR_RESERVED9, + IB_WR_RESERVED10, }; enum ib_send_flags { @@ -773,7 +800,11 @@ enum ib_send_flags { IB_SEND_SIGNALED = (1<<1), IB_SEND_SOLICITED = (1<<2), IB_SEND_INLINE = (1<<3), - IB_SEND_IP_CSUM = (1<<4) + IB_SEND_IP_CSUM = (1<<4), + + /* reserve bits 26-31 for low level drivers' internal use */ + IB_SEND_RESERVED_START = (1 << 26), + IB_SEND_RESERVED_END = (1 << 31), }; struct ib_sge { diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index ad3a314..1ed2088 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -70,6 +70,11 @@ enum rdma_port_space { RDMA_PS_UDP = 0x0111, }; +#define RDMA_IB_IP_PS_MASK 0xFFFFFFFFFFFF0000ULL +#define RDMA_IB_IP_PS_TCP 0x0000000001060000ULL +#define RDMA_IB_IP_PS_UDP 0x0000000001110000ULL +#define RDMA_IB_IP_PS_IB 0x00000000013F0000ULL + struct rdma_addr { struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; @@ -93,6 +98,7 @@ struct rdma_conn_param { /* Fields below ignored if a QP is created on the rdma_cm_id. */ u8 srq; u32 qp_num; + u32 qkey; }; struct rdma_ud_param { @@ -367,4 +373,11 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse); */ int rdma_set_afonly(struct rdma_cm_id *id, int afonly); + /** + * rdma_get_service_id - Return the IB service ID for a specified address. + * @id: Communication identifier associated with the address. + * @addr: Address for the service ID. + */ +__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr); + #endif /* RDMA_CM_H */ diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 1ee9239..99b80ab 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -45,8 +45,8 @@ enum { RDMA_USER_CM_CMD_CREATE_ID, RDMA_USER_CM_CMD_DESTROY_ID, - RDMA_USER_CM_CMD_BIND_ADDR, - RDMA_USER_CM_CMD_RESOLVE_ADDR, + RDMA_USER_CM_CMD_BIND_IP, + RDMA_USER_CM_CMD_RESOLVE_IP, RDMA_USER_CM_CMD_RESOLVE_ROUTE, RDMA_USER_CM_CMD_QUERY_ROUTE, RDMA_USER_CM_CMD_CONNECT, @@ -59,9 +59,13 @@ enum { RDMA_USER_CM_CMD_GET_OPTION, RDMA_USER_CM_CMD_SET_OPTION, RDMA_USER_CM_CMD_NOTIFY, - RDMA_USER_CM_CMD_JOIN_MCAST, + RDMA_USER_CM_CMD_JOIN_IP_MCAST, RDMA_USER_CM_CMD_LEAVE_MCAST, - RDMA_USER_CM_CMD_MIGRATE_ID + RDMA_USER_CM_CMD_MIGRATE_ID, + RDMA_USER_CM_CMD_QUERY, + RDMA_USER_CM_CMD_BIND, + RDMA_USER_CM_CMD_RESOLVE_ADDR, + RDMA_USER_CM_CMD_JOIN_MCAST }; /* @@ -95,28 +99,51 @@ struct rdma_ucm_destroy_id_resp { __u32 events_reported; }; -struct rdma_ucm_bind_addr { +struct rdma_ucm_bind_ip { __u64 response; struct sockaddr_in6 addr; __u32 id; }; -struct rdma_ucm_resolve_addr { +struct rdma_ucm_bind { + __u32 id; + __u16 addr_size; + __u16 reserved; + struct sockaddr_storage addr; +}; + +struct rdma_ucm_resolve_ip { struct sockaddr_in6 src_addr; struct sockaddr_in6 dst_addr; __u32 id; __u32 timeout_ms; }; +struct rdma_ucm_resolve_addr { + __u32 id; + __u32 timeout_ms; + __u16 src_size; + __u16 dst_size; + __u32 reserved; + struct sockaddr_storage src_addr; + struct sockaddr_storage dst_addr; +}; + struct rdma_ucm_resolve_route { __u32 id; __u32 timeout_ms; }; -struct rdma_ucm_query_route { +enum { + RDMA_USER_CM_QUERY_ADDR, + RDMA_USER_CM_QUERY_PATH, + RDMA_USER_CM_QUERY_GID +}; + +struct rdma_ucm_query { __u64 response; __u32 id; - __u32 reserved; + __u32 option; }; struct rdma_ucm_query_route_resp { @@ -129,9 +156,26 @@ struct rdma_ucm_query_route_resp { __u8 reserved[3]; }; +struct rdma_ucm_query_addr_resp { + __u64 node_guid; + __u8 port_num; + __u8 reserved; + __u16 pkey; + __u16 src_size; + __u16 dst_size; + struct sockaddr_storage src_addr; + struct sockaddr_storage dst_addr; +}; + +struct rdma_ucm_query_path_resp { + __u32 num_paths; + __u32 reserved; + struct ib_path_rec_data path_data[0]; +}; + struct rdma_ucm_conn_param { __u32 qp_num; - __u32 reserved; + __u32 qkey; __u8 private_data[RDMA_MAX_PRIVATE_DATA]; __u8 private_data_len; __u8 srq; @@ -192,13 +236,22 @@ struct rdma_ucm_notify { __u32 event; }; -struct rdma_ucm_join_mcast { +struct rdma_ucm_join_ip_mcast { __u64 response; /* rdma_ucm_create_id_resp */ __u64 uid; struct sockaddr_in6 addr; __u32 id; }; +struct rdma_ucm_join_mcast { + __u64 response; /* rdma_ucma_create_id_resp */ + __u64 uid; + __u32 id; + __u16 addr_size; + __u16 reserved; + struct sockaddr_storage addr; +}; + struct rdma_ucm_get_event { __u64 response; }; |