diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-23 15:53:26 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-23 15:53:26 -0700 |
commit | f9d1b5a31ab02208e29631756630739175cdaa02 (patch) | |
tree | 65cc0f69edd1b7198f9c4428e56942d054de0562 | |
parent | 2ad7b44f5dd63a34c8853ce55f7d3d351b2cbd33 (diff) | |
parent | 8e4349d13f3365273d2ff17667b36f7e846df912 (diff) | |
download | op-kernel-dev-f9d1b5a31ab02208e29631756630739175cdaa02.zip op-kernel-dev-f9d1b5a31ab02208e29631756630739175cdaa02.tar.gz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford:
- a large cleanup of how device capabilities are checked for various
features
- additional cleanups in the MAD processing
- update to the srp driver
- creation and use of centralized log message helpers
- add const to a number of function arguments and clean up the call chains
- add support for extended CQ create verb
- add support for timestamps on CQ completion
- add support for processing OPA MAD packets
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (92 commits)
IB/mad: Add final OPA MAD processing
IB/mad: Add partial Intel OPA MAD support
IB/mad: Add partial Intel OPA MAD support
IB/core: Add OPA MAD core capability flag
IB/mad: Add support for additional MAD info to/from drivers
IB/mad: Convert allocations from kmem_cache to kzalloc
IB/core: Add ability for drivers to report an alternate MAD size.
IB/mad: Support alternate Base Versions when creating MADs
IB/mad: Create a generic helper for DR forwarding checks
IB/mad: Create a generic helper for DR SMP Recv processing
IB/mad: Create a generic helper for DR SMP Send processing
IB/mad: Split IB SMI handling from MAD Recv handler
IB/mad cleanup: Generalize processing of MAD data
IB/mad cleanup: Clean up function params -- find_mad_agent
IB/mlx4: Add support for CQ time-stamping
IB/mlx4: Add mmap call to map the hardware clock
IB/core: Pass hardware specific data in query_device
IB/core: Add timestamp_mask and hca_core_clock to query_device
IB/core: Extend ib_uverbs_create_cq
IB/core: Add CQ creation time-stamping flag
...
112 files changed, 2901 insertions, 1364 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 38339d2..746cdf5 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -457,8 +457,8 @@ static void resolve_cb(int status, struct sockaddr *src_addr, complete(&((struct resolve_cb_context *)context)->comp); } -int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, - u16 *vlan_id) +int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, + u8 *dmac, u16 *vlan_id) { int ret = 0; struct rdma_dev_addr dev_addr; diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index f6d2961..c7dcfe4 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -54,7 +54,7 @@ static DEFINE_SPINLOCK(ib_agent_port_list_lock); static LIST_HEAD(ib_agent_port_list); static struct ib_agent_port_private * -__ib_get_agent_port(struct ib_device *device, int port_num) +__ib_get_agent_port(const struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; @@ -67,7 +67,7 @@ __ib_get_agent_port(struct ib_device *device, int port_num) } static struct ib_agent_port_private * -ib_get_agent_port(struct ib_device *device, int port_num) +ib_get_agent_port(const struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; unsigned long flags; @@ -78,9 +78,9 @@ ib_get_agent_port(struct ib_device *device, int port_num) return entry; } -void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, - struct ib_wc *wc, struct ib_device *device, - int port_num, int qpn) +void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *grh, + const struct ib_wc *wc, const struct ib_device *device, + int port_num, int qpn, size_t resp_mad_len, bool opa) { struct ib_agent_port_private *port_priv; struct ib_mad_agent *agent; @@ -106,15 +106,20 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, return; } + if (opa && 
mad_hdr->base_version != OPA_MGMT_BASE_VERSION) + resp_mad_len = IB_MGMT_MAD_SIZE; + send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0, - IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_KERNEL); + IB_MGMT_MAD_HDR, + resp_mad_len - IB_MGMT_MAD_HDR, + GFP_KERNEL, + mad_hdr->base_version); if (IS_ERR(send_buf)) { dev_err(&device->dev, "ib_create_send_mad error\n"); goto err1; } - memcpy(send_buf->mad, mad, sizeof *mad); + memcpy(send_buf->mad, mad_hdr, resp_mad_len); send_buf->ah = ah; if (device->node_type == RDMA_NODE_IB_SWITCH) { @@ -156,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num) goto error1; } - if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND) { + if (rdma_cap_ib_smi(device, port_num)) { /* Obtain send only MAD agent for SMI QP */ port_priv->agent[0] = ib_register_mad_agent(device, port_num, IB_QPT_SMI, NULL, 0, diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h index 6669287..65f92be 100644 --- a/drivers/infiniband/core/agent.h +++ b/drivers/infiniband/core/agent.h @@ -44,8 +44,8 @@ extern int ib_agent_port_open(struct ib_device *device, int port_num); extern int ib_agent_port_close(struct ib_device *device, int port_num); -extern void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, - struct ib_wc *wc, struct ib_device *device, - int port_num, int qpn); +extern void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *grh, + const struct ib_wc *wc, const struct ib_device *device, + int port_num, int qpn, size_t resp_mad_len, bool opa); #endif /* __AGENT_H_ */ diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 80f6cf2..871da83 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -58,17 +58,6 @@ struct ib_update_work { u8 port_num; }; -static inline int start_port(struct ib_device *device) -{ - return (device->node_type == RDMA_NODE_IB_SWITCH) ? 
0 : 1; -} - -static inline int end_port(struct ib_device *device) -{ - return (device->node_type == RDMA_NODE_IB_SWITCH) ? - 0 : device->phys_port_cnt; -} - int ib_get_cached_gid(struct ib_device *device, u8 port_num, int index, @@ -78,12 +67,12 @@ int ib_get_cached_gid(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.gid_cache[port_num - start_port(device)]; + cache = device->cache.gid_cache[port_num - rdma_start_port(device)]; if (index < 0 || index >= cache->table_len) ret = -EINVAL; @@ -96,10 +85,10 @@ int ib_get_cached_gid(struct ib_device *device, } EXPORT_SYMBOL(ib_get_cached_gid); -int ib_find_cached_gid(struct ib_device *device, - union ib_gid *gid, - u8 *port_num, - u16 *index) +int ib_find_cached_gid(struct ib_device *device, + const union ib_gid *gid, + u8 *port_num, + u16 *index) { struct ib_gid_cache *cache; unsigned long flags; @@ -112,11 +101,11 @@ int ib_find_cached_gid(struct ib_device *device, read_lock_irqsave(&device->cache.lock, flags); - for (p = 0; p <= end_port(device) - start_port(device); ++p) { + for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) { cache = device->cache.gid_cache[p]; for (i = 0; i < cache->table_len; ++i) { if (!memcmp(gid, &cache->table[i], sizeof *gid)) { - *port_num = p + start_port(device); + *port_num = p + rdma_start_port(device); if (index) *index = i; ret = 0; @@ -140,12 +129,12 @@ int ib_get_cached_pkey(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - start_port(device)]; + cache = 
device->cache.pkey_cache[port_num - rdma_start_port(device)]; if (index < 0 || index >= cache->table_len) ret = -EINVAL; @@ -169,12 +158,12 @@ int ib_find_cached_pkey(struct ib_device *device, int ret = -ENOENT; int partial_ix = -1; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - start_port(device)]; + cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; *index = -1; @@ -209,12 +198,12 @@ int ib_find_exact_cached_pkey(struct ib_device *device, int i; int ret = -ENOENT; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - start_port(device)]; + cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; *index = -1; @@ -238,11 +227,11 @@ int ib_get_cached_lmc(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - *lmc = device->cache.lmc_cache[port_num - start_port(device)]; + *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)]; read_unlock_irqrestore(&device->cache.lock, flags); return ret; @@ -303,13 +292,13 @@ static void ib_cache_update(struct ib_device *device, write_lock_irq(&device->cache.lock); - old_pkey_cache = device->cache.pkey_cache[port - start_port(device)]; - old_gid_cache = device->cache.gid_cache [port - start_port(device)]; + old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)]; + old_gid_cache = device->cache.gid_cache [port - rdma_start_port(device)]; - 
device->cache.pkey_cache[port - start_port(device)] = pkey_cache; - device->cache.gid_cache [port - start_port(device)] = gid_cache; + device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache; + device->cache.gid_cache [port - rdma_start_port(device)] = gid_cache; - device->cache.lmc_cache[port - start_port(device)] = tprops->lmc; + device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc; write_unlock_irq(&device->cache.lock); @@ -363,14 +352,14 @@ static void ib_cache_setup_one(struct ib_device *device) device->cache.pkey_cache = kmalloc(sizeof *device->cache.pkey_cache * - (end_port(device) - start_port(device) + 1), GFP_KERNEL); + (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); device->cache.gid_cache = kmalloc(sizeof *device->cache.gid_cache * - (end_port(device) - start_port(device) + 1), GFP_KERNEL); + (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache * - (end_port(device) - - start_port(device) + 1), + (rdma_end_port(device) - + rdma_start_port(device) + 1), GFP_KERNEL); if (!device->cache.pkey_cache || !device->cache.gid_cache || @@ -380,10 +369,10 @@ static void ib_cache_setup_one(struct ib_device *device) goto err; } - for (p = 0; p <= end_port(device) - start_port(device); ++p) { + for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) { device->cache.pkey_cache[p] = NULL; device->cache.gid_cache [p] = NULL; - ib_cache_update(device, p + start_port(device)); + ib_cache_update(device, p + rdma_start_port(device)); } INIT_IB_EVENT_HANDLER(&device->cache.event_handler, @@ -394,7 +383,7 @@ static void ib_cache_setup_one(struct ib_device *device) return; err_cache: - for (p = 0; p <= end_port(device) - start_port(device); ++p) { + for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) { kfree(device->cache.pkey_cache[p]); kfree(device->cache.gid_cache[p]); } @@ -412,7 +401,7 @@ static void 
ib_cache_cleanup_one(struct ib_device *device) ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); - for (p = 0; p <= end_port(device) - start_port(device); ++p) { + for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) { kfree(device->cache.pkey_cache[p]); kfree(device->cache.gid_cache[p]); } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0271608..dbddddd 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -267,7 +267,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_ATOMIC); + GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); @@ -297,7 +298,8 @@ static int cm_alloc_response_msg(struct cm_port *port, m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_ATOMIC); + GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); @@ -3759,11 +3761,9 @@ static void cm_add_one(struct ib_device *ib_device) }; unsigned long flags; int ret; + int count = 0; u8 i; - if (rdma_node_get_transport(ib_device->node_type) != RDMA_TRANSPORT_IB) - return; - cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * ib_device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) @@ -3782,6 +3782,9 @@ static void cm_add_one(struct ib_device *ib_device) set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= ib_device->phys_port_cnt; i++) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + port = kzalloc(sizeof *port, GFP_KERNEL); if (!port) goto error1; @@ -3808,7 +3811,13 @@ static void cm_add_one(struct ib_device *ib_device) ret = ib_modify_port(ib_device, i, 0, &port_modify); if (ret) goto error3; + + count++; } + + if (!count) + goto free; + ib_set_client_data(ib_device, &cm_client, cm_dev); 
write_lock_irqsave(&cm.device_lock, flags); @@ -3824,11 +3833,15 @@ error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; while (--i) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } +free: device_unregister(cm_dev->device); kfree(cm_dev); } @@ -3852,6 +3865,9 @@ static void cm_remove_one(struct ib_device *ib_device) write_unlock_irqrestore(&cm.device_lock, flags); for (i = 1; i <= ib_device->phys_port_cnt; i++) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 38ffe09..143ded2 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -65,6 +65,34 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 +static const char * const cma_events[] = { + [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", + [RDMA_CM_EVENT_ADDR_ERROR] = "address error", + [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", + [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", + [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", + [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", + [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", + [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", + [RDMA_CM_EVENT_REJECTED] = "rejected", + [RDMA_CM_EVENT_ESTABLISHED] = "established", + [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", + [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", + [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", + [RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error", + [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", + [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", +}; + +const char 
*rdma_event_msg(enum rdma_cm_event_type event) +{ + size_t index = event; + + return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? + cma_events[index] : "unrecognized event"; +} +EXPORT_SYMBOL(rdma_event_msg); + static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); @@ -349,18 +377,35 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a return ret; } +static inline int cma_validate_port(struct ib_device *device, u8 port, + union ib_gid *gid, int dev_type) +{ + u8 found_port; + int ret = -ENODEV; + + if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) + return ret; + + if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) + return ret; + + ret = ib_find_cached_gid(device, gid, &found_port, NULL); + if (port != found_port) + return -ENODEV; + + return ret; +} + static int cma_acquire_dev(struct rdma_id_private *id_priv, struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; - union ib_gid gid, iboe_gid; + union ib_gid gid, iboe_gid, *gidp; int ret = -ENODEV; - u8 port, found_port; - enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? 
- IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; + u8 port; - if (dev_ll != IB_LINK_LAYER_INFINIBAND && + if (dev_addr->dev_type != ARPHRD_INFINIBAND && id_priv->id.ps == RDMA_PS_IPOIB) return -EINVAL; @@ -370,41 +415,36 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, memcpy(&gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof gid); - if (listen_id_priv && - rdma_port_get_link_layer(listen_id_priv->id.device, - listen_id_priv->id.port_num) == dev_ll) { + + if (listen_id_priv) { cma_dev = listen_id_priv->cma_dev; port = listen_id_priv->id.port_num; - if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) - ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, - &found_port, NULL); - else - ret = ib_find_cached_gid(cma_dev->device, &gid, - &found_port, NULL); + gidp = rdma_protocol_roce(cma_dev->device, port) ? + &iboe_gid : &gid; - if (!ret && (port == found_port)) { - id_priv->id.port_num = found_port; + ret = cma_validate_port(cma_dev->device, port, gidp, + dev_addr->dev_type); + if (!ret) { + id_priv->id.port_num = port; goto out; } } + list_for_each_entry(cma_dev, &dev_list, list) { for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { if (listen_id_priv && listen_id_priv->cma_dev == cma_dev && listen_id_priv->id.port_num == port) continue; - if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { - if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) - ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL); - else - ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL); - - if (!ret && (port == found_port)) { - id_priv->id.port_num = found_port; - goto out; - } + + gidp = rdma_protocol_roce(cma_dev->device, port) ? 
+ &iboe_gid : &gid; + + ret = cma_validate_port(cma_dev->device, port, gidp, + dev_addr->dev_type); + if (!ret) { + id_priv->id.port_num = port; + goto out; } } } @@ -435,10 +475,10 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) pkey = ntohs(addr->sib_pkey); list_for_each_entry(cur_dev, &dev_list, list) { - if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) - continue; - for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + if (!rdma_cap_af_ib(cur_dev->device, p)) + continue; + if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) continue; @@ -633,10 +673,9 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, if (ret) goto out; - if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) - == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) - == IB_LINK_LAYER_ETHERNET) { + BUG_ON(id_priv->cma_dev->device != id_priv->id.device); + + if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) { ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL); if (ret) @@ -700,11 +739,10 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, int ret; u16 pkey; - if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) == - IB_LINK_LAYER_INFINIBAND) - pkey = ib_addr_get_pkey(dev_addr); - else + if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) pkey = 0xffff; + else + pkey = ib_addr_get_pkey(dev_addr); ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, pkey, &qp_attr->pkey_index); @@ -735,8 +773,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int ret = 0; id_priv = container_of(id, struct rdma_id_private, id); - switch (rdma_node_get_transport(id_priv->id.device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); 
else @@ -745,19 +782,15 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { if (!id_priv->cm_id.iw) { qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, qp_attr_mask); - break; - default: + } else ret = -ENOSYS; - break; - } return ret; } @@ -945,13 +978,9 @@ static inline int cma_user_data_offset(struct rdma_id_private *id_priv) static void cma_cancel_route(struct rdma_id_private *id_priv) { - switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) { - case IB_LINK_LAYER_INFINIBAND: + if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); - break; - default: - break; } } @@ -1023,17 +1052,12 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) mc = container_of(id_priv->mc_list.next, struct cma_multicast, list); list_del(&mc->list); - switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) { - case IB_LINK_LAYER_INFINIBAND: + if (rdma_cap_ib_mcast(id_priv->cma_dev->device, + id_priv->id.port_num)) { ib_sa_free_multicast(mc->multicast.ib); kfree(mc); - break; - case IB_LINK_LAYER_ETHERNET: + } else kref_put(&mc->mcref, release_mc); - break; - default: - break; - } } } @@ -1054,17 +1078,12 @@ void rdma_destroy_id(struct rdma_cm_id *id) mutex_unlock(&id_priv->handler_mutex); if (id_priv->cma_dev) { - switch (rdma_node_get_transport(id_priv->id.device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) ib_destroy_cm_id(id_priv->cm_id.ib); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.iw) iw_destroy_cm_id(id_priv->cm_id.iw); - 
break; - default: - break; } cma_leave_mc_groups(id_priv); cma_release_dev(id_priv); @@ -1610,6 +1629,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) if (IS_ERR(id)) return PTR_ERR(id); + id->tos = id_priv->tos; id_priv->cm_id.iw = id; memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), @@ -1642,8 +1662,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct rdma_cm_id *id; int ret; - if (cma_family(id_priv) == AF_IB && - rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB) + if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, @@ -1984,26 +2003,15 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) return -EINVAL; atomic_inc(&id_priv->refcount); - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: - switch (rdma_port_get_link_layer(id->device, id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ret = cma_resolve_ib_route(id_priv, timeout_ms); - break; - case IB_LINK_LAYER_ETHERNET: - ret = cma_resolve_iboe_route(id_priv); - break; - default: - ret = -ENOSYS; - } - break; - case RDMA_TRANSPORT_IWARP: + if (rdma_cap_ib_sa(id->device, id->port_num)) + ret = cma_resolve_ib_route(id_priv, timeout_ms); + else if (rdma_protocol_roce(id->device, id->port_num)) + ret = cma_resolve_iboe_route(id_priv); + else if (rdma_protocol_iwarp(id->device, id->port_num)) ret = cma_resolve_iw_route(id_priv, timeout_ms); - break; - default: + else ret = -ENOSYS; - break; - } + if (ret) goto err; @@ -2045,7 +2053,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) mutex_lock(&lock); list_for_each_entry(cur_dev, &dev_list, list) { if (cma_family(id_priv) == AF_IB && - rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) + !rdma_cap_ib_cm(cur_dev->device, 1)) continue; if (!cma_dev) @@ -2077,7 +2085,7 @@ port_found: goto out; 
id_priv->id.route.addr.dev_addr.dev_type = - (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ? + (rdma_protocol_ib(cma_dev->device, p)) ? ARPHRD_INFINIBAND : ARPHRD_ETHER; rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); @@ -2554,18 +2562,15 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) id_priv->backlog = backlog; if (id->device) { - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, 1)) { ret = cma_ib_listen(id_priv); if (ret) goto err; - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, 1)) { ret = cma_iw_listen(id_priv, backlog); if (ret) goto err; - break; - default: + } else { ret = -ENOSYS; goto err; } @@ -2857,6 +2862,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, if (IS_ERR(cm_id)) return PTR_ERR(cm_id); + cm_id->tos = id_priv->tos; id_priv->cm_id.iw = cm_id; memcpy(&cm_id->local_addr, cma_src_addr(id_priv), @@ -2901,20 +2907,15 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) ret = cma_connect_iw(id_priv, conn_param); - break; - default: + else ret = -ENOSYS; - break; - } if (ret) goto err; @@ -3017,8 +3018,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) { if (conn_param) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, @@ -3034,14 +3034,10 @@ int 
rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) else ret = cma_rep_recv(id_priv); } - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) ret = cma_accept_iw(id_priv, conn_param); - break; - default: + else ret = -ENOSYS; - break; - } if (ret) goto reject; @@ -3085,8 +3081,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, if (!id_priv->cm_id.ib) return -EINVAL; - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, private_data, private_data_len); @@ -3094,15 +3089,12 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); - break; - default: + } else ret = -ENOSYS; - break; - } + return ret; } EXPORT_SYMBOL(rdma_reject); @@ -3116,22 +3108,18 @@ int rdma_disconnect(struct rdma_cm_id *id) if (!id_priv->cm_id.ib) return -EINVAL; - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { ret = cma_modify_qp_err(id_priv); if (ret) goto out; /* Initiate or respond to a disconnect. 
*/ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); - break; - default: + } else ret = -EINVAL; - break; - } + out: return ret; } @@ -3377,24 +3365,13 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, list_add(&mc->list, &id_priv->mc_list); spin_unlock(&id_priv->lock); - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: - switch (rdma_port_get_link_layer(id->device, id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ret = cma_join_ib_multicast(id_priv, mc); - break; - case IB_LINK_LAYER_ETHERNET: - kref_init(&mc->mcref); - ret = cma_iboe_join_multicast(id_priv, mc); - break; - default: - ret = -EINVAL; - } - break; - default: + if (rdma_protocol_roce(id->device, id->port_num)) { + kref_init(&mc->mcref); + ret = cma_iboe_join_multicast(id_priv, mc); + } else if (rdma_cap_ib_mcast(id->device, id->port_num)) + ret = cma_join_ib_multicast(id_priv, mc); + else ret = -ENOSYS; - break; - } if (ret) { spin_lock_irq(&id_priv->lock); @@ -3422,19 +3399,15 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, be16_to_cpu(mc->multicast.ib->rec.mlid)); - if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { - switch (rdma_port_get_link_layer(id->device, id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - break; - case IB_LINK_LAYER_ETHERNET: - kref_put(&mc->mcref, release_mc); - break; - default: - break; - } - } + + BUG_ON(id_priv->cma_dev->device != id->device); + + if (rdma_cap_ib_mcast(id->device, id->port_num)) { + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + } else if (rdma_protocol_roce(id->device, id->port_num)) + kref_put(&mc->mcref, release_mc); + return; } } 
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 18c1ece..9567756 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -92,7 +92,8 @@ static int ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(poll_cq), IB_MANDATORY_FUNC(req_notify_cq), IB_MANDATORY_FUNC(get_dma_mr), - IB_MANDATORY_FUNC(dereg_mr) + IB_MANDATORY_FUNC(dereg_mr), + IB_MANDATORY_FUNC(get_port_immutable) }; int i; @@ -151,18 +152,6 @@ static int alloc_name(char *name) return 0; } -static int start_port(struct ib_device *device) -{ - return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; -} - - -static int end_port(struct ib_device *device) -{ - return (device->node_type == RDMA_NODE_IB_SWITCH) ? - 0 : device->phys_port_cnt; -} - /** * ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate @@ -222,42 +211,49 @@ static int add_client_context(struct ib_device *device, struct ib_client *client return 0; } -static int read_port_table_lengths(struct ib_device *device) +static int verify_immutable(const struct ib_device *dev, u8 port) { - struct ib_port_attr *tprops = NULL; - int num_ports, ret = -ENOMEM; - u8 port_index; - - tprops = kmalloc(sizeof *tprops, GFP_KERNEL); - if (!tprops) - goto out; - - num_ports = end_port(device) - start_port(device) + 1; + return WARN_ON(!rdma_cap_ib_mad(dev, port) && + rdma_max_mad_size(dev, port) != 0); +} - device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports, - GFP_KERNEL); - device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports, - GFP_KERNEL); - if (!device->pkey_tbl_len || !device->gid_tbl_len) +static int read_port_immutable(struct ib_device *device) +{ + int ret = -ENOMEM; + u8 start_port = rdma_start_port(device); + u8 end_port = rdma_end_port(device); + u8 port; + + /** + * device->port_immutable is indexed directly by the port number to make + * access to this data as efficient as possible. 
+ * + * Therefore port_immutable is declared as a 1 based array with + * potential empty slots at the beginning. + */ + device->port_immutable = kzalloc(sizeof(*device->port_immutable) + * (end_port + 1), + GFP_KERNEL); + if (!device->port_immutable) goto err; - for (port_index = 0; port_index < num_ports; ++port_index) { - ret = ib_query_port(device, port_index + start_port(device), - tprops); + for (port = start_port; port <= end_port; ++port) { + ret = device->get_port_immutable(device, port, + &device->port_immutable[port]); if (ret) goto err; - device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len; - device->gid_tbl_len[port_index] = tprops->gid_tbl_len; + + if (verify_immutable(device, port)) { + ret = -EINVAL; + goto err; + } } ret = 0; goto out; - err: - kfree(device->gid_tbl_len); - kfree(device->pkey_tbl_len); + kfree(device->port_immutable); out: - kfree(tprops); return ret; } @@ -294,9 +290,9 @@ int ib_register_device(struct ib_device *device, spin_lock_init(&device->event_handler_lock); spin_lock_init(&device->client_data_lock); - ret = read_port_table_lengths(device); + ret = read_port_immutable(device); if (ret) { - printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n", + printk(KERN_WARNING "Couldn't create per port immutable data %s\n", device->name); goto out; } @@ -305,8 +301,7 @@ int ib_register_device(struct ib_device *device, if (ret) { printk(KERN_WARNING "Couldn't register device %s with driver model\n", device->name); - kfree(device->gid_tbl_len); - kfree(device->pkey_tbl_len); + kfree(device->port_immutable); goto out; } @@ -348,9 +343,6 @@ void ib_unregister_device(struct ib_device *device) list_del(&device->core_list); - kfree(device->gid_tbl_len); - kfree(device->pkey_tbl_len); - mutex_unlock(&device_mutex); ib_device_unregister_sysfs(device); @@ -558,7 +550,11 @@ EXPORT_SYMBOL(ib_dispatch_event); int ib_query_device(struct ib_device *device, struct ib_device_attr *device_attr) { - return 
device->query_device(device, device_attr); + struct ib_udata uhw = {.outlen = 0, .inlen = 0}; + + memset(device_attr, 0, sizeof(*device_attr)); + + return device->query_device(device, device_attr, &uhw); } EXPORT_SYMBOL(ib_query_device); @@ -575,7 +571,7 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; return device->query_port(device, port_num, port_attr); @@ -653,7 +649,7 @@ int ib_modify_port(struct ib_device *device, if (!device->modify_port) return -ENOSYS; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; return device->modify_port(device, port_num, port_modify_mask, @@ -676,8 +672,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, union ib_gid tmp_gid; int ret, port, i; - for (port = start_port(device); port <= end_port(device); ++port) { - for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) { + for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { + for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { ret = ib_query_gid(device, port, i, &tmp_gid); if (ret) return ret; @@ -709,7 +705,7 @@ int ib_find_pkey(struct ib_device *device, u16 tmp_pkey; int partial_ix = -1; - for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { + for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) { ret = ib_query_pkey(device, port_num, i, &tmp_pkey); if (ret) return ret; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 74c30f4..a4b1466 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3,6 +3,7 @@ * Copyright (c) 2005 Intel Corporation. All rights reserved. 
* Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -44,6 +45,7 @@ #include "mad_priv.h" #include "mad_rmpp.h" #include "smi.h" +#include "opa_smi.h" #include "agent.h" MODULE_LICENSE("Dual BSD/GPL"); @@ -59,8 +61,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); -static struct kmem_cache *ib_mad_cache; - static struct list_head ib_mad_port_list; static u32 ib_mad_client_id = 0; @@ -73,7 +73,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, static void remove_mad_reg_req(struct ib_mad_agent_private *priv); static struct ib_mad_agent_private *find_mad_agent( struct ib_mad_port_private *port_priv, - struct ib_mad *mad); + const struct ib_mad_hdr *mad); static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); @@ -179,12 +179,12 @@ static int is_vendor_method_in_use( return 0; } -int ib_response_mad(struct ib_mad *mad) +int ib_response_mad(const struct ib_mad_hdr *hdr) { - return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) || - (mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || - ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) && - (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP))); + return ((hdr->method & IB_MGMT_METHOD_RESP) || + (hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) || + ((hdr->mgmt_class == IB_MGMT_CLASS_BM) && + (hdr->attr_mod & IB_BM_ATTR_MOD_RESP))); } EXPORT_SYMBOL(ib_response_mad); @@ -717,6 +717,32 @@ static void build_smp_wc(struct ib_qp *qp, wc->port_num = 
port_num; } +static size_t mad_priv_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_mad_private) + mp->mad_size; +} + +static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags) +{ + size_t size = sizeof(struct ib_mad_private) + mad_size; + struct ib_mad_private *ret = kzalloc(size, flags); + + if (ret) + ret->mad_size = mad_size; + + return ret; +} + +static size_t port_mad_size(const struct ib_mad_port_private *port_priv) +{ + return rdma_max_mad_size(port_priv->device, port_priv->port_num); +} + +static size_t mad_priv_dma_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_grh) + mp->mad_size; +} + /* * Return 0 if SMP is to be sent * Return 1 if SMP was consumed locally (whether or not solicited) @@ -727,6 +753,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, { int ret = 0; struct ib_smp *smp = mad_send_wr->send_buf.mad; + struct opa_smp *opa_smp = (struct opa_smp *)smp; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; @@ -736,6 +763,11 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, u8 port_num; struct ib_wc mad_wc; struct ib_send_wr *send_wr = &mad_send_wr->send_wr; + size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); + u16 out_mad_pkey_index = 0; + u16 drslid; + bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); if (device->node_type == RDMA_NODE_IB_SWITCH && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) @@ -749,19 +781,48 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, * If we are at the start of the LID routed part, don't update the * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. */ - if ((ib_get_smp_direction(smp) ? 
smp->dr_dlid : smp->dr_slid) == - IB_LID_PERMISSIVE && - smi_handle_dr_smp_send(smp, device->node_type, port_num) == - IB_SMI_DISCARD) { - ret = -EINVAL; - dev_err(&device->dev, "Invalid directed route\n"); - goto out; - } + if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) { + u32 opa_drslid; + + if ((opa_get_smp_direction(opa_smp) + ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == + OPA_LID_PERMISSIVE && + opa_smi_handle_dr_smp_send(opa_smp, device->node_type, + port_num) == IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid directed route\n"); + goto out; + } + opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); + if (opa_drslid != OPA_LID_PERMISSIVE && + opa_drslid & 0xffff0000) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", + opa_drslid); + goto out; + } + drslid = (u16)(opa_drslid & 0x0000ffff); - /* Check to post send on QP or process locally */ - if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && - smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) - goto out; + /* Check to post send on QP or process locally */ + if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD && + opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) + goto out; + } else { + if ((ib_get_smp_direction(smp) ? 
smp->dr_dlid : smp->dr_slid) == + IB_LID_PERMISSIVE && + smi_handle_dr_smp_send(smp, device->node_type, port_num) == + IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "Invalid directed route\n"); + goto out; + } + drslid = be16_to_cpu(smp->dr_slid); + + /* Check to post send on QP or process locally */ + if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && + smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) + goto out; + } local = kmalloc(sizeof *local, GFP_ATOMIC); if (!local) { @@ -771,7 +832,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } local->mad_priv = NULL; local->recv_mad_agent = NULL; - mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC); + mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC); if (!mad_priv) { ret = -ENOMEM; dev_err(&device->dev, "No memory for local response MAD\n"); @@ -780,18 +841,25 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } build_smp_wc(mad_agent_priv->agent.qp, - send_wr->wr_id, be16_to_cpu(smp->dr_slid), + send_wr->wr_id, drslid, send_wr->wr.ud.pkey_index, send_wr->wr.ud.port_num, &mad_wc); + if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { + mad_wc.byte_len = mad_send_wr->send_buf.hdr_len + + mad_send_wr->send_buf.data_len + + sizeof(struct ib_grh); + } + /* No GRH for DR SMP */ ret = device->process_mad(device, 0, port_num, &mad_wc, NULL, - (struct ib_mad *)smp, - (struct ib_mad *)&mad_priv->mad); + (const struct ib_mad_hdr *)smp, mad_size, + (struct ib_mad_hdr *)mad_priv->mad, + &mad_size, &out_mad_pkey_index); switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: - if (ib_response_mad(&mad_priv->mad.mad) && + if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) && mad_agent_priv->agent.recv_handler) { local->mad_priv = mad_priv; local->recv_mad_agent = mad_agent_priv; @@ -801,39 +869,43 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, */ 
atomic_inc(&mad_agent_priv->refcount); } else - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; case IB_MAD_RESULT_SUCCESS: /* Treat like an incoming receive MAD */ port_priv = ib_get_mad_port(mad_agent_priv->agent.device, mad_agent_priv->agent.port_num); if (port_priv) { - memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad)); + memcpy(mad_priv->mad, smp, mad_priv->mad_size); recv_mad_agent = find_mad_agent(port_priv, - &mad_priv->mad.mad); + (const struct ib_mad_hdr *)mad_priv->mad); } if (!port_priv || !recv_mad_agent) { /* * No receiving agent so drop packet and * generate send completion. */ - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; } local->mad_priv = mad_priv; local->recv_mad_agent = recv_mad_agent; break; default: - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); kfree(local); ret = -EINVAL; goto out; } local->mad_send_wr = mad_send_wr; + if (opa) { + local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index; + local->return_wc_byte_len = mad_size; + } /* Reference MAD agent until send side of local completion handled */ atomic_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ @@ -847,11 +919,11 @@ out: return ret; } -static int get_pad_size(int hdr_len, int data_len) +static int get_pad_size(int hdr_len, int data_len, size_t mad_size) { int seg_size, pad; - seg_size = sizeof(struct ib_mad) - hdr_len; + seg_size = mad_size - hdr_len; if (data_len && seg_size) { pad = seg_size - data_len % seg_size; return pad == seg_size ? 
0 : pad; @@ -870,14 +942,15 @@ static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) } static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, - gfp_t gfp_mask) + size_t mad_size, gfp_t gfp_mask) { struct ib_mad_send_buf *send_buf = &send_wr->send_buf; struct ib_rmpp_mad *rmpp_mad = send_buf->mad; struct ib_rmpp_segment *seg = NULL; int left, seg_size, pad; - send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len; + send_buf->seg_size = mad_size - send_buf->hdr_len; + send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR; seg_size = send_buf->seg_size; pad = send_wr->pad; @@ -910,7 +983,7 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, return 0; } -int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent) +int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent) { return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP); } @@ -920,26 +993,37 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, int rmpp_active, int hdr_len, int data_len, - gfp_t gfp_mask) + gfp_t gfp_mask, + u8 base_version) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; int pad, message_size, ret, size; void *buf; + size_t mad_size; + bool opa; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); - pad = get_pad_size(hdr_len, data_len); + + opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num); + + if (opa && base_version == OPA_MGMT_BASE_VERSION) + mad_size = sizeof(struct opa_mad); + else + mad_size = sizeof(struct ib_mad); + + pad = get_pad_size(hdr_len, data_len, mad_size); message_size = hdr_len + data_len + pad; if (ib_mad_kernel_rmpp_agent(mad_agent)) { - if (!rmpp_active && message_size > sizeof(struct ib_mad)) + if (!rmpp_active && message_size > mad_size) return ERR_PTR(-EINVAL); } else - if (rmpp_active || message_size > sizeof(struct ib_mad)) + if (rmpp_active || 
message_size > mad_size) return ERR_PTR(-EINVAL); - size = rmpp_active ? hdr_len : sizeof(struct ib_mad); + size = rmpp_active ? hdr_len : mad_size; buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); @@ -954,7 +1038,14 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->mad_agent_priv = mad_agent_priv; mad_send_wr->sg_list[0].length = hdr_len; mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; - mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len; + + /* OPA MADs don't have to be the full 2048 bytes */ + if (opa && base_version == OPA_MGMT_BASE_VERSION && + data_len < mad_size - hdr_len) + mad_send_wr->sg_list[1].length = data_len; + else + mad_send_wr->sg_list[1].length = mad_size - hdr_len; + mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey; mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; @@ -967,7 +1058,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; if (rmpp_active) { - ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask); + ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); if (ret) { kfree(buf); return ERR_PTR(ret); @@ -1237,7 +1328,7 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) recv_wc); priv = container_of(mad_priv_hdr, struct ib_mad_private, header); - kmem_cache_free(ib_mad_cache, priv); + kfree(priv); } } EXPORT_SYMBOL(ib_free_recv_mad); @@ -1324,7 +1415,7 @@ static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class) } static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, - char *oui) + const char *oui) { int i; @@ -1622,13 +1713,13 @@ out: static struct ib_mad_agent_private * find_mad_agent(struct ib_mad_port_private *port_priv, - struct ib_mad *mad) + const struct ib_mad_hdr *mad_hdr) { struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; spin_lock_irqsave(&port_priv->reg_lock, flags); - if 
(ib_response_mad(mad)) { + if (ib_response_mad(mad_hdr)) { u32 hi_tid; struct ib_mad_agent_private *entry; @@ -1636,7 +1727,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, * Routing is based on high 32 bits of transaction ID * of MAD. */ - hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32; + hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; list_for_each_entry(entry, &port_priv->agent_list, agent_list) { if (entry->agent.hi_tid == hi_tid) { mad_agent = entry; @@ -1648,45 +1739,45 @@ find_mad_agent(struct ib_mad_port_private *port_priv, struct ib_mad_mgmt_method_table *method; struct ib_mad_mgmt_vendor_class_table *vendor; struct ib_mad_mgmt_vendor_class *vendor_class; - struct ib_vendor_mad *vendor_mad; + const struct ib_vendor_mad *vendor_mad; int index; /* * Routing is based on version, class, and method * For "newer" vendor MADs, also based on OUI */ - if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION) + if (mad_hdr->class_version >= MAX_MGMT_VERSION) goto out; - if (!is_vendor_class(mad->mad_hdr.mgmt_class)) { + if (!is_vendor_class(mad_hdr->mgmt_class)) { class = port_priv->version[ - mad->mad_hdr.class_version].class; + mad_hdr->class_version].class; if (!class) goto out; - if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= + if (convert_mgmt_class(mad_hdr->mgmt_class) >= IB_MGMT_MAX_METHODS) goto out; method = class->method_table[convert_mgmt_class( - mad->mad_hdr.mgmt_class)]; + mad_hdr->mgmt_class)]; if (method) - mad_agent = method->agent[mad->mad_hdr.method & + mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } else { vendor = port_priv->version[ - mad->mad_hdr.class_version].vendor; + mad_hdr->class_version].vendor; if (!vendor) goto out; vendor_class = vendor->vendor_class[vendor_class_index( - mad->mad_hdr.mgmt_class)]; + mad_hdr->mgmt_class)]; if (!vendor_class) goto out; /* Find matching OUI */ - vendor_mad = (struct ib_vendor_mad *)mad; + vendor_mad = (const struct ib_vendor_mad *)mad_hdr; index = find_vendor_oui(vendor_class, 
vendor_mad->oui); if (index == -1) goto out; method = vendor_class->method_table[index]; if (method) { - mad_agent = method->agent[mad->mad_hdr.method & + mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } } @@ -1708,20 +1799,24 @@ out: return mad_agent; } -static int validate_mad(struct ib_mad *mad, u32 qp_num) +static int validate_mad(const struct ib_mad_hdr *mad_hdr, + const struct ib_mad_qp_info *qp_info, + bool opa) { int valid = 0; + u32 qp_num = qp_info->qp->qp_num; /* Make sure MAD base version is understood */ - if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) { - pr_err("MAD received with unsupported base version %d\n", - mad->mad_hdr.base_version); + if (mad_hdr->base_version != IB_MGMT_BASE_VERSION && + (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) { + pr_err("MAD received with unsupported base version %d %s\n", + mad_hdr->base_version, opa ? "(opa)" : ""); goto out; } /* Filter SMI packets sent to other than QP0 */ - if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || - (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || + (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { if (qp_num == 0) valid = 1; } else { @@ -1734,8 +1829,8 @@ out: return valid; } -static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_hdr *mad_hdr) +static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_hdr *mad_hdr) { struct ib_rmpp_mad *rmpp_mad; @@ -1747,16 +1842,16 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); } -static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr, - struct ib_mad_recv_wc *rwc) +static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc) { - return ((struct ib_mad 
*)(wr->send_buf.mad))->mad_hdr.mgmt_class == + return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class == rwc->recv_buf.mad->mad_hdr.mgmt_class; } -static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_wr_private *wr, - struct ib_mad_recv_wc *rwc ) +static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc ) { struct ib_ah_attr attr; u8 send_resp, rcv_resp; @@ -1765,8 +1860,8 @@ static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, u8 port_num = mad_agent_priv->agent.port_num; u8 lmc; - send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad); - rcv_resp = ib_response_mad(rwc->recv_buf.mad); + send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad); + rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr); if (send_resp == rcv_resp) /* both requests, or both responses. GIDs different */ @@ -1811,22 +1906,22 @@ static inline int is_direct(u8 class) } struct ib_mad_send_wr_private* -ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_recv_wc *wc) +ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_recv_wc *wc) { struct ib_mad_send_wr_private *wr; - struct ib_mad *mad; + const struct ib_mad_hdr *mad_hdr; - mad = (struct ib_mad *)wc->recv_buf.mad; + mad_hdr = &wc->recv_buf.mad->mad_hdr; list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { - if ((wr->tid == mad->mad_hdr.tid) && + if ((wr->tid == mad_hdr->tid) && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ - (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) return (wr->status == IB_WC_SUCCESS) ? 
wr : NULL; } @@ -1836,15 +1931,15 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, * been notified that the send has completed */ list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, wr->send_buf.mad) && - wr->tid == mad->mad_hdr.tid && + if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) && + wr->tid == mad_hdr->tid && wr->timeout && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ - (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) /* Verify request has not been canceled */ return (wr->status == IB_WC_SUCCESS) ? wr : NULL; @@ -1879,7 +1974,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } /* Complete corresponding request */ - if (ib_response_mad(mad_recv_wc->recv_buf.mad)) { + if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) { spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { @@ -1924,26 +2019,163 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } } -static bool generate_unmatched_resp(struct ib_mad_private *recv, - struct ib_mad_private *response) +static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv, + const struct ib_mad_qp_info *qp_info, + const struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + enum smi_forward_action retsmi; + struct ib_smp *smp = (struct ib_smp *)recv->mad; + + if (smi_handle_dr_smp_recv(smp, + port_priv->device->node_type, + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if 
(smi_handle_dr_smp_send(smp, + port_priv->device->node_type, + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + smi_get_fwd_port(smp), + qp_info->qp->qp_num, + response->mad_size, + false); + + return IB_SMI_DISCARD; + } + return IB_SMI_HANDLE; +} + +static bool generate_unmatched_resp(const struct ib_mad_private *recv, + struct ib_mad_private *response, + size_t *resp_len, bool opa) { - if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET || - recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) { - memcpy(response, recv, sizeof *response); + const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad; + struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad; + + if (recv_hdr->method == IB_MGMT_METHOD_GET || + recv_hdr->method == IB_MGMT_METHOD_SET) { + memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; response->header.recv_wc.recv_buf.grh = &response->grh; - response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; - response->mad.mad.mad_hdr.status = - cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); - if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION; + resp_hdr->method = IB_MGMT_METHOD_GET_RESP; + resp_hdr->status = 
cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); + if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + resp_hdr->status |= IB_SMP_DIRECTION; + + if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) { + if (recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_LID_ROUTED || + recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + *resp_len = opa_get_smp_header_size( + (struct opa_smp *)recv->mad); + else + *resp_len = sizeof(struct ib_mad_hdr); + } return true; } else { return false; } } + +static enum smi_action +handle_opa_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + enum smi_forward_action retsmi; + struct opa_smp *smp = (struct opa_smp *)recv->mad; + + if (opa_smi_handle_dr_smp_recv(smp, + port_priv->device->node_type, + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = opa_smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (opa_smi_handle_dr_smp_send(smp, + port_priv->device->node_type, + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (opa_smi_check_local_smp(smp, port_priv->device) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.opa_mad = + (struct opa_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + opa_smi_get_fwd_port(smp), + qp_info->qp->qp_num, + recv->header.wc.byte_len, + true); + + return IB_SMI_DISCARD; + } + + return IB_SMI_HANDLE; +} + +static enum 
smi_action +handle_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response, + bool opa) +{ + struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; + + if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && + mad_hdr->class_version == OPA_SMI_CLASS_VERSION) + return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, + response); + + return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response); +} + static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_wc *wc) { @@ -1954,35 +2186,49 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_agent_private *mad_agent; int port_num; int ret = IB_MAD_RESULT_SUCCESS; + size_t mad_size; + u16 resp_mad_pkey_index = 0; + bool opa; mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; qp_info = mad_list->mad_queue->qp_info; dequeue_mad(mad_list); + opa = rdma_cap_opa_mad(qp_info->port_priv->device, + qp_info->port_priv->port_num); + mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, mad_list); recv = container_of(mad_priv_hdr, struct ib_mad_private, header); ib_dma_unmap_single(port_priv->device, recv->header.mapping, - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), + mad_priv_dma_size(recv), DMA_FROM_DEVICE); /* Setup MAD receive work completion from "normal" work completion */ recv->header.wc = *wc; recv->header.recv_wc.wc = &recv->header.wc; - recv->header.recv_wc.mad_len = sizeof(struct ib_mad); - recv->header.recv_wc.recv_buf.mad = &recv->mad.mad; + + if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) { + recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh); + recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + recv->header.recv_wc.mad_len = sizeof(struct ib_mad); + 
recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + + recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; recv->header.recv_wc.recv_buf.grh = &recv->grh; if (atomic_read(&qp_info->snoop_count)) snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); /* Validate MAD */ - if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num)) + if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; - response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); + mad_size = recv->mad_size; + response = alloc_mad_private(mad_size, GFP_KERNEL); if (!response) { dev_err(&port_priv->device->dev, "ib_mad_recv_done_handler no memory for response buffer\n"); @@ -1994,69 +2240,43 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, else port_num = port_priv->port_num; - if (recv->mad.mad.mad_hdr.mgmt_class == + if (((struct ib_mad_hdr *)recv->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - enum smi_forward_action retsmi; - - if (smi_handle_dr_smp_recv(&recv->mad.smp, - port_priv->device->node_type, - port_num, - port_priv->device->phys_port_cnt) == - IB_SMI_DISCARD) + if (handle_smi(port_priv, qp_info, wc, port_num, recv, + response, opa) + == IB_SMI_DISCARD) goto out; - - retsmi = smi_check_forward_dr_smp(&recv->mad.smp); - if (retsmi == IB_SMI_LOCAL) - goto local; - - if (retsmi == IB_SMI_SEND) { /* don't forward */ - if (smi_handle_dr_smp_send(&recv->mad.smp, - port_priv->device->node_type, - port_num) == IB_SMI_DISCARD) - goto out; - - if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD) - goto out; - } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { - /* forward case for switches */ - memcpy(response, recv, sizeof(*response)); - response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.mad = &response->mad.mad; - response->header.recv_wc.recv_buf.grh = &response->grh; - - agent_send_response(&response->mad.mad, - &response->grh, 
wc, - port_priv->device, - smi_get_fwd_port(&recv->mad.smp), - qp_info->qp->qp_num); - - goto out; - } } -local: /* Give driver "right of first refusal" on incoming MAD */ if (port_priv->device->process_mad) { ret = port_priv->device->process_mad(port_priv->device, 0, port_priv->port_num, wc, &recv->grh, - &recv->mad.mad, - &response->mad.mad); + (const struct ib_mad_hdr *)recv->mad, + recv->mad_size, + (struct ib_mad_hdr *)response->mad, + &mad_size, &resp_mad_pkey_index); + + if (opa) + wc->pkey_index = resp_mad_pkey_index; + if (ret & IB_MAD_RESULT_SUCCESS) { if (ret & IB_MAD_RESULT_CONSUMED) goto out; if (ret & IB_MAD_RESULT_REPLY) { - agent_send_response(&response->mad.mad, + agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, port_priv->device, port_num, - qp_info->qp->qp_num); + qp_info->qp->qp_num, + mad_size, opa); goto out; } } } - mad_agent = find_mad_agent(port_priv, &recv->mad.mad); + mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); if (mad_agent) { ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* @@ -2065,17 +2285,17 @@ local: */ recv = NULL; } else if ((ret & IB_MAD_RESULT_SUCCESS) && - generate_unmatched_resp(recv, response)) { - agent_send_response(&response->mad.mad, &recv->grh, wc, - port_priv->device, port_num, qp_info->qp->qp_num); + generate_unmatched_resp(recv, response, &mad_size, opa)) { + agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, + port_priv->device, port_num, + qp_info->qp->qp_num, mad_size, opa); } out: /* Post another receive request for this QP */ if (response) { ib_mad_post_receive_mads(qp_info, response); - if (recv) - kmem_cache_free(ib_mad_cache, recv); + kfree(recv); } else ib_mad_post_receive_mads(qp_info, recv); } @@ -2411,7 +2631,8 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv, list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && + 
if (is_rmpp_data_mad(mad_agent_priv, + mad_send_wr->send_buf.mad) && &mad_send_wr->send_buf == send_buf) return mad_send_wr; } @@ -2468,10 +2689,14 @@ static void local_completions(struct work_struct *work) int free_mad; struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; + bool opa; mad_agent_priv = container_of(work, struct ib_mad_agent_private, local_work); + opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); + spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->local_list)) { local = list_entry(mad_agent_priv->local_list.next, @@ -2481,6 +2706,7 @@ static void local_completions(struct work_struct *work) spin_unlock_irqrestore(&mad_agent_priv->lock, flags); free_mad = 0; if (local->mad_priv) { + u8 base_version; recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { dev_err(&mad_agent_priv->agent.device->dev, @@ -2496,17 +2722,26 @@ static void local_completions(struct work_struct *work) build_smp_wc(recv_mad_agent->agent.qp, (unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), - 0, recv_mad_agent->agent.port_num, &wc); + local->mad_send_wr->send_wr.wr.ud.pkey_index, + recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; - local->mad_priv->header.recv_wc.mad_len = - sizeof(struct ib_mad); + + base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version; + if (opa && base_version == OPA_MGMT_BASE_VERSION) { + local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len; + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); list_add(&local->mad_priv->header.recv_wc.recv_buf.list, &local->mad_priv->header.recv_wc.rmpp_list); local->mad_priv->header.recv_wc.recv_buf.grh 
= NULL; local->mad_priv->header.recv_wc.recv_buf.mad = - &local->mad_priv->mad.mad; + (struct ib_mad *)local->mad_priv->mad; if (atomic_read(&recv_mad_agent->qp_info->snoop_count)) snoop_recv(recv_mad_agent->qp_info, &local->mad_priv->header.recv_wc, @@ -2534,7 +2769,7 @@ local_send_completion: spin_lock_irqsave(&mad_agent_priv->lock, flags); atomic_dec(&mad_agent_priv->refcount); if (free_mad) - kmem_cache_free(ib_mad_cache, local->mad_priv); + kfree(local->mad_priv); kfree(local); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); @@ -2649,7 +2884,6 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_queue *recv_queue = &qp_info->recv_queue; /* Initialize common scatter list fields */ - sg_list.length = sizeof *mad_priv - sizeof mad_priv->header; sg_list.lkey = (*qp_info->port_priv->mr).lkey; /* Initialize common receive WR fields */ @@ -2663,7 +2897,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, mad_priv = mad; mad = NULL; } else { - mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); + mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), + GFP_ATOMIC); if (!mad_priv) { dev_err(&qp_info->port_priv->device->dev, "No memory for receive buffer\n"); @@ -2671,10 +2906,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, break; } } + sg_list.length = mad_priv_dma_size(mad_priv); sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, &mad_priv->grh, - sizeof *mad_priv - - sizeof mad_priv->header, + mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, sg_list.addr))) { @@ -2698,10 +2933,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, spin_unlock_irqrestore(&recv_queue->lock, flags); ib_dma_unmap_single(qp_info->port_priv->device, mad_priv->header.mapping, - sizeof *mad_priv - - sizeof mad_priv->header, + mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); - kmem_cache_free(ib_mad_cache, 
mad_priv); + kfree(mad_priv); dev_err(&qp_info->port_priv->device->dev, "ib_post_recv failed: %d\n", ret); break; @@ -2738,10 +2972,9 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) ib_dma_unmap_single(qp_info->port_priv->device, recv->header.mapping, - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), + mad_priv_dma_size(recv), DMA_FROM_DEVICE); - kmem_cache_free(ib_mad_cache, recv); + kfree(recv); } qp_info->recv_queue.count = 0; @@ -2922,6 +3155,14 @@ static int ib_mad_port_open(struct ib_device *device, unsigned long flags; char name[sizeof "ib_mad123"]; int has_smi; + struct ib_cq_init_attr cq_attr = {}; + + if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) + return -EFAULT; + + if (WARN_ON(rdma_cap_opa_mad(device, port_num) && + rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE)) + return -EFAULT; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); @@ -2938,13 +3179,14 @@ static int ib_mad_port_open(struct ib_device *device, init_mad_qp(port_priv, &port_priv->qp_info[1]); cq_size = mad_sendq_size + mad_recvq_size; - has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND; + has_smi = rdma_cap_ib_smi(device, port_num); if (has_smi) cq_size *= 2; + cq_attr.cqe = cq_size; port_priv->cq = ib_create_cq(port_priv->device, ib_mad_thread_completion_handler, - NULL, port_priv, cq_size, 0); + NULL, port_priv, &cq_attr); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); @@ -3057,9 +3299,6 @@ static void ib_mad_init_device(struct ib_device *device) { int start, end, i; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - if (device->node_type == RDMA_NODE_IB_SWITCH) { start = 0; end = 0; @@ -3069,6 +3308,9 @@ static void ib_mad_init_device(struct ib_device *device) } for (i = start; i <= end; i++) { + if (!rdma_cap_ib_mad(device, i)) + continue; + if 
(ib_mad_port_open(device, i)) { dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; @@ -3086,40 +3328,39 @@ error_agent: dev_err(&device->dev, "Couldn't close port %d\n", i); error: - i--; + while (--i >= start) { + if (!rdma_cap_ib_mad(device, i)) + continue; - while (i >= start) { if (ib_agent_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d for agents\n", i); if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); - i--; } } static void ib_mad_remove_device(struct ib_device *device) { - int i, num_ports, cur_port; - - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + int start, end, i; if (device->node_type == RDMA_NODE_IB_SWITCH) { - num_ports = 1; - cur_port = 0; + start = 0; + end = 0; } else { - num_ports = device->phys_port_cnt; - cur_port = 1; + start = 1; + end = device->phys_port_cnt; } - for (i = 0; i < num_ports; i++, cur_port++) { - if (ib_agent_port_close(device, cur_port)) + + for (i = start; i <= end; i++) { + if (!rdma_cap_ib_mad(device, i)) + continue; + + if (ib_agent_port_close(device, i)) dev_err(&device->dev, - "Couldn't close port %d for agents\n", - cur_port); - if (ib_mad_port_close(device, cur_port)) - dev_err(&device->dev, "Couldn't close port %d\n", - cur_port); + "Couldn't close port %d for agents\n", i); + if (ib_mad_port_close(device, i)) + dev_err(&device->dev, "Couldn't close port %d\n", i); } } @@ -3131,45 +3372,25 @@ static struct ib_client mad_client = { static int __init ib_mad_init_module(void) { - int ret; - mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); - ib_mad_cache = kmem_cache_create("ib_mad", - sizeof(struct ib_mad_private), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!ib_mad_cache) { - pr_err("Couldn't create ib_mad cache\n"); - ret = -ENOMEM; - 
goto error1; - } - INIT_LIST_HEAD(&ib_mad_port_list); if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); - ret = -EINVAL; - goto error2; + return -EINVAL; } return 0; - -error2: - kmem_cache_destroy(ib_mad_cache); -error1: - return ret; } static void __exit ib_mad_cleanup_module(void) { ib_unregister_client(&mad_client); - kmem_cache_destroy(ib_mad_cache); } module_init(ib_mad_init_module); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index d1a0b0e..5be89f9 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -41,6 +41,7 @@ #include <linux/workqueue.h> #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> +#include <rdma/opa_smi.h> #define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */ @@ -56,7 +57,7 @@ /* Registration table sizes */ #define MAX_MGMT_CLASS 80 -#define MAX_MGMT_VERSION 8 +#define MAX_MGMT_VERSION 0x83 #define MAX_MGMT_OUI 8 #define MAX_MGMT_VENDOR_RANGE2 (IB_MGMT_CLASS_VENDOR_RANGE2_END - \ IB_MGMT_CLASS_VENDOR_RANGE2_START + 1) @@ -75,12 +76,9 @@ struct ib_mad_private_header { struct ib_mad_private { struct ib_mad_private_header header; + size_t mad_size; struct ib_grh grh; - union { - struct ib_mad mad; - struct ib_rmpp_mad rmpp_mad; - struct ib_smp smp; - } mad; + u8 mad[0]; } __attribute__ ((packed)); struct ib_rmpp_segment { @@ -150,6 +148,7 @@ struct ib_mad_local_private { struct ib_mad_private *mad_priv; struct ib_mad_agent_private *recv_mad_agent; struct ib_mad_send_wr_private *mad_send_wr; + size_t return_wc_byte_len; }; struct ib_mad_mgmt_method_table { @@ -213,8 +212,8 @@ struct ib_mad_port_private { int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); struct ib_mad_send_wr_private * -ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_recv_wc *mad_recv_wc); +ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_recv_wc *mad_recv_wc); void 
ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index f37878c..382941b 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Intel Inc. All rights reserved. * Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -67,6 +68,7 @@ struct mad_rmpp_recv { u8 mgmt_class; u8 class_version; u8 method; + u8 base_version; }; static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) @@ -139,7 +141,8 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, hdr_len, - 0, GFP_KERNEL); + 0, GFP_KERNEL, + IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) return; @@ -165,7 +168,8 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, - hdr_len, 0, GFP_KERNEL); + hdr_len, 0, GFP_KERNEL, + IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) ib_destroy_ah(ah); else { @@ -316,6 +320,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, rmpp_recv->mgmt_class = mad_hdr->mgmt_class; rmpp_recv->class_version = mad_hdr->class_version; rmpp_recv->method = mad_hdr->method; + rmpp_recv->base_version = mad_hdr->base_version; return rmpp_recv; error: kfree(rmpp_recv); @@ -431,14 +436,23 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv) { struct ib_rmpp_mad *rmpp_mad; int hdr_size, data_size, pad; + bool opa = 
rdma_cap_opa_mad(rmpp_recv->agent->qp_info->port_priv->device, + rmpp_recv->agent->qp_info->port_priv->port_num); rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad; hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); - data_size = sizeof(struct ib_rmpp_mad) - hdr_size; - pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); - if (pad > IB_MGMT_RMPP_DATA || pad < 0) - pad = 0; + if (opa && rmpp_recv->base_version == OPA_MGMT_BASE_VERSION) { + data_size = sizeof(struct opa_rmpp_mad) - hdr_size; + pad = OPA_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > OPA_MGMT_RMPP_DATA || pad < 0) + pad = 0; + } else { + data_size = sizeof(struct ib_rmpp_mad) - hdr_size; + pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > IB_MGMT_RMPP_DATA || pad < 0) + pad = 0; + } return hdr_size + rmpp_recv->seg_num * data_size - pad; } @@ -570,13 +584,14 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) if (mad_send_wr->seg_num == 1) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; - paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA - - mad_send_wr->pad; + paylen = (mad_send_wr->send_buf.seg_count * + mad_send_wr->send_buf.seg_rmpp_size) - + mad_send_wr->pad; } if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; - paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad; + paylen = mad_send_wr->send_buf.seg_rmpp_size - mad_send_wr->pad; } rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index fa17b55..1244f02 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -780,8 +780,7 @@ static void mcast_event_handler(struct ib_event_handler *handler, int index; dev = container_of(handler, struct mcast_device, event_handler); - if 
(rdma_port_get_link_layer(dev->device, event->element.port_num) != - IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_mcast(dev->device, event->element.port_num)) return; index = event->element.port_num - dev->start_port; @@ -808,9 +807,6 @@ static void mcast_add_one(struct ib_device *device) int i; int count = 0; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port, GFP_KERNEL); if (!dev) @@ -824,8 +820,7 @@ static void mcast_add_one(struct ib_device *device) } for (i = 0; i <= dev->end_port - dev->start_port; i++) { - if (rdma_port_get_link_layer(device, dev->start_port + i) != - IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_mcast(device, dev->start_port + i)) continue; port = &dev->port[i]; port->dev = dev; @@ -863,8 +858,7 @@ static void mcast_remove_one(struct ib_device *device) flush_workqueue(mcast_wq); for (i = 0; i <= dev->end_port - dev->start_port; i++) { - if (rdma_port_get_link_layer(device, dev->start_port + i) == - IB_LINK_LAYER_INFINIBAND) { + if (rdma_cap_ib_mcast(device, dev->start_port + i)) { port = &dev->port[i]; deref_port(port); wait_for_completion(&port->comp); diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h new file mode 100644 index 0000000..62d91bf --- /dev/null +++ b/drivers/infiniband/core/opa_smi.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef __OPA_SMI_H_ +#define __OPA_SMI_H_ + +#include <rdma/ib_smi.h> +#include <rdma/opa_smi.h> + +#include "smi.h" + +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, u8 node_type, + int port_num, int phys_port_cnt); +int opa_smi_get_fwd_port(struct opa_smp *smp); +extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp); +extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, + u8 node_type, int port_num); + +/* + * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM + * via process_mad + */ +static inline enum smi_action opa_smi_check_local_smp(struct opa_smp *smp, + struct ib_device *device) +{ + /* C14-9:3 -- We're at the end of the DR segment of path */ + /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ + return (device->process_mad && + !opa_get_smp_direction(smp) && + (smp->hop_ptr == smp->hop_cnt + 1)) ? + IB_SMI_HANDLE : IB_SMI_DISCARD; +} + +/* + * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM + * via process_mad + */ +static inline enum smi_action opa_smi_check_local_returning_smp(struct opa_smp *smp, + struct ib_device *device) +{ + /* C14-13:3 -- We're at the end of the DR segment of path */ + /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ + return (device->process_mad && + opa_get_smp_direction(smp) && + !smp->hop_ptr) ? 
IB_SMI_HANDLE : IB_SMI_DISCARD; +} + +#endif /* __OPA_SMI_H_ */ diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index c38f030..0fae850 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -450,7 +450,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event struct ib_sa_port *port = &sa_dev->port[event->element.port_num - sa_dev->start_port]; - if (rdma_port_get_link_layer(handler->device, port->port_num) != IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_sa(handler->device, port->port_num)) return; spin_lock_irqsave(&port->ah_lock, flags); @@ -540,7 +540,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->port_num = port_num; ah_attr->static_rate = rec->rate; - force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET; + force_grh = rdma_cap_eth_ah(device, port_num); if (rec->hop_limit > 1 || force_grh) { ah_attr->ah_flags = IB_AH_GRH; @@ -583,7 +583,8 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) query->mad_buf = ib_create_send_mad(query->port->agent, 1, query->sm_ah->pkey_index, 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, - gfp_mask); + gfp_mask, + IB_MGMT_BASE_VERSION); if (IS_ERR(query->mad_buf)) { kref_put(&query->sm_ah->ref, free_sm_ah); return -ENOMEM; @@ -1153,9 +1154,7 @@ static void ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; int s, e, i; - - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + int count = 0; if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; @@ -1175,7 +1174,7 @@ static void ib_sa_add_one(struct ib_device *device) for (i = 0; i <= e - s; ++i) { spin_lock_init(&sa_dev->port[i].ah_lock); - if (rdma_port_get_link_layer(device, i + 1) != IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_sa(device, i + 1)) continue; sa_dev->port[i].sm_ah = NULL; @@ -1189,8 +1188,13 @@ static void ib_sa_add_one(struct ib_device *device) goto 
err; INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); + + count++; } + if (!count) + goto free; + ib_set_client_data(device, &sa_client, sa_dev); /* @@ -1204,19 +1208,20 @@ static void ib_sa_add_one(struct ib_device *device) if (ib_register_event_handler(&sa_dev->event_handler)) goto err; - for (i = 0; i <= e - s; ++i) - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) + for (i = 0; i <= e - s; ++i) { + if (rdma_cap_ib_sa(device, i + 1)) update_sm_ah(&sa_dev->port[i].update_task); + } return; err: - while (--i >= 0) - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) + while (--i >= 0) { + if (rdma_cap_ib_sa(device, i + 1)) ib_unregister_mad_agent(sa_dev->port[i].agent); - + } +free: kfree(sa_dev); - return; } @@ -1233,7 +1238,7 @@ static void ib_sa_remove_one(struct ib_device *device) flush_workqueue(ib_wq); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) { + if (rdma_cap_ib_sa(device, i + 1)) { ib_unregister_mad_agent(sa_dev->port[i].agent); if (sa_dev->port[i].sm_ah) kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 5855e44..368a561 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -5,6 +5,7 @@ * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -38,85 +39,82 @@ #include <rdma/ib_smi.h> #include "smi.h" - -/* - * Fixup a directed route SMP for sending - * Return 0 if the SMP should be discarded - */ -enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, int port_num) +#include "opa_smi.h" + +static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, + u8 *hop_ptr, u8 hop_cnt, + const u8 *initial_path, + const u8 *return_path, + u8 direction, + bool dr_dlid_is_permissive, + bool dr_slid_is_permissive) { - u8 hop_ptr, hop_cnt; - - hop_ptr = smp->hop_ptr; - hop_cnt = smp->hop_cnt; - /* See section 14.2.2.2, Vol 1 IB spec */ /* C14-6 -- valid hop_cnt values are from 0 to 63 */ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS) return IB_SMI_DISCARD; - if (!ib_get_smp_direction(smp)) { + if (!direction) { /* C14-9:1 */ - if (hop_cnt && hop_ptr == 0) { - smp->hop_ptr++; - return (smp->initial_path[smp->hop_ptr] == + if (hop_cnt && *hop_ptr == 0) { + (*hop_ptr)++; + return (initial_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:2 */ - if (hop_ptr && hop_ptr < hop_cnt) { + if (*hop_ptr && *hop_ptr < hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) return IB_SMI_DISCARD; - /* smp->return_path set when received */ - smp->hop_ptr++; - return (smp->initial_path[smp->hop_ptr] == + /* return_path set when received */ + (*hop_ptr)++; + return (initial_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:3 -- We're at the end of the DR segment of path */ - if (hop_ptr == hop_cnt) { - /* smp->return_path set when received */ - smp->hop_ptr++; + if (*hop_ptr == hop_cnt) { + /* return_path set when received */ + (*hop_ptr)++; return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_dlid == IB_LID_PERMISSIVE ? + dr_dlid_is_permissive ? 
IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ /* C14-9:5 -- Fail unreasonable hop pointer */ - return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); + return (*hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } else { /* C14-13:1 */ - if (hop_cnt && hop_ptr == hop_cnt + 1) { - smp->hop_ptr--; - return (smp->return_path[smp->hop_ptr] == + if (hop_cnt && *hop_ptr == hop_cnt + 1) { + (*hop_ptr)--; + return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:2 */ - if (2 <= hop_ptr && hop_ptr <= hop_cnt) { + if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) return IB_SMI_DISCARD; - smp->hop_ptr--; - return (smp->return_path[smp->hop_ptr] == + (*hop_ptr)--; + return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:3 -- at the end of the DR segment of path */ - if (hop_ptr == 1) { - smp->hop_ptr--; + if (*hop_ptr == 1) { + (*hop_ptr)--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_slid == IB_LID_PERMISSIVE ? + dr_slid_is_permissive ? 
IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */ - if (hop_ptr == 0) + if (*hop_ptr == 0) return IB_SMI_HANDLE; /* C14-13:5 -- Check for unreasonable hop pointer */ @@ -125,105 +123,164 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, } /* - * Adjust information for a received SMP - * Return 0 if the SMP should be dropped + * Fixup a directed route SMP for sending + * Return IB_SMI_DISCARD if the SMP should be discarded */ -enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, - int port_num, int phys_port_cnt) +enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, + u8 node_type, int port_num) { - u8 hop_ptr, hop_cnt; + return __smi_handle_dr_smp_send(node_type, port_num, + &smp->hop_ptr, smp->hop_cnt, + smp->initial_path, + smp->return_path, + ib_get_smp_direction(smp), + smp->dr_dlid == IB_LID_PERMISSIVE, + smp->dr_slid == IB_LID_PERMISSIVE); +} - hop_ptr = smp->hop_ptr; - hop_cnt = smp->hop_cnt; +enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, + u8 node_type, int port_num) +{ + return __smi_handle_dr_smp_send(node_type, port_num, + &smp->hop_ptr, smp->hop_cnt, + smp->route.dr.initial_path, + smp->route.dr.return_path, + opa_get_smp_direction(smp), + smp->route.dr.dr_dlid == + OPA_LID_PERMISSIVE, + smp->route.dr.dr_slid == + OPA_LID_PERMISSIVE); +} +static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, + int phys_port_cnt, + u8 *hop_ptr, u8 hop_cnt, + const u8 *initial_path, + u8 *return_path, + u8 direction, + bool dr_dlid_is_permissive, + bool dr_slid_is_permissive) +{ /* See section 14.2.2.2, Vol 1 IB spec */ /* C14-6 -- valid hop_cnt values are from 0 to 63 */ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS) return IB_SMI_DISCARD; - if (!ib_get_smp_direction(smp)) { + if (!direction) { /* C14-9:1 -- sender should have incremented hop_ptr */ - if (hop_cnt && hop_ptr == 0) + if (hop_cnt && *hop_ptr == 0) return IB_SMI_DISCARD; /* C14-9:2 -- intermediate hop 
*/ - if (hop_ptr && hop_ptr < hop_cnt) { + if (*hop_ptr && *hop_ptr < hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) return IB_SMI_DISCARD; - smp->return_path[hop_ptr] = port_num; - /* smp->hop_ptr updated when sending */ - return (smp->initial_path[hop_ptr+1] <= phys_port_cnt ? + return_path[*hop_ptr] = port_num; + /* hop_ptr updated when sending */ + return (initial_path[*hop_ptr+1] <= phys_port_cnt ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:3 -- We're at the end of the DR segment of path */ - if (hop_ptr == hop_cnt) { + if (*hop_ptr == hop_cnt) { if (hop_cnt) - smp->return_path[hop_ptr] = port_num; - /* smp->hop_ptr updated when sending */ + return_path[*hop_ptr] = port_num; + /* hop_ptr updated when sending */ return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_dlid == IB_LID_PERMISSIVE ? + dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ /* C14-9:5 -- fail unreasonable hop pointer */ - return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); + return (*hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } else { /* C14-13:1 */ - if (hop_cnt && hop_ptr == hop_cnt + 1) { - smp->hop_ptr--; - return (smp->return_path[smp->hop_ptr] == + if (hop_cnt && *hop_ptr == hop_cnt + 1) { + (*hop_ptr)--; + return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:2 */ - if (2 <= hop_ptr && hop_ptr <= hop_cnt) { + if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) return IB_SMI_DISCARD; - /* smp->hop_ptr updated when sending */ - return (smp->return_path[hop_ptr-1] <= phys_port_cnt ? + /* hop_ptr updated when sending */ + return (return_path[*hop_ptr-1] <= phys_port_cnt ? 
IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:3 -- We're at the end of the DR segment of path */ - if (hop_ptr == 1) { - if (smp->dr_slid == IB_LID_PERMISSIVE) { + if (*hop_ptr == 1) { + if (dr_slid_is_permissive) { /* giving SMP to SM - update hop_ptr */ - smp->hop_ptr--; + (*hop_ptr)--; return IB_SMI_HANDLE; } - /* smp->hop_ptr updated when sending */ + /* hop_ptr updated when sending */ return (node_type == RDMA_NODE_IB_SWITCH ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ /* C14-13:5 -- Check for unreasonable hop pointer */ - return (hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD); + return (*hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } } -enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) +/* + * Adjust information for a received SMP + * Return IB_SMI_DISCARD if the SMP should be dropped + */ +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, + int port_num, int phys_port_cnt) { - u8 hop_ptr, hop_cnt; + return __smi_handle_dr_smp_recv(node_type, port_num, phys_port_cnt, + &smp->hop_ptr, smp->hop_cnt, + smp->initial_path, + smp->return_path, + ib_get_smp_direction(smp), + smp->dr_dlid == IB_LID_PERMISSIVE, + smp->dr_slid == IB_LID_PERMISSIVE); +} - hop_ptr = smp->hop_ptr; - hop_cnt = smp->hop_cnt; +/* + * Adjust information for a received SMP + * Return IB_SMI_DISCARD if the SMP should be dropped + */ +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, u8 node_type, + int port_num, int phys_port_cnt) +{ + return __smi_handle_dr_smp_recv(node_type, port_num, phys_port_cnt, + &smp->hop_ptr, smp->hop_cnt, + smp->route.dr.initial_path, + smp->route.dr.return_path, + opa_get_smp_direction(smp), + smp->route.dr.dr_dlid == + OPA_LID_PERMISSIVE, + smp->route.dr.dr_slid == + OPA_LID_PERMISSIVE); +} - if (!ib_get_smp_direction(smp)) { +static enum smi_forward_action __smi_check_forward_dr_smp(u8 hop_ptr, u8 hop_cnt, + u8 direction, + bool dr_dlid_is_permissive, + bool 
dr_slid_is_permissive) +{ + if (!direction) { /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) return IB_SMI_FORWARD; /* C14-9:3 -- at the end of the DR segment of path */ if (hop_ptr == hop_cnt) - return (smp->dr_dlid == IB_LID_PERMISSIVE ? + return (dr_dlid_is_permissive ? IB_SMI_SEND : IB_SMI_LOCAL); /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ @@ -236,10 +293,29 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) /* C14-13:3 -- at the end of the DR segment of path */ if (hop_ptr == 1) - return (smp->dr_slid != IB_LID_PERMISSIVE ? + return (!dr_slid_is_permissive ? IB_SMI_SEND : IB_SMI_LOCAL); } return IB_SMI_LOCAL; + +} + +enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) +{ + return __smi_check_forward_dr_smp(smp->hop_ptr, smp->hop_cnt, + ib_get_smp_direction(smp), + smp->dr_dlid == IB_LID_PERMISSIVE, + smp->dr_slid == IB_LID_PERMISSIVE); +} + +enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp) +{ + return __smi_check_forward_dr_smp(smp->hop_ptr, smp->hop_cnt, + opa_get_smp_direction(smp), + smp->route.dr.dr_dlid == + OPA_LID_PERMISSIVE, + smp->route.dr.dr_slid == + OPA_LID_PERMISSIVE); } /* @@ -251,3 +327,13 @@ int smi_get_fwd_port(struct ib_smp *smp) return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] : smp->return_path[smp->hop_ptr-1]); } + +/* + * Return the forwarding port number from initial_path for outgoing SMP and + * from return_path for returning SMP + */ +int opa_smi_get_fwd_port(struct opa_smp *smp) +{ + return !opa_get_smp_direction(smp) ? 
smp->route.dr.initial_path[smp->hop_ptr+1] : + smp->route.dr.return_path[smp->hop_ptr-1]; +} diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index cbd0383..ed6b6c8 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -326,6 +326,8 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, int width = (tab_attr->index >> 16) & 0xff; struct ib_mad *in_mad = NULL; struct ib_mad *out_mad = NULL; + size_t mad_size = sizeof(*out_mad); + u16 out_mad_pkey_index = 0; ssize_t ret; if (!p->ibdev->process_mad) @@ -347,7 +349,10 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, in_mad->data[41] = p->port_num; /* PortSelect field */ if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY, - p->port_num, NULL, NULL, in_mad, out_mad) & + p->port_num, NULL, NULL, + (const struct ib_mad_hdr *)in_mad, mad_size, + (struct ib_mad_hdr *)out_mad, &mad_size, + &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { ret = -EINVAL; @@ -456,6 +461,7 @@ static void ib_device_release(struct device *device) { struct ib_device *dev = container_of(device, struct ib_device, dev); + kfree(dev->port_immutable); kfree(dev); } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index f2f6393..62c24b1 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1253,8 +1253,7 @@ static void ib_ucm_add_one(struct ib_device *device) dev_t base; struct ib_ucm_device *ucm_dev; - if (!device->alloc_ucontext || - rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1)) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 45d67e9..ad45469 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -722,26 
+722,13 @@ static ssize_t ucma_query_route(struct ucma_file *file, resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.port_num = ctx->cm_id->port_num; - switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) { - case RDMA_TRANSPORT_IB: - switch (rdma_port_get_link_layer(ctx->cm_id->device, - ctx->cm_id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ucma_copy_ib_route(&resp, &ctx->cm_id->route); - break; - case IB_LINK_LAYER_ETHERNET: - ucma_copy_iboe_route(&resp, &ctx->cm_id->route); - break; - default: - break; - } - break; - case RDMA_TRANSPORT_IWARP: + + if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_ib_route(&resp, &ctx->cm_id->route); + else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_iboe_route(&resp, &ctx->cm_id->route); + else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_iw_route(&resp, &ctx->cm_id->route); - break; - default: - break; - } out: if (copy_to_user((void __user *)(unsigned long)cmd.response, diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 928cdd2..35567ff 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -99,7 +99,6 @@ struct ib_umad_port { }; struct ib_umad_device { - int start_port, end_port; struct kobject kobj; struct ib_umad_port port[0]; }; @@ -263,20 +262,23 @@ static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, { struct ib_mad_recv_buf *recv_buf; int left, seg_payload, offset, max_seg_payload; + size_t seg_size; - /* We need enough room to copy the first (or only) MAD segment. */ recv_buf = &packet->recv_wc->recv_buf; - if ((packet->length <= sizeof (*recv_buf->mad) && + seg_size = packet->recv_wc->mad_seg_size; + + /* We need enough room to copy the first (or only) MAD segment. 
*/ + if ((packet->length <= seg_size && count < hdr_size(file) + packet->length) || - (packet->length > sizeof (*recv_buf->mad) && - count < hdr_size(file) + sizeof (*recv_buf->mad))) + (packet->length > seg_size && + count < hdr_size(file) + seg_size)) return -EINVAL; if (copy_to_user(buf, &packet->mad, hdr_size(file))) return -EFAULT; buf += hdr_size(file); - seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); + seg_payload = min_t(int, packet->length, seg_size); if (copy_to_user(buf, recv_buf->mad, seg_payload)) return -EFAULT; @@ -293,7 +295,7 @@ static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, return -ENOSPC; } offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); - max_seg_payload = sizeof (struct ib_mad) - offset; + max_seg_payload = seg_size - offset; for (left = packet->length - seg_payload, buf += seg_payload; left; left -= seg_payload, buf += seg_payload) { @@ -426,11 +428,11 @@ static int is_duplicate(struct ib_umad_file *file, * the same TID, reject the second as a duplicate. This is more * restrictive than required by the spec. 
*/ - if (!ib_response_mad((struct ib_mad *) hdr)) { - if (!ib_response_mad((struct ib_mad *) sent_hdr)) + if (!ib_response_mad(hdr)) { + if (!ib_response_mad(sent_hdr)) return 1; continue; - } else if (!ib_response_mad((struct ib_mad *) sent_hdr)) + } else if (!ib_response_mad(sent_hdr)) continue; if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr)) @@ -451,6 +453,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_rmpp_mad *rmpp_mad; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; + u8 base_version; if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; @@ -517,11 +520,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, rmpp_active = 0; } + base_version = ((struct ib_mad_hdr *)&packet->mad.data)->base_version; data_len = count - hdr_size(file) - hdr_len; packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), packet->mad.hdr.pkey_index, rmpp_active, - hdr_len, data_len, GFP_KERNEL); + hdr_len, data_len, GFP_KERNEL, + base_version); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; @@ -1273,16 +1278,10 @@ static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; int s, e, i; + int count = 0; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - - if (device->node_type == RDMA_NODE_IB_SWITCH) - s = e = 0; - else { - s = 1; - e = device->phys_port_cnt; - } + s = rdma_start_port(device); + e = rdma_end_port(device); umad_dev = kzalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), @@ -1292,25 +1291,34 @@ static void ib_umad_add_one(struct ib_device *device) kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); - umad_dev->start_port = s; - umad_dev->end_port = e; - for (i = s; i <= e; ++i) { + if (!rdma_cap_ib_mad(device, i)) + continue; + umad_dev->port[i - s].umad_dev = umad_dev; if (ib_umad_init_port(device, i, umad_dev, &umad_dev->port[i - s])) goto err; + + 
count++; } + if (!count) + goto free; + ib_set_client_data(device, &umad_client, umad_dev); return; err: - while (--i >= s) - ib_umad_kill_port(&umad_dev->port[i - s]); + while (--i >= s) { + if (!rdma_cap_ib_mad(device, i)) + continue; + ib_umad_kill_port(&umad_dev->port[i - s]); + } +free: kobject_put(&umad_dev->kobj); } @@ -1322,8 +1330,10 @@ static void ib_umad_remove_one(struct ib_device *device) if (!umad_dev) return; - for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) - ib_umad_kill_port(&umad_dev->port[i]); + for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) { + if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) + ib_umad_kill_port(&umad_dev->port[i]); + } kobject_put(&umad_dev->kobj); } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index b716b08..ba365b6 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -259,5 +259,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd); IB_UVERBS_DECLARE_EX_CMD(create_flow); IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); +IB_UVERBS_DECLARE_EX_CMD(create_cq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a9f0489..bbb02ff 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1330,40 +1330,37 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw, + struct ib_uverbs_ex_create_cq *cmd, + size_t cmd_sz, + int (*cb)(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *udata, + void *context), + void *context) { - struct ib_uverbs_create_cq cmd; - struct 
ib_uverbs_create_cq_resp resp; - struct ib_udata udata; struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; + struct ib_uverbs_ex_create_cq_resp resp; + struct ib_cq_init_attr attr = {}; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - - if (cmd.comp_vector >= file->device->num_comp_vectors) - return -EINVAL; + if (cmd->comp_vector >= file->device->num_comp_vectors) + return ERR_PTR(-EINVAL); obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class); + init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class); down_write(&obj->uobject.mutex); - if (cmd.comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + if (cmd->comp_channel >= 0) { + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); if (!ev_file) { ret = -EINVAL; goto err; @@ -1376,9 +1373,14 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); - cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, - cmd.comp_vector, - file->ucontext, &udata); + attr.cqe = cmd->cqe; + attr.comp_vector = cmd->comp_vector; + + if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) + attr.flags = cmd->flags; + + cq = file->device->ib_dev->create_cq(file->device->ib_dev, &attr, + file->ucontext, uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; @@ -1397,14 +1399,15 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, goto err_free; memset(&resp, 0, sizeof resp); - resp.cq_handle = obj->uobject.id; - resp.cqe = cq->cqe; + resp.base.cq_handle = obj->uobject.id; + resp.base.cqe = cq->cqe; - if (copy_to_user((void __user 
*) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + + ret = cb(file, obj, &resp, ucore, context); + if (ret) + goto err_cb; mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); @@ -1414,9 +1417,9 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, up_write(&obj->uobject.mutex); - return in_len; + return obj; -err_copy: +err_cb: idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); err_free: @@ -1428,7 +1431,106 @@ err_file: err: put_uobj_write(&obj->uobject); - return ret; + + return ERR_PTR(ret); +} + +static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *ucore, void *context) +{ + if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) + return -EFAULT; + + return 0; +} + +ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_cq cmd; + struct ib_uverbs_ex_create_cq cmd_ex; + struct ib_uverbs_create_cq_resp resp; + struct ib_udata ucore; + struct ib_udata uhw; + struct ib_ucq_object *obj; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&ucore, buf, cmd.response, sizeof(cmd), sizeof(resp)); + + INIT_UDATA(&uhw, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); + + memset(&cmd_ex, 0, sizeof(cmd_ex)); + cmd_ex.user_handle = cmd.user_handle; + cmd_ex.cqe = cmd.cqe; + cmd_ex.comp_vector = cmd.comp_vector; + cmd_ex.comp_channel = cmd.comp_channel; + + obj = create_cq(file, &ucore, &uhw, &cmd_ex, + offsetof(typeof(cmd_ex), comp_channel) + + sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, + NULL); + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + return 
in_len; +} + +static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *ucore, void *context) +{ + if (ib_copy_to_udata(ucore, resp, resp->response_length)) + return -EFAULT; + + return 0; +} + +int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_cq_resp resp; + struct ib_uverbs_ex_create_cq cmd; + struct ib_ucq_object *obj; + int err; + + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + + if (ucore->outlen < (offsetof(typeof(resp), response_length) + + sizeof(resp.response_length))) + return -ENOSPC; + + obj = create_cq(file, ucore, uhw, &cmd, + min(ucore->inlen, sizeof(cmd)), + ib_uverbs_ex_create_cq_cb, NULL); + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + return 0; } ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, @@ -3324,7 +3426,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, if (ucore->outlen < resp.response_length) return -ENOSPC; - err = device->query_device(device, &attr); + memset(&attr, 0, sizeof(attr)); + + err = device->query_device(device, &attr, uhw); if (err) return err; @@ -3348,6 +3452,18 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, #endif resp.response_length += sizeof(resp.odp_caps); + if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask)) + goto end; + + resp.timestamp_mask = attr.timestamp_mask; + resp.response_length += sizeof(resp.timestamp_mask); + + if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock)) + goto end; + + resp.hca_core_clock = attr.hca_core_clock; + resp.response_length += sizeof(resp.hca_core_clock); + end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); if (err) diff --git 
a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 88cce9b..f6eef2d 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -124,6 +124,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, + [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, }; static void ib_uverbs_add_one(struct ib_device *device); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f93eb8d..bac3fb4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -48,6 +48,71 @@ #include "core_priv.h" +static const char * const ib_events[] = { + [IB_EVENT_CQ_ERR] = "CQ error", + [IB_EVENT_QP_FATAL] = "QP fatal error", + [IB_EVENT_QP_REQ_ERR] = "QP request error", + [IB_EVENT_QP_ACCESS_ERR] = "QP access error", + [IB_EVENT_COMM_EST] = "communication established", + [IB_EVENT_SQ_DRAINED] = "send queue drained", + [IB_EVENT_PATH_MIG] = "path migration successful", + [IB_EVENT_PATH_MIG_ERR] = "path migration error", + [IB_EVENT_DEVICE_FATAL] = "device fatal error", + [IB_EVENT_PORT_ACTIVE] = "port active", + [IB_EVENT_PORT_ERR] = "port error", + [IB_EVENT_LID_CHANGE] = "LID change", + [IB_EVENT_PKEY_CHANGE] = "P_key change", + [IB_EVENT_SM_CHANGE] = "SM change", + [IB_EVENT_SRQ_ERR] = "SRQ error", + [IB_EVENT_SRQ_LIMIT_REACHED] = "SRQ limit reached", + [IB_EVENT_QP_LAST_WQE_REACHED] = "last WQE reached", + [IB_EVENT_CLIENT_REREGISTER] = "client reregister", + [IB_EVENT_GID_CHANGE] = "GID changed", +}; + +const char *ib_event_msg(enum ib_event_type event) +{ + size_t index = event; + + return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ? 
+ ib_events[index] : "unrecognized event"; +} +EXPORT_SYMBOL(ib_event_msg); + +static const char * const wc_statuses[] = { + [IB_WC_SUCCESS] = "success", + [IB_WC_LOC_LEN_ERR] = "local length error", + [IB_WC_LOC_QP_OP_ERR] = "local QP operation error", + [IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error", + [IB_WC_LOC_PROT_ERR] = "local protection error", + [IB_WC_WR_FLUSH_ERR] = "WR flushed", + [IB_WC_MW_BIND_ERR] = "memory management operation error", + [IB_WC_BAD_RESP_ERR] = "bad response error", + [IB_WC_LOC_ACCESS_ERR] = "local access error", + [IB_WC_REM_INV_REQ_ERR] = "invalid request error", + [IB_WC_REM_ACCESS_ERR] = "remote access error", + [IB_WC_REM_OP_ERR] = "remote operation error", + [IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded", + [IB_WC_RNR_RETRY_EXC_ERR] = "RNR retry counter exceeded", + [IB_WC_LOC_RDD_VIOL_ERR] = "local RDD violation error", + [IB_WC_REM_INV_RD_REQ_ERR] = "remote invalid RD request", + [IB_WC_REM_ABORT_ERR] = "operation aborted", + [IB_WC_INV_EECN_ERR] = "invalid EE context number", + [IB_WC_INV_EEC_STATE_ERR] = "invalid EE context state", + [IB_WC_FATAL_ERR] = "fatal error", + [IB_WC_RESP_TIMEOUT_ERR] = "response timeout error", + [IB_WC_GENERAL_ERR] = "general error", +}; + +const char *ib_wc_status_msg(enum ib_wc_status status) +{ + size_t index = status; + + return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ? 
+ wc_statuses[index] : "unrecognized status"; +} +EXPORT_SYMBOL(ib_wc_status_msg); + __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) { switch (rate) { @@ -192,17 +257,16 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); -int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, - struct ib_grh *grh, struct ib_ah_attr *ah_attr) +int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, + const struct ib_wc *wc, const struct ib_grh *grh, + struct ib_ah_attr *ah_attr) { u32 flow_class; u16 gid_index; int ret; - int is_eth = (rdma_port_get_link_layer(device, port_num) == - IB_LINK_LAYER_ETHERNET); memset(ah_attr, 0, sizeof *ah_attr); - if (is_eth) { + if (rdma_cap_eth_ah(device, port_num)) { if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; @@ -244,8 +308,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, } EXPORT_SYMBOL(ib_init_ah_from_wc); -struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, - struct ib_grh *grh, u8 port_num) +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, + const struct ib_grh *grh, u8 port_num) { struct ib_ah_attr ah_attr; int ret; @@ -871,7 +935,7 @@ int ib_resolve_eth_l2_attrs(struct ib_qp *qp, union ib_gid sgid; if ((*qp_attr_mask & IB_QP_AV) && - (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) { + (rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))) { ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, qp_attr->ah_attr.grh.sgid_index, &sgid); if (ret) @@ -1012,11 +1076,12 @@ EXPORT_SYMBOL(ib_destroy_qp); struct ib_cq *ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), - void *cq_context, int cqe, int comp_vector) + void *cq_context, + const struct ib_cq_init_attr *cq_attr) { struct ib_cq *cq; - cq = device->create_cq(device, cqe, comp_vector, 
NULL, NULL); + cq = device->create_cq(device, cq_attr, NULL, NULL); if (!IS_ERR(cq)) { cq->device = device; diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index bdf3507..25c3f00 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -63,13 +63,16 @@ #include "c2_provider.h" #include "c2_user.h" -static int c2_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int c2_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct c2_dev *c2dev = to_c2dev(ibdev); pr_debug("%s:%u\n", __func__, __LINE__); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + *props = c2dev->props; return 0; } @@ -286,13 +289,18 @@ static int c2_destroy_qp(struct ib_qp *ib_qp) return 0; } -static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, int vector, +static struct ib_cq *c2_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; struct c2_cq *cq; int err; + if (attr->flags) + return ERR_PTR(-EINVAL); + cq = kmalloc(sizeof(*cq), GFP_KERNEL); if (!cq) { pr_debug("%s: Unable to allocate CQ\n", __func__); @@ -582,9 +590,13 @@ static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) static int c2_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) { pr_debug("%s:%u\n", __func__, __LINE__); return -ENOSYS; @@ -757,6 +769,23 @@ static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev) return netdev; } +static int c2_port_immutable(struct ib_device *ibdev, u8 
port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = c2_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} + int c2_register_device(struct c2_dev *dev) { int ret = -ENOMEM; @@ -820,6 +849,7 @@ int c2_register_device(struct c2_dev *dev) dev->ibdev.reg_phys_mr = c2_reg_phys_mr; dev->ibdev.reg_user_mr = c2_reg_user_mr; dev->ibdev.dereg_mr = c2_dereg_mr; + dev->ibdev.get_port_immutable = c2_port_immutable; dev->ibdev.alloc_fmr = NULL; dev->ibdev.unmap_fmr = NULL; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 811b24a..b1b7323 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -85,9 +85,13 @@ static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) static int iwch_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) { return -ENOSYS; } @@ -138,10 +142,12 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq) return 0; } -static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int vector, - struct ib_ucontext *ib_context, - struct ib_udata *udata) +static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_context, + struct ib_udata *udata) { + int entries = attr->cqe; struct iwch_dev *rhp; struct iwch_cq *chp; struct iwch_create_cq_resp uresp; @@ -151,6 +157,9 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int 
entries, int ve size_t resplen; PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); + if (attr->flags) + return ERR_PTR(-EINVAL); + rhp = to_iwch_dev(ibdev); chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) @@ -1145,13 +1154,17 @@ static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) (fw_mic & 0xffff); } -static int iwch_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct iwch_dev *dev; + PDBG("%s ibdev %p\n", __func__, ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + dev = to_iwch_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); @@ -1343,6 +1356,23 @@ static struct device_attribute *iwch_class_attributes[] = { &dev_attr_board_id, }; +static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = iwch_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} + int iwch_register_device(struct iwch_dev *dev) { int ret; @@ -1420,6 +1450,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.post_recv = iwch_post_receive; dev->ibdev.get_protocol_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; + dev->ibdev.get_port_immutable = iwch_port_immutable; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 68ddb37..c7aab48 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -156,19 +156,17 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, goto err4; cq->gen = 1; + cq->gts = 
rdev->lldi.gts_reg; cq->rdev = rdev; - if (user) { - u32 off = (cq->cqid << rdev->cqshift) & PAGE_MASK; - cq->ugts = (u64)rdev->bar2_pa + off; - } else if (is_t4(rdev->lldi.adapter_type)) { - cq->gts = rdev->lldi.gts_reg; - cq->qid_mask = -1U; - } else { - u32 off = ((cq->cqid << rdev->cqshift) & PAGE_MASK) + 12; - - cq->gts = rdev->bar2_kva + off; - cq->qid_mask = rdev->qpmask; + cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS, + &cq->bar2_qid, + user ? &cq->bar2_pa : NULL); + if (user && !cq->bar2_va) { + pr_warn(MOD "%s: cqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), cq->cqid); + ret = -EINVAL; + goto err4; } return 0; err4: @@ -866,10 +864,13 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) return 0; } -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *ib_context, +struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_context, struct ib_udata *udata) { + int entries = attr->cqe; + int vector = attr->comp_vector; struct c4iw_dev *rhp; struct c4iw_cq *chp; struct c4iw_create_cq_resp uresp; @@ -879,6 +880,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, struct c4iw_mm_entry *mm, *mm2; PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); + if (attr->flags) + return ERR_PTR(-EINVAL); rhp = to_c4iw_dev(ibdev); @@ -971,7 +974,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, insert_mmap(ucontext, mm); mm2->key = uresp.gts_key; - mm2->addr = chp->cq.ugts; + mm2->addr = chp->cq.bar2_pa; mm2->len = PAGE_SIZE; insert_mmap(ucontext, mm2); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 7e895d7..1a29739 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -795,13 +795,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) goto err1; } - /* - * qpshift is the number of bits to shift the qpid 
left in order - * to get the correct address of the doorbell for that qp. - */ - rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density); rdev->qpmask = rdev->lldi.udb_density - 1; - rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density); rdev->cqmask = rdev->lldi.ucq_density - 1; PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d " "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x " @@ -815,14 +809,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, rdev->lldi.vr->cq.size); - PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu " - "qpmask 0x%x cqshift %lu cqmask 0x%x\n", + PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p " + "qpmask 0x%x cqmask 0x%x\n", (unsigned)pci_resource_len(rdev->lldi.pdev, 2), (void *)pci_resource_start(rdev->lldi.pdev, 2), - rdev->lldi.db_reg, - rdev->lldi.gts_reg, - rdev->qpshift, rdev->qpmask, - rdev->cqshift, rdev->cqmask); + rdev->lldi.db_reg, rdev->lldi.gts_reg, + rdev->qpmask, rdev->cqmask); if (c4iw_num_stags(rdev) == 0) { err = -EINVAL; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 97bb555..cc77844 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -165,9 +165,7 @@ struct wr_log_entry { struct c4iw_rdev { struct c4iw_resource resource; - unsigned long qpshift; u32 qpmask; - unsigned long cqshift; u32 cqmask; struct c4iw_dev_ucontext uctx; struct gen_pool *pbl_pool; @@ -992,10 +990,10 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int acc, u64 *iova_start); int c4iw_dereg_mr(struct ib_mr *ib_mr); int c4iw_destroy_cq(struct ib_cq *ib_cq); -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, - int vector, - struct ib_ucontext *ib_context, - struct ib_udata *udata); +struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_context, + struct ib_udata *udata); int 
c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_destroy_qp(struct ib_qp *ib_qp); @@ -1032,6 +1030,9 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; +void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, + enum cxgb4_bar2_qtype qtype, + unsigned int *pbar2_qid, u64 *pbar2_pa); extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); extern int c4iw_wr_log; extern int db_fc_threshold; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 66bd6a2..62c816a 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -80,9 +80,13 @@ static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, struct ib_wc *in_wc, - struct ib_grh *in_grh, struct ib_mad *in_mad, - struct ib_mad *out_mad) + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) { return -ENOSYS; } @@ -301,13 +305,17 @@ static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, return 0; } -static int c4iw_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct c4iw_dev *dev; + PDBG("%s ibdev %p\n", __func__, ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + dev = to_c4iw_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); @@ -465,6 +473,23 @@ static struct device_attribute *c4iw_class_attributes[] = { 
&dev_attr_board_id, }; +static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = c4iw_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} + int c4iw_register_device(struct c4iw_dev *dev) { int ret; @@ -542,6 +567,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.post_recv = c4iw_post_receive; dev->ibdev.get_protocol_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; + dev->ibdev.get_port_immutable = c4iw_port_immutable; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 389ced3..6517e12 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -165,6 +165,29 @@ static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, return 0; } +/* + * Determine the BAR2 virtual address and qid. If pbar2_pa is not NULL, + * then this is a user mapping so compute the page-aligned physical address + * for mapping. + */ +void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, + enum cxgb4_bar2_qtype qtype, + unsigned int *pbar2_qid, u64 *pbar2_pa) +{ + u64 bar2_qoffset; + int ret; + + ret = cxgb4_bar2_sge_qregs(rdev->lldi.ports[0], qid, qtype, + pbar2_pa ? 
1 : 0, + &bar2_qoffset, pbar2_qid); + if (ret) + return NULL; + + if (pbar2_pa) + *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK; + return rdev->bar2_kva + bar2_qoffset; +} + static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx) @@ -236,25 +259,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = rdev->lldi.db_reg; - wq->gts = rdev->lldi.gts_reg; - if (user || is_t5(rdev->lldi.adapter_type)) { - u32 off; - off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK; - if (user) { - wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off); - } else { - off += 128 * (wq->sq.qid & rdev->qpmask) + 8; - wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off); - } - off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK; - if (user) { - wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off); - } else { - off += 128 * (wq->rq.qid & rdev->qpmask) + 8; - wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off); - } + wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, + &wq->sq.bar2_qid, + user ? &wq->sq.bar2_pa : NULL); + wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS, + &wq->rq.bar2_qid, + user ? &wq->rq.bar2_pa : NULL); + + /* + * User mode must have bar2 access. 
+ */ + if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) { + pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); + goto free_dma; } + wq->rdev = rdev; wq->rq.msn = 1; @@ -336,10 +357,9 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (ret) goto free_dma; - PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n", + PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n", __func__, wq->sq.qid, wq->rq.qid, wq->db, - (__force unsigned long) wq->sq.udb, - (__force unsigned long) wq->rq.udb); + wq->sq.bar2_va, wq->rq.bar2_va); return 0; free_dma: @@ -1766,11 +1786,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); insert_mmap(ucontext, mm2); mm3->key = uresp.sq_db_gts_key; - mm3->addr = (__force unsigned long)qhp->wq.sq.udb; + mm3->addr = (__force unsigned long)qhp->wq.sq.bar2_pa; mm3->len = PAGE_SIZE; insert_mmap(ucontext, mm3); mm4->key = uresp.rq_db_gts_key; - mm4->addr = (__force unsigned long)qhp->wq.rq.udb; + mm4->addr = (__force unsigned long)qhp->wq.rq.bar2_pa; mm4->len = PAGE_SIZE; insert_mmap(ucontext, mm4); if (mm5) { diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 7f2a6c2..274a7ab 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -33,6 +33,7 @@ #include "t4_hw.h" #include "t4_regs.h" +#include "t4_values.h" #include "t4_msg.h" #include "t4fw_ri_api.h" @@ -290,8 +291,10 @@ struct t4_sq { unsigned long phys_addr; struct t4_swsqe *sw_sq; struct t4_swsqe *oldest_read; - u64 __iomem *udb; + void __iomem *bar2_va; + u64 bar2_pa; size_t memsize; + u32 bar2_qid; u32 qid; u16 in_use; u16 size; @@ -314,8 +317,10 @@ struct t4_rq { dma_addr_t dma_addr; DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_swrqe *sw_rq; - u64 __iomem *udb; + void __iomem *bar2_va; + u64 bar2_pa; size_t memsize; + u32 bar2_qid; u32 qid; u32 msn; 
u32 rqt_hwaddr; @@ -332,7 +337,6 @@ struct t4_wq { struct t4_sq sq; struct t4_rq rq; void __iomem *db; - void __iomem *gts; struct c4iw_rdev *rdev; int flushed; }; @@ -457,15 +461,18 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, /* Flush host queue memory writes. */ wmb(); - if (t5) { - if (inc == 1 && wqe) { + if (wq->sq.bar2_va) { + if (inc == 1 && wq->sq.bar2_qid == 0 && wqe) { PDBG("%s: WC wq->sq.pidx = %d\n", __func__, wq->sq.pidx); - pio_copy(wq->sq.udb + 7, (void *)wqe); + pio_copy((u64 __iomem *) + (wq->sq.bar2_va + SGE_UDB_WCDOORBELL), + (u64 *)wqe); } else { PDBG("%s: DB wq->sq.pidx = %d\n", __func__, wq->sq.pidx); - writel(PIDX_T5_V(inc), wq->sq.udb); + writel(PIDX_T5_V(inc) | QID_V(wq->sq.bar2_qid), + wq->sq.bar2_va + SGE_UDB_KDOORBELL); } /* Flush user doorbell area writes. */ @@ -481,15 +488,18 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5, /* Flush host queue memory writes. */ wmb(); - if (t5) { - if (inc == 1 && wqe) { + if (wq->rq.bar2_va) { + if (inc == 1 && wq->rq.bar2_qid == 0 && wqe) { PDBG("%s: WC wq->rq.pidx = %d\n", __func__, wq->rq.pidx); - pio_copy(wq->rq.udb + 7, (void *)wqe); + pio_copy((u64 __iomem *) + (wq->rq.bar2_va + SGE_UDB_WCDOORBELL), + (void *)wqe); } else { PDBG("%s: DB wq->rq.pidx = %d\n", __func__, wq->rq.pidx); - writel(PIDX_T5_V(inc), wq->rq.udb); + writel(PIDX_T5_V(inc) | QID_V(wq->rq.bar2_qid), + wq->rq.bar2_va + SGE_UDB_KDOORBELL); } /* Flush user doorbell area writes. 
*/ @@ -534,8 +544,10 @@ struct t4_cq { DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_cqe *sw_queue; void __iomem *gts; + void __iomem *bar2_va; + u64 bar2_pa; + u32 bar2_qid; struct c4iw_rdev *rdev; - u64 ugts; size_t memsize; __be64 bits_type_ts; u32 cqid; @@ -552,6 +564,15 @@ struct t4_cq { unsigned long flags; }; +static inline void write_gts(struct t4_cq *cq, u32 val) +{ + if (cq->bar2_va) + writel(val | INGRESSQID_V(cq->bar2_qid), + cq->bar2_va + SGE_UDB_GTS); + else + writel(val | INGRESSQID_V(cq->cqid), cq->gts); +} + static inline int t4_clear_cq_armed(struct t4_cq *cq) { return test_and_clear_bit(CQ_ARMED, &cq->flags); @@ -563,14 +584,12 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se) set_bit(CQ_ARMED, &cq->flags); while (cq->cidx_inc > CIDXINC_M) { - val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7) | - INGRESSQID_V(cq->cqid & cq->qid_mask); - writel(val, cq->gts); + val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7); + write_gts(cq, val); cq->cidx_inc -= CIDXINC_M; } - val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6) | - INGRESSQID_V(cq->cqid & cq->qid_mask); - writel(val, cq->gts); + val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6); + write_gts(cq, val); cq->cidx_inc = 0; return 0; } @@ -601,9 +620,8 @@ static inline void t4_hwcq_consume(struct t4_cq *cq) if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == CIDXINC_M) { u32 val; - val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7) | - INGRESSQID_V(cq->cqid & cq->qid_mask); - writel(val, cq->gts); + val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7); + write_gts(cq, val); cq->cidx_inc = 0; } if (++cq->cidx == cq->size) { diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 8cc8375..9b68b17 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -113,10 +113,12 @@ struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) return ret; } 
-struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, +struct ib_cq *ehca_create_cq(struct ib_device *device, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int cqe = attr->cqe; static const u32 additional_cqe = 20; struct ib_cq *cq; struct ehca_cq *my_cq; @@ -131,6 +133,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, int ipz_rc, i; unsigned long flags; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 9ed4d25..e8b1bb6 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -50,7 +50,8 @@ static unsigned int limit_uint(unsigned int value) return min_t(unsigned int, value, INT_MAX); } -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { int i, ret = 0; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, @@ -71,6 +72,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, }; + if (uhw->inlen || uhw->outlen) + return -EINVAL; + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 22f79af..80e6a3d 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -44,11 +44,15 @@ #include "ehca_classes.h" -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props); +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw); int ehca_query_port(struct ib_device *ibdev, u8 
port, struct ib_port_attr *props); +enum rdma_protocol_type +ehca_query_protocol(struct ib_device *device, u8 port_num); + int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, struct ehca_sma_attr *attr); @@ -126,7 +130,8 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); -struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, +struct ib_cq *ehca_create_cq(struct ib_device *device, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); @@ -188,9 +193,10 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context); int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); void ehca_poll_eqs(unsigned long data); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index cd8d290..8246418 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -46,6 +46,7 @@ #include <linux/notifier.h> #include <linux/memory.h> +#include <rdma/ib_mad.h> #include "ehca_classes.h" #include "ehca_iverbs.h" #include "ehca_mrmw.h" @@ -431,6 +432,24 @@ init_node_guid1: return ret; } +static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = ehca_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + 
return 0; +} + static int ehca_init_device(struct ehca_shca *shca) { int ret; @@ -510,6 +529,7 @@ static int ehca_init_device(struct ehca_shca *shca) shca->ib_device.process_mad = ehca_process_mad; shca->ib_device.mmap = ehca_mmap; shca->ib_device.dma_ops = &ehca_dma_mapping_ops; + shca->ib_device.get_port_immutable = ehca_port_immutable; if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { shca->ib_device.uverbs_cmd_mask |= @@ -534,6 +554,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) struct ib_cq *ibcq; struct ib_qp *ibqp; struct ib_qp_init_attr qp_init_attr; + struct ib_cq_init_attr cq_attr = {}; int ret; if (sport->ibcq_aqp1) { @@ -541,7 +562,9 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) return -EPERM; } - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0); + cq_attr.cqe = 10; + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), + &cq_attr); if (IS_ERR(ibcq)) { ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); return PTR_ERR(ibcq); diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index dba8f9f..12b5bc2 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -140,10 +140,10 @@ struct vertcfl { } __attribute__ ((packed)); static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { - struct ib_perf *in_perf = (struct ib_perf *)in_mad; + const struct ib_perf *in_perf = (const struct ib_perf *)in_mad; struct ib_perf *out_perf = (struct ib_perf *)out_mad; struct ib_class_port_info *poi = (struct ib_class_port_info *)out_perf->data; @@ -187,8 +187,8 @@ static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, /* if request was globally routed, copy route info */ if (in_grh) { - struct 
vertcfl *vertcfl = - (struct vertcfl *)&in_grh->version_tclass_flow; + const struct vertcfl *vertcfl = + (const struct vertcfl *)&in_grh->version_tclass_flow; memcpy(poi->redirect_gid, in_grh->dgid.raw, sizeof(poi->redirect_gid)); tcslfl->tc = vertcfl->tc; @@ -217,10 +217,17 @@ perf_reply: } int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int ret; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) return IB_MAD_RESULT_FAILURE; diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 0416c6c..e9dd911 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -188,7 +188,7 @@ static void send_complete(unsigned long data) /** * ipath_create_cq - create a completion queue * @ibdev: the device this completion queue is attached to - * @entries: the minimum size of the completion queue + * @attr: creation attributes * @context: unused by the InfiniPath driver * @udata: unused by the InfiniPath driver * @@ -197,16 +197,21 @@ static void send_complete(unsigned long data) * * Called by ib_create_cq() in the generic verbs code. 
*/ -struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, +struct ib_cq *ipath_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; struct ipath_ibdev *dev = to_idev(ibdev); struct ipath_cq *cq; struct ipath_cq_wc *wc; struct ib_cq *ret; u32 sz; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 1 || entries > ib_ipath_max_cqes) { ret = ERR_PTR(-EINVAL); goto done; diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index e890e5b..948188e 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -1257,7 +1257,7 @@ static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp, } static int process_subn(struct ib_device *ibdev, int mad_flags, - u8 port_num, struct ib_mad *in_mad, + u8 port_num, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_smp *smp = (struct ib_smp *)out_mad; @@ -1389,7 +1389,7 @@ bail: } static int process_perf(struct ib_device *ibdev, u8 port_num, - struct ib_mad *in_mad, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; @@ -1490,10 +1490,17 @@ bail: * This is called by the ib_mad module. 
*/ int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int ret; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 44ea939..48253b8 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1495,11 +1495,14 @@ bail: return 0; } -static int ipath_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct ipath_ibdev *dev = to_idev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + memset(props, 0, sizeof(*props)); props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | @@ -1980,6 +1983,24 @@ static int disable_timer(struct ipath_devdata *dd) return 0; } +static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = ipath_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + /** * ipath_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -2179,6 +2200,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) 
dev->process_mad = ipath_process_mad; dev->mmap = ipath_mmap; dev->dma_ops = &ipath_dma_mapping_ops; + dev->get_port_immutable = ipath_port_immutable; snprintf(dev->node_desc, sizeof(dev->node_desc), IPATH_IDSTR " %s", init_utsname()->nodename); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index ae6cff4..ec167e5 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -701,9 +701,11 @@ static inline void ipath_schedule_send(struct ipath_qp *qp) int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); /* * Compare the lower 24 bits of the two values. @@ -807,7 +809,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, +struct ib_cq *ipath_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 0176caa..36eb3d0 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -166,10 +166,14 @@ err_buf: return err; } -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, +#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_FLAGS_TIMESTAMP_COMPLETION +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; + int vector = attr->comp_vector; struct mlx4_ib_dev *dev = to_mdev(ibdev); 
struct mlx4_ib_cq *cq; struct mlx4_uar *uar; @@ -178,6 +182,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector if (entries < 1 || entries > dev->dev->caps.max_cqes) return ERR_PTR(-EINVAL); + if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) + return ERR_PTR(-EINVAL); + cq = kmalloc(sizeof *cq, GFP_KERNEL); if (!cq) return ERR_PTR(-ENOMEM); @@ -188,6 +195,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector spin_lock_init(&cq->lock); cq->resize_buf = NULL; cq->resize_umem = NULL; + cq->create_flags = attr->flags; INIT_LIST_HEAD(&cq->send_qp_list); INIT_LIST_HEAD(&cq->recv_qp_list); @@ -231,7 +239,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector vector = dev->eq_table[vector % ibdev->num_comp_vectors]; err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, vector, 0, 0); + cq->db.dma, &cq->mcq, vector, 0, + !!(cq->create_flags & IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); if (err) goto err_dbmap; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9cd2b00..3e2dee4 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -111,8 +111,9 @@ __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) } int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad) + int port, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const void *in_mad, void *response_mad) { struct mlx4_cmd_mailbox *inmailbox, *outmailbox; void *inbox; @@ -220,7 +221,7 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can * synthesize LID change, Client-Rereg, GID change, and P_Key change events. 
*/ -static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, +static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad *mad, u16 prev_lid) { struct ib_port_info *pinfo; @@ -356,7 +357,7 @@ static void node_desc_override(struct ib_device *dev, } } -static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad) +static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, const struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; struct ib_mad_send_buf *send_buf; @@ -366,7 +367,8 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma if (agent) { send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) return; /* @@ -722,8 +724,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, } static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { u16 slid, prev_lid = 0; int err; @@ -825,8 +827,8 @@ static void edit_counter(struct mlx4_counter *cnt, } static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_ib_dev *dev = to_mdev(ibdev); @@ -866,9 +868,17 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + 
const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); + switch (rdma_port_get_link_layer(ibdev, port_num)) { case IB_LINK_LAYER_INFINIBAND: return ib_process_mad(ibdev, mad_flags, port_num, in_wc, @@ -1773,6 +1783,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, int create_tun, struct mlx4_ib_demux_pv_ctx *ctx) { int ret, cq_size; + struct ib_cq_init_attr cq_attr = {}; if (ctx->state != DEMUX_PV_STATE_DOWN) return -EEXIST; @@ -1801,8 +1812,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, if (ctx->has_smi) cq_size *= 2; + cq_attr.cqe = cq_size; ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, - NULL, ctx, cq_size, 0); + NULL, ctx, &cq_attr); if (IS_ERR(ctx->cq)) { ret = PTR_ERR(ctx->cq); pr_err("Couldn't create tunnel CQ (%d)\n", ret); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index cc64400..166da78 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -132,14 +132,35 @@ static int num_ib_ports(struct mlx4_dev *dev) } static int mlx4_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) + struct ib_device_attr *props, + struct ib_udata *uhw) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; int have_ib_ports; + struct mlx4_uverbs_ex_query_device cmd; + struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0}; + struct mlx4_clock_params clock_params; + if (uhw->inlen) { + if (uhw->inlen < sizeof(cmd)) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd)); + if (err) + return err; + 
+ if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + } + + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) @@ -229,7 +250,24 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; + props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL; + props->timestamp_mask = 0xFFFFFFFFFFFFULL; + + err = mlx4_get_internal_clock_params(dev->dev, &clock_params); + if (err) + goto out; + + if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) { + resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; + resp.response_length += sizeof(resp.hca_core_clock_offset); + resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP; + } + if (uhw->outlen) { + err = ib_copy_to_udata(uhw, &resp, resp.response_length); + if (err) + goto out; + } out: kfree(in_mad); kfree(out_mad); @@ -712,8 +750,24 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) dev->dev->caps.num_uars, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; - } else + } else if (vma->vm_pgoff == 3) { + struct mlx4_clock_params params; + int ret = mlx4_get_internal_clock_params(dev->dev, &params); + + if (ret) + return ret; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, + (pci_resource_start(dev->dev->persist->pdev, + params.bar) + + params.offset) + >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + } else { return -EINVAL; + } return 0; } @@ -758,6 +812,7 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx4_ib_xrcd *xrcd; + struct ib_cq_init_attr cq_attr = {}; int err; if (!(to_mdev(ibdev)->dev->caps.flags & 
MLX4_DEV_CAP_FLAG_XRC)) @@ -777,7 +832,8 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, goto err2; } - xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0); + cq_attr.cqe = 1; + xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr); if (IS_ERR(xrcd->cq)) { err = PTR_ERR(xrcd->cq); goto err3; @@ -1185,7 +1241,6 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, &mflow->reg_id[i].id); if (err) goto err_create_flow; - i++; if (is_bonded) { /* Application always sees one port so the mirror rule * must be on port #2 @@ -1200,6 +1255,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, j++; } + i++; } if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) { @@ -1207,7 +1263,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, &mflow->reg_id[i].id); if (err) goto err_create_flow; - i++; + if (is_bonded) { flow_attr->port = 2; err = mlx4_ib_tunnel_steer_add(qp, flow_attr, @@ -1218,6 +1274,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, j++; } /* function to create mirror rule */ + i++; } return &mflow->ibflow; @@ -2114,6 +2171,29 @@ static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) kfree(ibdev->eq_table); } +static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = mlx4_ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + + if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + else + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; @@ -2241,6 +2321,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) 
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; + ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; if (!mlx4_is_slave(ibdev->dev)) { ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; @@ -2278,6 +2359,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); } + ibdev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ); + mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index fce39343..7933adf 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -110,6 +110,7 @@ struct mlx4_ib_cq { struct mutex resize_mutex; struct ib_umem *umem; struct ib_umem *resize_umem; + int create_flags; /* List of qps that it serves.*/ struct list_head send_qp_list; struct list_head recv_qp_list; @@ -555,6 +556,21 @@ struct mlx4_ib_qp_tunnel_init_attr { u8 port; }; +struct mlx4_uverbs_ex_query_device { + __u32 comp_mask; + __u32 reserved; +}; + +enum query_device_resp_mask { + QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0, +}; + +struct mlx4_uverbs_ex_query_device_resp { + __u32 comp_mask; + __u32 response_length; + __u64 hca_core_clock_offset; +}; + static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); @@ -668,7 +684,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); 
int mlx4_ib_destroy_cq(struct ib_cq *cq); @@ -706,11 +723,13 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad); + int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad); int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2ee6b10..09fbae6 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -736,10 +736,13 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, &cq->db); } -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; + int vector = attr->comp_vector; struct mlx5_create_cq_mbox_in *cqb = NULL; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_cq *cq; @@ -750,6 +753,9 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, int eqn; int err; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 0) return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9cf9a37..8e45714 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ 
b/drivers/infiniband/hw/mlx5/mad.c @@ -41,8 +41,8 @@ enum { }; int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad) + u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad) { u8 op_modifier = 0; @@ -58,11 +58,18 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, } int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { u16 slid; int err; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); slid = in_wc ? 
in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 57c9809..c6cb26e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -63,7 +63,8 @@ static char mlx5_version[] = DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; static int mlx5_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) + struct ib_device_attr *props, + struct ib_udata *uhw) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; @@ -74,6 +75,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, int max_sq_sg; u64 flags; + if (uhw->inlen || uhw->outlen) + return -EINVAL; + gen = &dev->mdev->caps.gen; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); @@ -910,6 +914,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev) struct mlx5_general_caps *gen; int err = -ENOMEM; int port; + struct ib_udata uhw = {.inlen = 0, .outlen = 0}; gen = &dev->mdev->caps.gen; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); @@ -920,7 +925,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev) if (!dprops) goto out; - err = mlx5_ib_query_device(&dev->ib_dev, dprops); + err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); goto out; @@ -971,6 +976,7 @@ static int create_umr_res(struct mlx5_ib_dev *dev) struct ib_cq *cq; struct ib_qp *qp; struct ib_mr *mr; + struct ib_cq_init_attr cq_attr = {}; int ret; attr = kzalloc(sizeof(*attr), GFP_KERNEL); @@ -994,8 +1000,9 @@ static int create_umr_res(struct mlx5_ib_dev *dev) goto error_1; } - cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128, - 0); + cq_attr.cqe = 128; + cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, + &cq_attr); if (IS_ERR(cq)) { mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); ret = PTR_ERR(cq); @@ -1087,6 +1094,7 @@ static int create_dev_resources(struct 
mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; + struct ib_cq_init_attr cq_attr = {.cqe = 1}; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); @@ -1100,7 +1108,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); - devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL); + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); if (IS_ERR(devr->c0)) { ret = PTR_ERR(devr->c0); goto error1; @@ -1182,6 +1190,24 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_pd(devr->p0); } +static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = mlx5_ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -1285,6 +1311,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; + dev->ib_dev.get_port_immutable = mlx5_port_immutable; mlx5_ib_internal_query_odp_caps(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index dff1cfc..178314e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -525,8 +525,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); 
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad); + u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad); struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, struct mlx5_ib_ah *ah); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); @@ -556,8 +556,9 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length); -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata); int mlx5_ib_destroy_cq(struct ib_cq *cq); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); @@ -586,8 +587,10 @@ int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int mlx5_ib_unmap_fmr(struct list_head *fmr_list); int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 9d3e5c1..c7f49bb 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1858,8 +1858,8 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev 
*dev, int type, u32 qpn) } int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad) + int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad) { struct mthca_mailbox *inmailbox, *outmailbox; void *inbox; diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index f952244..d2e5b19 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -312,8 +312,8 @@ int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, struct mthca_mailbox *mailbox); int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn); int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad); + int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad); int mthca_READ_MGM(struct mthca_dev *dev, int index, struct mthca_mailbox *mailbox); int mthca_WRITE_MGM(struct mthca_dev *dev, int index, diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7e6a6d6..4393a022 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -576,10 +576,11 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad); + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); int mthca_create_agents(struct mthca_dev *dev); void mthca_free_agents(struct mthca_dev *dev); diff --git 
a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 8881fa3..6b2418b 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -104,7 +104,7 @@ static void update_sm_ah(struct mthca_dev *dev, */ static void smp_snoop(struct ib_device *ibdev, u8 port_num, - struct ib_mad *mad, + const struct ib_mad *mad, u16 prev_lid) { struct ib_event event; @@ -160,7 +160,7 @@ static void node_desc_override(struct ib_device *dev, static void forward_trap(struct mthca_dev *dev, u8 port_num, - struct ib_mad *mad) + const struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; struct ib_mad_send_buf *send_buf; @@ -170,7 +170,8 @@ static void forward_trap(struct mthca_dev *dev, if (agent) { send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) return; /* @@ -195,15 +196,21 @@ static void forward_trap(struct mthca_dev *dev, int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int err; u16 slid = in_wc ? 
in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 8edb28a..15d0644 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -77,7 +77,6 @@ s64 mthca_make_profile(struct mthca_dev *dev, u64 mem_base, mem_avail; s64 total_size = 0; struct mthca_resource *profile; - struct mthca_resource tmp; int i, j; profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL); @@ -136,11 +135,8 @@ s64 mthca_make_profile(struct mthca_dev *dev, */ for (i = MTHCA_RES_NUM; i > 0; --i) for (j = 1; j < i; ++j) { - if (profile[j].size > profile[j - 1].size) { - tmp = profile[j]; - profile[j] = profile[j - 1]; - profile[j - 1] = tmp; - } + if (profile[j].size > profile[j - 1].size) + swap(profile[j], profile[j - 1]); } for (i = 0; i < MTHCA_RES_NUM; ++i) { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 415f8e1..93ae51d 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -57,14 +57,17 @@ static void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } -static int mthca_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; struct mthca_dev *mdev = to_mdev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) @@ -641,16 +644,20 @@ static int mthca_destroy_qp(struct ib_qp *qp) return 0; } -static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; struct mthca_create_cq ucmd; struct mthca_cq *cq; int nent; int err; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return ERR_PTR(-EINVAL); @@ -1244,6 +1251,24 @@ out: return err; } +static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = mthca_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + int mthca_register_device(struct mthca_dev *dev) { int ret; @@ -1323,6 +1348,7 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; + dev->ib_dev.get_port_immutable = mthca_port_immutable; if (dev->mthca_flags & MTHCA_FLAG_FMR) { dev->ib_dev.alloc_fmr = mthca_alloc_fmr; diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 72b4341..9047af4 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1616,6 +1616,8 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, &cm_node->loc_addr, cm_node->loc_port, &cm_node->rem_addr, cm_node->rem_port); cm_node->listener = listener; + if (listener) + cm_node->tos = listener->tos; cm_node->netdev = nesvnic->netdev; 
cm_node->cm_id = cm_info->cm_id; memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); @@ -2938,6 +2940,9 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT); + nesqp->nesqp_context->misc2 |= cpu_to_le32( + cm_node->tos << NES_QPCONTEXT_MISC2_TOS_SHIFT); + nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16); nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32( @@ -3612,6 +3617,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->ord_size = 1; cm_node->apbvt_set = apbvt_set; + cm_node->tos = cm_id->tos; nesqp->cm_node = cm_node; cm_node->nesqp = nesqp; nes_add_ref(&nesqp->ibqp); @@ -3666,6 +3672,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) } cm_id->provider_data = cm_node; + cm_node->tos = cm_id->tos; if (!cm_node->reused_node) { if (nes_create_mapinfo(&cm_info)) diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index f522cf6..32a6420 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -303,6 +303,7 @@ struct nes_cm_listener { int backlog; enum nes_cm_listener_state listener_state; u32 reused_node; + u8 tos; }; /* per connection node and node state information */ @@ -352,6 +353,7 @@ struct nes_cm_node { struct list_head reset_entry; struct nes_qp *nesqp; atomic_t passive_state; + u8 tos; }; /* structure for client or CM to fill when making CM api calls. 
*/ diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index c0d0296..fbc43e5 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -512,12 +512,16 @@ static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl) /** * nes_query_device */ -static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_ib_device *nesibdev = nesvnic->nesibdev; + if (uhw->inlen || uhw->outlen) + return -EINVAL; + memset(props, 0, sizeof(*props)); memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6); @@ -606,7 +610,6 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr return 0; } - /** * nes_query_pkey */ @@ -1527,10 +1530,12 @@ static int nes_destroy_qp(struct ib_qp *ibqp) /** * nes_create_cq */ -static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, - struct ib_ucontext *context, struct ib_udata *udata) +static struct ib_cq *nes_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) { + int entries = attr->cqe; u64 u64temp; struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; @@ -1550,6 +1555,9 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, unsigned long flags; int ret; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries > nesadapter->max_cqe) return ERR_PTR(-EINVAL); @@ -3222,8 +3230,10 @@ static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) * nes_process_mad */ static int nes_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, 
struct ib_mad *out_mad) + u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { nes_debug(NES_DBG_INIT, "\n"); return -ENOSYS; @@ -3828,6 +3838,22 @@ static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_ return 0; } +static int nes_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = nes_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} /** * nes_init_ofa_device @@ -3928,6 +3954,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->reject = nes_reject; nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; + nesibdev->ibdev.get_port_immutable = nes_port_immutable; return nesibdev; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index f5a5ea836..4bafa15 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -204,12 +204,19 @@ int ocrdma_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int status; struct ocrdma_dev *dev; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + 
*out_mad_size != sizeof(*out_mad)); switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_PERF_MGMT: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 726a87c..cf366fe 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -42,7 +42,9 @@ int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *); int ocrdma_process_mad(struct ib_device *, int process_mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); #endif /* __OCRDMA_AH_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 7a2b59a..8a1398b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -30,6 +30,7 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_addr.h> +#include <rdma/ib_mad.h> #include <linux/netdevice.h> #include <net/addrconf.h> @@ -202,6 +203,24 @@ static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device, return IB_LINK_LAYER_ETHERNET; } +static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = ocrdma_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + static int ocrdma_register_device(struct ocrdma_dev *dev) { strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); @@ -286,6 +305,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.dma_device = 
&dev->nic_info.pdev->dev; dev->ibdev.process_mad = ocrdma_process_mad; + dev->ibdev.get_port_immutable = ocrdma_port_immutable; if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 9dcb660..5bb61eb 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -61,10 +61,14 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port, return 0; } -int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) +int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, + struct ib_udata *uhw) { struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + memset(attr, 0, sizeof *attr); memcpy(&attr->fw_ver, &dev->attr.fw_ver[0], min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver))); @@ -375,7 +379,12 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, if (dev->pd_mgr->pd_prealloc_valid) { status = ocrdma_get_pd_num(dev, pd); - return (status == 0) ? 
pd : ERR_PTR(status); + if (status == 0) { + return pd; + } else { + kfree(pd); + return ERR_PTR(status); + } } retry: @@ -679,7 +688,6 @@ err: ocrdma_release_ucontext_pd(uctx); } else { status = _ocrdma_dealloc_pd(dev, pd); - kfree(pd); } exit: return ERR_PTR(status); @@ -1000,10 +1008,12 @@ err: return status; } -struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, +struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_ctx, struct ib_udata *udata) { + int entries = attr->cqe; struct ocrdma_cq *cq; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct ocrdma_ucontext *uctx = NULL; @@ -1011,6 +1021,9 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, int status; struct ocrdma_create_cq_ureq ureq; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (udata) { if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return ERR_PTR(-EFAULT); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index b8f7853..b15c608 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -36,11 +36,15 @@ int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); -int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props); +int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props, + struct ib_udata *uhw); int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); int ocrdma_modify_port(struct ib_device *, u8 port, int mask, struct ib_port_modify *props); +enum rdma_protocol_type +ocrdma_query_protocol(struct ib_device *device, u8 port_num); + void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid); int ocrdma_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid); @@ -56,8 
+60,10 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *, struct ib_ucontext *, struct ib_udata *); int ocrdma_dealloc_pd(struct ib_pd *pd); -struct ib_cq *ocrdma_create_cq(struct ib_device *, int entries, int vector, - struct ib_ucontext *, struct ib_udata *); +struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata); int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); int ocrdma_destroy_cq(struct ib_cq *); diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c index ab4e11c..2b45d0b 100644 --- a/drivers/infiniband/hw/qib/qib_cq.c +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -203,7 +203,7 @@ static void send_complete(struct kthread_work *work) /** * qib_create_cq - create a completion queue * @ibdev: the device this completion queue is attached to - * @entries: the minimum size of the completion queue + * @attr: creation attributes * @context: unused by the QLogic_IB driver * @udata: user data for libibverbs.so * @@ -212,16 +212,21 @@ static void send_complete(struct kthread_work *work) * * Called by ib_create_cq() in the generic verbs code. 
*/ -struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, struct ib_ucontext *context, +struct ib_cq *qib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; struct qib_ibdev *dev = to_idev(ibdev); struct qib_cq *cq; struct qib_cq_wc *wc; struct ib_cq *ret; u32 sz; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 1 || entries > ib_qib_max_cqes) { ret = ERR_PTR(-EINVAL); goto done; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index f32b462..6c8ff10 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -5502,7 +5502,8 @@ static void try_7322_ipg(struct qib_pportdata *ppd) goto retry; send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) goto retry; diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 395f404..05e3242 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -83,7 +83,8 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) return; send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) return; @@ -1854,7 +1855,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, } static int process_subn(struct ib_device *ibdev, int mad_flags, - u8 port, struct ib_mad *in_mad, + u8 port, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_smp *smp = (struct ib_smp *)out_mad; @@ -2006,7 +2007,7 @@ bail: } static int process_perf(struct ib_device *ibdev, u8 port, - struct ib_mad *in_mad, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_pma_mad *pmp = 
(struct ib_pma_mad *)out_mad; @@ -2299,7 +2300,7 @@ static int check_cc_key(struct qib_ibport *ibp, } static int process_cc(struct ib_device *ibdev, int mad_flags, - u8 port, struct ib_mad *in_mad, + u8 port, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; @@ -2400,12 +2401,19 @@ bail: * This is called by the ib_mad module. */ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int ret; struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 4a35998..a05d1a3 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1550,12 +1550,14 @@ full: } } -static int qib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct qib_devdata *dd = dd_from_ibdev(ibdev); struct qib_ibdev *dev = to_idev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; memset(props, 0, sizeof(*props)); props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | @@ -2040,6 +2042,24 @@ static void init_ibport(struct qib_pportdata *ppd) RCU_INIT_POINTER(ibp->qp1, NULL); } +static int qib_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + 
struct ib_port_attr attr; + int err; + + err = qib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + /** * qib_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -2227,6 +2247,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->process_mad = qib_process_mad; ibdev->mmap = qib_mmap; ibdev->dma_ops = &qib_dma_mapping_ops; + ibdev->get_port_immutable = qib_port_immutable; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), "Intel Infiniband HCA %s", init_utsname()->nodename); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index bfc8948..1635572 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -872,8 +872,10 @@ void qib_cap_mask_chg(struct qib_ibport *ibp); void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); int qib_create_agents(struct qib_ibdev *dev); void qib_free_agents(struct qib_ibdev *dev); @@ -1007,8 +1009,9 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, struct ib_ucontext *context, +struct ib_cq *qib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata 
*udata); int qib_destroy_cq(struct ib_cq *ibcq); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 0d0f986..34c49b8 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -300,6 +300,22 @@ static struct notifier_block usnic_ib_inetaddr_notifier = { }; /* End of inet section*/ +static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = usnic_ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + + return 0; +} + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { @@ -383,6 +399,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq; us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; + us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; if (ib_register_device(&us_ibdev->ib_dev, NULL)) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 53bd6a2..7df4382 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -248,7 +248,8 @@ enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, } int usnic_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) + struct ib_device_attr *props, + struct ib_udata *uhw) { struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); union ib_gid gid; @@ -257,6 +258,9 @@ int usnic_ib_query_device(struct ib_device *ibdev, int qp_per_vf; usnic_dbg("\n"); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + mutex_lock(&us_ibdev->usdev_lock); us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); 
us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); @@ -570,13 +574,17 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return status; } -struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, - struct ib_udata *udata) +struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) { struct ib_cq *cq; usnic_dbg("\n"); + if (attr->flags) + return ERR_PTR(-EINVAL); + cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) return ERR_PTR(-EBUSY); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index bb864f5..0bd04ef 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -24,9 +24,12 @@ enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, u8 port_num); int usnic_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props); + struct ib_device_attr *props, + struct ib_udata *uhw); int usnic_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); +enum rdma_protocol_type +usnic_ib_query_protocol(struct ib_device *device, u8 port_num); int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); @@ -44,9 +47,10 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, int usnic_ib_destroy_qp(struct ib_qp *qp); int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, - struct ib_udata *udata); +struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); int usnic_ib_destroy_cq(struct ib_cq *cq); struct ib_mr 
*usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 417de1f..cb2337f 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -472,11 +472,10 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) return ERR_PTR(-ENOMEM); pd->domain = domain = iommu_domain_alloc(&pci_bus_type); - if (IS_ERR_OR_NULL(domain)) { - usnic_err("Failed to allocate IOMMU domain with err %ld\n", - PTR_ERR(pd->domain)); + if (!domain) { + usnic_err("Failed to allocate IOMMU domain"); kfree(pd); - return ERR_PTR(domain ? PTR_ERR(domain) : -ENOMEM); + return ERR_PTR(-ENOMEM); } iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 9e1b203..da149c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1128,7 +1128,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) { struct ipoib_neigh_table *ntbl = &priv->ntbl; struct ipoib_neigh_hash *htbl; - struct ipoib_neigh **buckets; + struct ipoib_neigh __rcu **buckets; u32 size; clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); @@ -1146,7 +1146,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) htbl->size = size; htbl->mask = (size - 1); htbl->buckets = buckets; - ntbl->htbl = htbl; + RCU_INIT_POINTER(ntbl->htbl, htbl); htbl->ntbl = ntbl; atomic_set(&ntbl->entries, 0); @@ -1685,9 +1685,7 @@ static void ipoib_add_one(struct ib_device *device) struct net_device *dev; struct ipoib_dev_priv *priv; int s, e, p; - - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + int count = 0; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); if (!dev_list) @@ -1704,15 +1702,21 @@ static void ipoib_add_one(struct ib_device *device) } for (p = s; p <= e; ++p) { - if 
(rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) + if (!rdma_protocol_ib(device, p)) continue; dev = ipoib_add_port("ib%d", device, p); if (!IS_ERR(dev)) { priv = netdev_priv(dev); list_add_tail(&priv->list, dev_list); + count++; } } + if (!count) { + kfree(dev_list); + return; + } + ib_set_client_data(device, &ipoib_client, dev_list); } @@ -1721,9 +1725,6 @@ static void ipoib_remove_one(struct ib_device *device) struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - dev_list = ib_get_client_data(device, &ipoib_client); if (!dev_list) return; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index e5cc430..9e6ee82 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -141,6 +141,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD }; + struct ib_cq_init_attr cq_attr = {}; int ret, size; int i; @@ -178,14 +179,17 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) } else goto out_free_wq; - priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); + cq_attr.cqe = size; + priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, + dev, &cq_attr); if (IS_ERR(priv->recv_cq)) { printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); goto out_cm_dev_cleanup; } + cq_attr.cqe = ipoib_sendq_size; priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL, - dev, ipoib_sendq_size, 0); + dev, &cq_attr); if (IS_ERR(priv->send_cq)) { printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); goto out_free_recv_cq; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index cc2dd35..5c9f565 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ 
b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -51,19 +51,22 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context); static void iser_cq_event_callback(struct ib_event *cause, void *context) { - iser_err("got cq event %d \n", cause->event); + iser_err("cq event %s (%d)\n", + ib_event_msg(cause->event), cause->event); } static void iser_qp_event_callback(struct ib_event *cause, void *context) { - iser_err("got qp event %d\n",cause->event); + iser_err("qp event %s (%d)\n", + ib_event_msg(cause->event), cause->event); } static void iser_event_handler(struct ib_event_handler *handler, struct ib_event *event) { - iser_err("async event %d on device %s port %d\n", event->event, - event->device->name, event->element.port_num); + iser_err("async event %s (%d) on device %s port %d\n", + ib_event_msg(event->event), event->event, + event->device->name, event->element.port_num); } /** @@ -123,14 +126,17 @@ static int iser_create_device_ib_res(struct iser_device *device) goto pd_err; for (i = 0; i < device->comps_used; i++) { + struct ib_cq_init_attr cq_attr = {}; struct iser_comp *comp = &device->comps[i]; comp->device = device; + cq_attr.cqe = max_cqe; + cq_attr.comp_vector = i; comp->cq = ib_create_cq(device->ib_device, iser_cq_callback, iser_cq_event_callback, (void *)comp, - max_cqe, i); + &cq_attr); if (IS_ERR(comp->cq)) { comp->cq = NULL; goto cq_err; @@ -873,8 +879,9 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve int ret = 0; iser_conn = (struct iser_conn *)cma_id->context; - iser_info("event %d status %d conn %p id %p\n", - event->event, event->status, cma_id->context, cma_id); + iser_info("%s (%d): status %d conn %p id %p\n", + rdma_event_msg(event->event), event->event, + event->status, cma_id->context, cma_id); mutex_lock(&iser_conn->state_mutex); switch (event->event) { @@ -913,7 +920,8 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve } break; default: - iser_err("Unexpected RDMA CM 
event (%d)\n", event->event); + iser_err("Unexpected RDMA CM event: %s (%d)\n", + rdma_event_msg(event->event), event->event); break; } mutex_unlock(&iser_conn->state_mutex); @@ -1173,10 +1181,13 @@ static void iser_handle_wc(struct ib_wc *wc) } } else { if (wc->status != IB_WC_WR_FLUSH_ERR) - iser_err("wr id %llx status %d vend_err %x\n", - wc->wr_id, wc->status, wc->vendor_err); + iser_err("%s (%d): wr id %llx vend_err %x\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id, wc->vendor_err); else - iser_dbg("flush error: wr id %llx\n", wc->wr_id); + iser_dbg("%s (%d): wr id %llx\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id); if (wc->wr_id == ISER_BEACON_WRID) /* all flush errors were consumed */ diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 575a072..f3b7a34 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -80,7 +80,9 @@ isert_qp_event_callback(struct ib_event *e, void *context) { struct isert_conn *isert_conn = context; - isert_err("conn %p event: %d\n", isert_conn, e->event); + isert_err("%s (%d): conn %p\n", + ib_event_msg(e->event), e->event, isert_conn); + switch (e->event) { case IB_EVENT_COMM_EST: rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST); @@ -318,15 +320,18 @@ isert_alloc_comps(struct isert_device *device, max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe); for (i = 0; i < device->comps_used; i++) { + struct ib_cq_init_attr cq_attr = {}; struct isert_comp *comp = &device->comps[i]; comp->device = device; INIT_WORK(&comp->work, isert_cq_work); + cq_attr.cqe = max_cqe; + cq_attr.comp_vector = i; comp->cq = ib_create_cq(device->ib_device, isert_cq_callback, isert_cq_event_callback, (void *)comp, - max_cqe, i); + &cq_attr); if (IS_ERR(comp->cq)) { isert_err("Unable to allocate cq\n"); ret = PTR_ERR(comp->cq); @@ -900,7 +905,8 @@ static int isert_np_cma_handler(struct isert_np *isert_np, enum rdma_cm_event_type event) { - 
isert_dbg("isert np %p, handling event %d\n", isert_np, event); + isert_dbg("%s (%d): isert np %p\n", + rdma_event_msg(event), event, isert_np); switch (event) { case RDMA_CM_EVENT_DEVICE_REMOVAL: @@ -974,7 +980,8 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret = 0; - isert_info("event %d status %d id %p np %p\n", event->event, + isert_info("%s (%d): status %d id %p np %p\n", + rdma_event_msg(event->event), event->event, event->status, cma_id, cma_id->context); switch (event->event) { @@ -2108,10 +2115,13 @@ isert_handle_wc(struct ib_wc *wc) } } else { if (wc->status != IB_WC_WR_FLUSH_ERR) - isert_err("wr id %llx status %d vend_err %x\n", - wc->wr_id, wc->status, wc->vendor_err); + isert_err("%s (%d): wr id %llx vend_err %x\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id, wc->vendor_err); else - isert_dbg("flush error: wr id %llx\n", wc->wr_id); + isert_dbg("%s (%d): wr id %llx\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id); if (wc->wr_id != ISER_FASTREG_LI_WRID) isert_cq_comp_err(isert_conn, wc); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 918814c..eada8f7 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -59,9 +59,10 @@ #define DRV_RELDATE "July 1, 2013" MODULE_AUTHOR("Roland Dreier"); -MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " - "v" DRV_VERSION " (" DRV_RELDATE ")"); +MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); +MODULE_INFO(release_date, DRV_RELDATE); static unsigned int srp_sg_tablesize; static unsigned int cmd_sg_entries; @@ -253,7 +254,8 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) static void srp_qp_event(struct ib_event *event, void *context) { - pr_debug("QP event %d\n", event->event); + pr_debug("QP event %s (%d)\n", + ib_event_msg(event->event), event->event); } static int 
srp_init_qp(struct srp_target_port *target, @@ -465,14 +467,13 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) */ static void srp_destroy_qp(struct srp_rdma_ch *ch) { - struct srp_target_port *target = ch->target; static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID }; struct ib_recv_wr *bad_wr; int ret; /* Destroying a QP and reusing ch->done is only safe if not connected */ - WARN_ON_ONCE(target->connected); + WARN_ON_ONCE(ch->connected); ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE); WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret); @@ -499,6 +500,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) struct ib_fmr_pool *fmr_pool = NULL; struct srp_fr_pool *fr_pool = NULL; const int m = 1 + dev->use_fast_reg; + struct ib_cq_init_attr cq_attr = {}; int ret; init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); @@ -506,15 +508,19 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) return -ENOMEM; /* + 1 for SRP_LAST_WR_ID */ + cq_attr.cqe = target->queue_size + 1; + cq_attr.comp_vector = ch->comp_vector; recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch, - target->queue_size + 1, ch->comp_vector); + &cq_attr); if (IS_ERR(recv_cq)) { ret = PTR_ERR(recv_cq); goto err; } + cq_attr.cqe = m * target->queue_size; + cq_attr.comp_vector = ch->comp_vector; send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch, - m * target->queue_size, ch->comp_vector); + &cq_attr); if (IS_ERR(send_cq)) { ret = PTR_ERR(send_cq); goto err_recv_cq; @@ -781,7 +787,7 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich) shost_printk(KERN_DEBUG, target->scsi_host, PFX "Topspin/Cisco initiator port ID workaround " "activated for target GUID %016llx\n", - (unsigned long long) be64_to_cpu(target->ioc_guid)); + be64_to_cpu(target->ioc_guid)); memset(req->priv.initiator_port_id, 0, 8); memcpy(req->priv.initiator_port_id + 8, 
&target->srp_host->srp_dev->dev->node_guid, 8); @@ -811,35 +817,19 @@ static bool srp_queue_remove_work(struct srp_target_port *target) return changed; } -static bool srp_change_conn_state(struct srp_target_port *target, - bool connected) -{ - bool changed = false; - - spin_lock_irq(&target->lock); - if (target->connected != connected) { - target->connected = connected; - changed = true; - } - spin_unlock_irq(&target->lock); - - return changed; -} - static void srp_disconnect_target(struct srp_target_port *target) { struct srp_rdma_ch *ch; int i; - if (srp_change_conn_state(target, false)) { - /* XXX should send SRP_I_LOGOUT request */ + /* XXX should send SRP_I_LOGOUT request */ - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { - shost_printk(KERN_DEBUG, target->scsi_host, - PFX "Sending CM DREQ failed\n"); - } + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + ch->connected = false; + if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); } } } @@ -852,7 +842,7 @@ static void srp_free_req_data(struct srp_target_port *target, struct srp_request *req; int i; - if (!ch->target || !ch->req_ring) + if (!ch->req_ring) return; for (i = 0; i < target->req_ring_size; ++i) { @@ -986,14 +976,26 @@ static void srp_rport_delete(struct srp_rport *rport) srp_queue_remove_work(target); } +/** + * srp_connected_ch() - number of connected channels + * @target: SRP target port. 
+ */ +static int srp_connected_ch(struct srp_target_port *target) +{ + int i, c = 0; + + for (i = 0; i < target->ch_count; i++) + c += target->ch[i].connected; + + return c; +} + static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) { struct srp_target_port *target = ch->target; int ret; - WARN_ON_ONCE(!multich && target->connected); - - target->qp_in_error = false; + WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); ret = srp_lookup_path(ch); if (ret) @@ -1016,7 +1018,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) */ switch (ch->status) { case 0: - srp_change_conn_state(target, true); + ch->connected = true; return 0; case SRP_PORT_REDIRECT: @@ -1214,14 +1216,10 @@ static int srp_rport_reconnect(struct srp_rport *rport) */ for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (!ch->target) - break; ret += srp_new_cm_id(ch); } for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (!ch->target) - break; for (j = 0; j < target->req_ring_size; ++j) { struct srp_request *req = &ch->req_ring[j]; @@ -1230,8 +1228,6 @@ static int srp_rport_reconnect(struct srp_rport *rport) } for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (!ch->target) - break; /* * Whether or not creating a new CM ID succeeded, create a new * QP. 
This guarantees that all completion callback function @@ -1243,13 +1239,13 @@ static int srp_rport_reconnect(struct srp_rport *rport) for (j = 0; j < target->queue_size; ++j) list_add(&ch->tx_ring[j]->list, &ch->free_tx); } + + target->qp_in_error = false; + for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (ret || !ch->target) { - if (i > 1) - ret = 0; + if (ret) break; - } ret = srp_connect_ch(ch, multich); multich = true; } @@ -1842,7 +1838,7 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch, s32 delta = be32_to_cpu(req->req_lim_delta); shost_printk(KERN_ERR, target->scsi_host, PFX - "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun)); + "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun)); if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) shost_printk(KERN_ERR, target->scsi_host, PFX @@ -1929,20 +1925,21 @@ static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, return; } - if (target->connected && !target->qp_in_error) { + if (ch->connected && !target->qp_in_error) { if (wr_id & LOCAL_INV_WR_ID_MASK) { shost_printk(KERN_ERR, target->scsi_host, PFX - "LOCAL_INV failed with status %d\n", - wc_status); + "LOCAL_INV failed with status %s (%d)\n", + ib_wc_status_msg(wc_status), wc_status); } else if (wr_id & FAST_REG_WR_ID_MASK) { shost_printk(KERN_ERR, target->scsi_host, PFX - "FAST_REG_MR failed status %d\n", - wc_status); + "FAST_REG_MR failed status %s (%d)\n", + ib_wc_status_msg(wc_status), wc_status); } else { shost_printk(KERN_ERR, target->scsi_host, - PFX "failed %s status %d for iu %p\n", + PFX "failed %s status %s (%d) for iu %p\n", send_err ? 
"send" : "receive", - wc_status, (void *)(uintptr_t)wr_id); + ib_wc_status_msg(wc_status), wc_status, + (void *)(uintptr_t)wr_id); } queue_work(system_long_wq, &target->tl_err_work); } @@ -2034,7 +2031,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) memset(cmd, 0, sizeof *cmd); cmd->opcode = SRP_CMD; - cmd->lun = cpu_to_be64((u64) scmnd->device->lun << 48); + int_to_scsilun(scmnd->device->lun, &cmd->lun); cmd->tag = tag; memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); @@ -2367,7 +2364,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) case IB_CM_DREQ_RECEIVED: shost_printk(KERN_WARNING, target->scsi_host, PFX "DREQ received - connection closed\n"); - srp_change_conn_state(target, false); + ch->connected = false; if (ib_send_cm_drep(cm_id, NULL, 0)) shost_printk(KERN_ERR, target->scsi_host, PFX "Sending CM DREP failed\n"); @@ -2414,8 +2411,8 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth) return scsi_change_queue_depth(sdev, qdepth); } -static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, - unsigned int lun, u8 func) +static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, + u8 func) { struct srp_target_port *target = ch->target; struct srp_rport *rport = target->rport; @@ -2423,7 +2420,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; - if (!target->connected || target->qp_in_error) + if (!ch->connected || target->qp_in_error) return -1; init_completion(&ch->tsk_mgmt_done); @@ -2449,7 +2446,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, memset(tsk_mgmt, 0, sizeof *tsk_mgmt); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = cpu_to_be64((u64) lun << 48); + int_to_scsilun(lun, &tsk_mgmt->lun); tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT; tsk_mgmt->tsk_mgmt_func = func; tsk_mgmt->task_tag = req_tag; @@ -2563,8 +2560,7 @@ static ssize_t show_id_ext(struct device 
*dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", - (unsigned long long) be64_to_cpu(target->id_ext)); + return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); } static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, @@ -2572,8 +2568,7 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", - (unsigned long long) be64_to_cpu(target->ioc_guid)); + return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); } static ssize_t show_service_id(struct device *dev, @@ -2581,8 +2576,7 @@ static ssize_t show_service_id(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", - (unsigned long long) be64_to_cpu(target->service_id)); + return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id)); } static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, @@ -2773,7 +2767,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) target->state = SRP_TARGET_SCANNING; sprintf(target->target_name, "SRP.T10:%016llX", - (unsigned long long) be64_to_cpu(target->id_ext)); + be64_to_cpu(target->id_ext)); if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device)) return -ENODEV; @@ -2797,7 +2791,8 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) scsi_scan_target(&target->scsi_host->shost_gendev, 0, target->scsi_id, SCAN_WILD_CARD, 0); - if (!target->connected || target->qp_in_error) { + if (srp_connected_ch(target) < target->ch_count || + target->qp_in_error) { shost_printk(KERN_INFO, target->scsi_host, PFX "SCSI scan failed - removing SCSI host\n"); srp_queue_remove_work(target); @@ -3146,7 +3141,7 @@ static ssize_t srp_create_target(struct device 
*dev, target_host->transportt = ib_srp_transport_template; target_host->max_channel = 0; target_host->max_id = 1; - target_host->max_lun = SRP_MAX_LUN; + target_host->max_lun = -1LL; target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; target = host_to_target(target_host); @@ -3172,11 +3167,11 @@ static ssize_t srp_create_target(struct device *dev, ret = srp_parse_options(buf, target); if (ret) - goto err; + goto out; ret = scsi_init_shared_tag_map(target_host, target_host->can_queue); if (ret) - goto err; + goto out; target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; @@ -3187,7 +3182,7 @@ static ssize_t srp_create_target(struct device *dev, be64_to_cpu(target->ioc_guid), be64_to_cpu(target->initiator_ext)); ret = -EEXIST; - goto err; + goto out; } if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && @@ -3208,7 +3203,7 @@ static ssize_t srp_create_target(struct device *dev, spin_lock_init(&target->lock); ret = ib_query_gid(ibdev, host->port, 0, &target->sgid); if (ret) - goto err; + goto out; ret = -ENOMEM; target->ch_count = max_t(unsigned, num_online_nodes(), @@ -3219,7 +3214,7 @@ static ssize_t srp_create_target(struct device *dev, target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) - goto err; + goto out; node_idx = 0; for_each_online_node(node) { @@ -3315,9 +3310,6 @@ err_disconnect: } kfree(target->ch); - -err: - scsi_host_put(target_host); goto out; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index a611556..17ee3f8 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -54,7 +54,6 @@ enum { SRP_DLID_REDIRECT = 2, SRP_STALE_CONN = 3, - SRP_MAX_LUN = 512, SRP_DEF_SG_TABLESIZE = 12, SRP_DEFAULT_QUEUE_SIZE = 1 << 6, @@ -170,6 +169,7 @@ struct srp_rdma_ch { struct completion tsk_mgmt_done; u8 tsk_mgmt_status; + bool connected; }; /** @@ -214,7 +214,6 @@ struct srp_target_port { __be16 pkey; 
u32 rq_tmo_jiffies; - bool connected; int zero_req_lim; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9b84b4c..0b2857b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -476,7 +476,8 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp, mad_wc->wc->pkey_index, 0, IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA, - GFP_KERNEL); + GFP_KERNEL, + IB_MGMT_BASE_VERSION); if (IS_ERR(rsp)) goto err_rsp; @@ -2080,6 +2081,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) struct srpt_port *sport = ch->sport; struct srpt_device *sdev = sport->sdev; u32 srp_sq_size = sport->port_attrib.srp_sq_size; + struct ib_cq_init_attr cq_attr = {}; int ret; WARN_ON(ch->rq_size < 1); @@ -2090,8 +2092,9 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) goto out; retry: + cq_attr.cqe = ch->rq_size + srp_sq_size; ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, - ch->rq_size + srp_sq_size, 0); + &cq_attr); if (IS_ERR(ch->cq)) { ret = PTR_ERR(ch->cq); pr_err("failed to create CQ cqe= %d ret= %d\n", diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 524d110..e052f05 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1185,6 +1185,7 @@ enum t4_bar2_qtype { T4_BAR2_QTYPE_EGRESS, T4_BAR2_QTYPE_INGRESS }; int cxgb4_t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qid, enum t4_bar2_qtype qtype, + int user, u64 *pbar2_qoffset, unsigned int *pbar2_qid); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 803d91b..a935559 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2145,6 +2145,7 @@ EXPORT_SYMBOL(cxgb4_read_sge_timestamp); int cxgb4_bar2_sge_qregs(struct 
net_device *dev, unsigned int qid, enum cxgb4_bar2_qtype qtype, + int user, u64 *pbar2_qoffset, unsigned int *pbar2_qid) { @@ -2153,6 +2154,7 @@ int cxgb4_bar2_sge_qregs(struct net_device *dev, (qtype == CXGB4_BAR2_QTYPE_EGRESS ? T4_BAR2_QTYPE_EGRESS : T4_BAR2_QTYPE_INGRESS), + user, pbar2_qoffset, pbar2_qid); } @@ -2351,7 +2353,7 @@ static void process_db_drop(struct work_struct *work) int ret; ret = cxgb4_t4_bar2_sge_qregs(adap, qid, T4_BAR2_QTYPE_EGRESS, - &bar2_qoffset, &bar2_qid); + 0, &bar2_qoffset, &bar2_qid); if (ret) dev_err(adap->pdev_dev, "doorbell drop recovery: " "qid=%d, pidx_inc=%d\n", qid, pidx_inc); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 78ab4d4..e33934a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -306,6 +306,7 @@ enum cxgb4_bar2_qtype { CXGB4_BAR2_QTYPE_EGRESS, CXGB4_BAR2_QTYPE_INGRESS }; int cxgb4_bar2_sge_qregs(struct net_device *dev, unsigned int qid, enum cxgb4_bar2_qtype qtype, + int user, u64 *pbar2_qoffset, unsigned int *pbar2_qid); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 0d2edda..1b99aec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2429,8 +2429,8 @@ static void __iomem *bar2_address(struct adapter *adapter, u64 bar2_qoffset; int ret; - ret = cxgb4_t4_bar2_sge_qregs(adapter, qid, qtype, - &bar2_qoffset, pbar2_qid); + ret = cxgb4_t4_bar2_sge_qregs(adapter, qid, qtype, 0, + &bar2_qoffset, pbar2_qid); if (ret) return NULL; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index e8578a7..61d8b3e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -5102,6 +5102,7 @@ int t4_prep_adapter(struct adapter *adapter) * @adapter: the adapter * @qid: the Queue ID * @qtype: the 
Ingress or Egress type for @qid + * @user: true if this request is for a user mode queue * @pbar2_qoffset: BAR2 Queue Offset * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues * @@ -5125,6 +5126,7 @@ int t4_prep_adapter(struct adapter *adapter) int cxgb4_t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qid, enum t4_bar2_qtype qtype, + int user, u64 *pbar2_qoffset, unsigned int *pbar2_qid) { @@ -5132,9 +5134,8 @@ int cxgb4_t4_bar2_sge_qregs(struct adapter *adapter, u64 bar2_page_offset, bar2_qoffset; unsigned int bar2_qid, bar2_qid_offset, bar2_qinferred; - /* T4 doesn't support BAR2 SGE Queue registers. - */ - if (is_t4(adapter->params.chip)) + /* T4 doesn't support BAR2 SGE Queue registers for kernel mode queues */ + if (!user && is_t4(adapter->params.chip)) return -EINVAL; /* Get our SGE Page Size parameters. diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index ced5eca..70de39c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1674,6 +1674,25 @@ static int map_internal_clock(struct mlx4_dev *dev) return 0; } +int mlx4_get_internal_clock_params(struct mlx4_dev *dev, + struct mlx4_clock_params *params) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (mlx4_is_slave(dev)) + return -ENOTSUPP; + + if (!params) + return -EINVAL; + + params->bar = priv->fw.clock_bar; + params->offset = priv->fw.clock_offset; + params->size = MLX4_CLOCK_SIZE; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); + static void unmap_internal_clock(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c index ee1b0b9..1368dac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c @@ -36,7 +36,7 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" -int 
mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, +int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port) { struct mlx5_mad_ifc_mbox_in *in = NULL; diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index acea5d6..6a41c36 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1053,7 +1053,7 @@ static int ibmvscsi_queuecommand_lck(struct scsi_cmnd *cmnd, memset(srp_cmd, 0x00, SRP_MAX_IU_LEN); srp_cmd->opcode = SRP_CMD; memcpy(srp_cmd->cdb, cmnd->cmnd, sizeof(srp_cmd->cdb)); - srp_cmd->lun = cpu_to_be64(((u64)lun) << 48); + int_to_scsilun(lun, &srp_cmd->lun); if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) { if (!firmware_has_feature(FW_FEATURE_CMO)) @@ -1529,7 +1529,7 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd) /* Set up an abort SRP command */ memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt)); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = cpu_to_be64(((u64) lun) << 48); + int_to_scsilun(lun, &tsk_mgmt->lun); tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK; tsk_mgmt->task_tag = (u64) found_evt; @@ -1652,7 +1652,7 @@ static int ibmvscsi_eh_device_reset_handler(struct scsi_cmnd *cmd) /* Set up a lun reset SRP command */ memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt)); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = cpu_to_be64(((u64) lun) << 48); + int_to_scsilun(lun, &tsk_mgmt->lun); tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET; evt->sync_srp = &srp_rsp; diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index ae45bd9..a85292b 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -61,6 +61,11 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r) return dev_to_shost(r->dev.parent); } +static inline struct srp_rport *shost_to_rport(struct Scsi_Host *shost) +{ + return transport_class_to_srp_rport(&shost->shost_gendev); +} + /** * 
srp_tmo_valid() - check timeout combination validity * @reconnect_delay: Reconnect delay in seconds. @@ -396,6 +401,36 @@ static void srp_reconnect_work(struct work_struct *work) } } +/** + * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() + * @shost: SCSI host for which to count the number of scsi_request_fn() callers. + * + * To do: add support for scsi-mq in this function. + */ +static int scsi_request_fn_active(struct Scsi_Host *shost) +{ + struct scsi_device *sdev; + struct request_queue *q; + int request_fn_active = 0; + + shost_for_each_device(sdev, shost) { + q = sdev->request_queue; + + spin_lock_irq(q->queue_lock); + request_fn_active += q->request_fn_active; + spin_unlock_irq(q->queue_lock); + } + + return request_fn_active; +} + +/* Wait until ongoing shost->hostt->queuecommand() calls have finished. */ +static void srp_wait_for_queuecommand(struct Scsi_Host *shost) +{ + while (scsi_request_fn_active(shost)) + msleep(20); +} + static void __rport_fail_io_fast(struct srp_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); @@ -409,8 +444,10 @@ static void __rport_fail_io_fast(struct srp_rport *rport) /* Involve the LLD if possible to terminate all I/O on the rport. */ i = to_srp_internal(shost->transportt); - if (i->f->terminate_rport_io) + if (i->f->terminate_rport_io) { + srp_wait_for_queuecommand(shost); i->f->terminate_rport_io(rport); + } } /** @@ -504,27 +541,6 @@ void srp_start_tl_fail_timers(struct srp_rport *rport) EXPORT_SYMBOL(srp_start_tl_fail_timers); /** - * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() - * @shost: SCSI host for which to count the number of scsi_request_fn() callers. 
- */ -static int scsi_request_fn_active(struct Scsi_Host *shost) -{ - struct scsi_device *sdev; - struct request_queue *q; - int request_fn_active = 0; - - shost_for_each_device(sdev, shost) { - q = sdev->request_queue; - - spin_lock_irq(q->queue_lock); - request_fn_active += q->request_fn_active; - spin_unlock_irq(q->queue_lock); - } - - return request_fn_active; -} - -/** * srp_reconnect_rport() - reconnect to an SRP target port * @rport: SRP target port. * @@ -559,8 +575,7 @@ int srp_reconnect_rport(struct srp_rport *rport) if (res) goto out; scsi_target_block(&shost->shost_gendev); - while (scsi_request_fn_active(shost)) - msleep(20); + srp_wait_for_queuecommand(shost); res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV; pr_debug("%s (state %d): transport.reconnect() returned %d\n", dev_name(&shost->shost_gendev), rport->state, res); @@ -618,9 +633,11 @@ static enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd) struct scsi_device *sdev = scmd->device; struct Scsi_Host *shost = sdev->host; struct srp_internal *i = to_srp_internal(shost->transportt); + struct srp_rport *rport = shost_to_rport(shost); pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev)); - return i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ? + return rport->fast_io_fail_tmo < 0 && rport->dev_loss_tmo < 0 && + i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ? 
BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED; } diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 3bad441..c41b557 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -647,6 +647,7 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, kib_dev_t *dev; struct ib_qp_init_attr *init_qp_attr; struct kib_sched_info *sched; + struct ib_cq_init_attr cq_attr = {}; kib_conn_t *conn; struct ib_cq *cq; unsigned long flags; @@ -742,10 +743,11 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, kiblnd_map_rx_descs(conn); + cq_attr.cqe = IBLND_CQ_ENTRIES(version); + cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt); cq = ib_create_cq(cmid->device, kiblnd_cq_completion, kiblnd_cq_event, conn, - IBLND_CQ_ENTRIES(version), - kiblnd_get_completion_vector(conn, cpt)); + &cq_attr); if (IS_ERR(cq)) { CERROR("Can't create CQ: %ld, cqe: %d\n", PTR_ERR(cq), IBLND_CQ_ENTRIES(version)); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 83e80ab..f94984f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -829,6 +829,12 @@ struct mlx4_dev { struct mlx4_vf_dev *dev_vfs; }; +struct mlx4_clock_params { + u64 offset; + u8 bar; + u8 size; +}; + struct mlx4_eqe { u8 reserved1; u8 type; @@ -1485,4 +1491,7 @@ int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev, enum mlx4_access_reg_method method, struct mlx4_ptys_reg *ptys_reg); +int mlx4_get_internal_clock_params(struct mlx4_dev *dev, + struct mlx4_clock_params *params); + #endif /* MLX4_DEVICE_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9a90e75..9ec7c93 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -696,7 +696,7 @@ int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, u32 *mkey); int 
mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, +int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index ac54c27..fde33ac 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -111,8 +111,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, int rdma_addr_size(struct sockaddr *addr); int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); -int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac, - u16 *vlan_id); +int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, + u8 *smac, u16 *vlan_id); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { @@ -160,7 +160,7 @@ static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid) } /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */ -static inline void rdma_gid2ip(struct sockaddr *out, union ib_gid *gid) +static inline void rdma_gid2ip(struct sockaddr *out, const union ib_gid *gid) { if (ipv6_addr_v4mapped((struct in6_addr *)gid)) { struct sockaddr_in *out_in = (struct sockaddr_in *)out; diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index ad9a3c2..bd92130 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -64,10 +64,10 @@ int ib_get_cached_gid(struct ib_device *device, * ib_find_cached_gid() searches for the specified GID value in * the local software cache. 
*/ -int ib_find_cached_gid(struct ib_device *device, - union ib_gid *gid, - u8 *port_num, - u16 *index); +int ib_find_cached_gid(struct ib_device *device, + const union ib_gid *gid, + u8 *port_num, + u16 *index); /** * ib_get_cached_pkey - Returns a cached PKey table entry diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 9bb99e9..c8422d5 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -42,8 +42,11 @@ #include <rdma/ib_verbs.h> #include <uapi/rdma/ib_user_mad.h> -/* Management base version */ +/* Management base versions */ #define IB_MGMT_BASE_VERSION 1 +#define OPA_MGMT_BASE_VERSION 0x80 + +#define OPA_SMP_CLASS_VERSION 0x80 /* Management classes */ #define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 @@ -135,6 +138,10 @@ enum { IB_MGMT_SA_DATA = 200, IB_MGMT_DEVICE_HDR = 64, IB_MGMT_DEVICE_DATA = 192, + IB_MGMT_MAD_SIZE = IB_MGMT_MAD_HDR + IB_MGMT_MAD_DATA, + OPA_MGMT_MAD_DATA = 2024, + OPA_MGMT_RMPP_DATA = 2012, + OPA_MGMT_MAD_SIZE = IB_MGMT_MAD_HDR + OPA_MGMT_MAD_DATA, }; struct ib_mad_hdr { @@ -181,12 +188,23 @@ struct ib_mad { u8 data[IB_MGMT_MAD_DATA]; }; +struct opa_mad { + struct ib_mad_hdr mad_hdr; + u8 data[OPA_MGMT_MAD_DATA]; +}; + struct ib_rmpp_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; u8 data[IB_MGMT_RMPP_DATA]; }; +struct opa_rmpp_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 data[OPA_MGMT_RMPP_DATA]; +}; + struct ib_sa_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; @@ -235,7 +253,10 @@ struct ib_class_port_info { * includes the common MAD, RMPP, and class specific headers. * @data_len: Indicates the total size of user-transferred data. * @seg_count: The number of RMPP segments allocated for this send. - * @seg_size: Size of each RMPP segment. + * @seg_size: Size of the data in each RMPP segment. This does not include + * class specific headers. + * @seg_rmpp_size: Size of each RMPP segment including the class specific + * headers. 
* @timeout_ms: Time to wait for a response. * @retries: Number of times to retry a request for a response. For MADs * using RMPP, this applies per window. On completion, returns the number @@ -255,6 +276,7 @@ struct ib_mad_send_buf { int data_len; int seg_count; int seg_size; + int seg_rmpp_size; int timeout_ms; int retries; }; @@ -263,7 +285,7 @@ struct ib_mad_send_buf { * ib_response_mad - Returns if the specified MAD has been generated in * response to a sent request or trap. */ -int ib_response_mad(struct ib_mad *mad); +int ib_response_mad(const struct ib_mad_hdr *hdr); /** * ib_get_rmpp_resptime - Returns the RMPP response time. @@ -401,7 +423,10 @@ struct ib_mad_send_wc { struct ib_mad_recv_buf { struct list_head list; struct ib_grh *grh; - struct ib_mad *mad; + union { + struct ib_mad *mad; + struct opa_mad *opa_mad; + }; }; /** @@ -410,6 +435,7 @@ struct ib_mad_recv_buf { * @recv_buf: Specifies the location of the received data buffer(s). * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. * @mad_len: The length of the received MAD, without duplicated headers. + * @mad_seg_size: The size of individual MAD segments * * For received response, the wr_id contains a pointer to the ib_mad_send_buf * for the corresponding send request. @@ -419,6 +445,7 @@ struct ib_mad_recv_wc { struct ib_mad_recv_buf recv_buf; struct list_head rmpp_list; int mad_len; + size_t mad_seg_size; }; /** @@ -618,6 +645,7 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * automatically adjust the allocated buffer size to account for any * additional padding that may be necessary. * @gfp_mask: GFP mask used for the memory allocation. + * @base_version: Base Version of this MAD * * This routine allocates a MAD for sending. 
The returned MAD send buffer * will reference a data buffer usable for sending a MAD, along @@ -633,7 +661,8 @@ struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, int rmpp_active, int hdr_len, int data_len, - gfp_t gfp_mask); + gfp_t gfp_mask, + u8 base_version); /** * ib_is_mad_class_rmpp - returns whether given management class @@ -675,6 +704,6 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf); * @agent: the agent in question * @return: true if agent is performing rmpp, false otherwise. */ -int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent); +int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent); #endif /* IB_MAD_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 65994a1..986fddb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -81,6 +81,13 @@ enum rdma_transport_type { RDMA_TRANSPORT_USNIC_UDP }; +enum rdma_protocol_type { + RDMA_PROTOCOL_IB, + RDMA_PROTOCOL_IBOE, + RDMA_PROTOCOL_IWARP, + RDMA_PROTOCOL_USNIC_UDP +}; + __attribute_const__ enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type); @@ -166,6 +173,16 @@ struct ib_odp_caps { } per_transport_caps; }; +enum ib_cq_creation_flags { + IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, +}; + +struct ib_cq_init_attr { + unsigned int cqe; + int comp_vector; + u32 flags; +}; + struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; @@ -210,6 +227,8 @@ struct ib_device_attr { int sig_prot_cap; int sig_guard_cap; struct ib_odp_caps odp_caps; + uint64_t timestamp_mask; + uint64_t hca_core_clock; /* in KHZ */ }; enum ib_mtu { @@ -346,6 +365,42 @@ union rdma_protocol_stats { struct iw_protocol_stats iw; }; +/* Define bits for the various functionality this port needs to be supported by + * the core. 
+ */ +/* Management 0x00000FFF */ +#define RDMA_CORE_CAP_IB_MAD 0x00000001 +#define RDMA_CORE_CAP_IB_SMI 0x00000002 +#define RDMA_CORE_CAP_IB_CM 0x00000004 +#define RDMA_CORE_CAP_IW_CM 0x00000008 +#define RDMA_CORE_CAP_IB_SA 0x00000010 +#define RDMA_CORE_CAP_OPA_MAD 0x00000020 + +/* Address format 0x000FF000 */ +#define RDMA_CORE_CAP_AF_IB 0x00001000 +#define RDMA_CORE_CAP_ETH_AH 0x00002000 + +/* Protocol 0xFFF00000 */ +#define RDMA_CORE_CAP_PROT_IB 0x00100000 +#define RDMA_CORE_CAP_PROT_ROCE 0x00200000 +#define RDMA_CORE_CAP_PROT_IWARP 0x00400000 + +#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \ + | RDMA_CORE_CAP_IB_MAD \ + | RDMA_CORE_CAP_IB_SMI \ + | RDMA_CORE_CAP_IB_CM \ + | RDMA_CORE_CAP_IB_SA \ + | RDMA_CORE_CAP_AF_IB) +#define RDMA_CORE_PORT_IBA_ROCE (RDMA_CORE_CAP_PROT_ROCE \ + | RDMA_CORE_CAP_IB_MAD \ + | RDMA_CORE_CAP_IB_CM \ + | RDMA_CORE_CAP_AF_IB \ + | RDMA_CORE_CAP_ETH_AH) +#define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \ + | RDMA_CORE_CAP_IW_CM) +#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \ + | RDMA_CORE_CAP_OPA_MAD) + struct ib_port_attr { enum ib_port_state state; enum ib_mtu max_mtu; @@ -412,6 +467,8 @@ enum ib_event_type { IB_EVENT_GID_CHANGE, }; +__attribute_const__ const char *ib_event_msg(enum ib_event_type event); + struct ib_event { struct ib_device *device; union { @@ -663,6 +720,8 @@ enum ib_wc_status { IB_WC_GENERAL_ERR }; +__attribute_const__ const char *ib_wc_status_msg(enum ib_wc_status status); + enum ib_wc_opcode { IB_WC_SEND, IB_WC_RDMA_WRITE, @@ -1407,7 +1466,7 @@ struct ib_flow { struct ib_uobject *uobject; }; -struct ib_mad; +struct ib_mad_hdr; struct ib_grh; enum ib_process_mad_flags { @@ -1474,6 +1533,13 @@ struct ib_dma_mapping_ops { struct iw_cm_verbs; +struct ib_port_immutable { + int pkey_tbl_len; + int gid_tbl_len; + u32 core_cap_flags; + u32 max_mad_size; +}; + struct ib_device { struct device *dma_device; @@ -1487,8 +1553,10 @@ struct ib_device { struct list_head client_data_list; struct 
ib_cache cache; - int *pkey_tbl_len; - int *gid_tbl_len; + /** + * port_immutable is indexed by port number + */ + struct ib_port_immutable *port_immutable; int num_comp_vectors; @@ -1497,7 +1565,8 @@ struct ib_device { int (*get_protocol_stats)(struct ib_device *device, union rdma_protocol_stats *stats); int (*query_device)(struct ib_device *device, - struct ib_device_attr *device_attr); + struct ib_device_attr *device_attr, + struct ib_udata *udata); int (*query_port)(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); @@ -1561,8 +1630,8 @@ struct ib_device { int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); - struct ib_cq * (*create_cq)(struct ib_device *device, int cqe, - int comp_vector, + struct ib_cq * (*create_cq)(struct ib_device *device, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, @@ -1637,10 +1706,13 @@ struct ib_device { int (*process_mad)(struct ib_device *device, int process_mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad); + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index); struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device, struct ib_ucontext *ucontext, struct ib_udata *udata); @@ -1675,6 +1747,14 @@ struct ib_device { u32 local_dma_lkey; u8 node_type; u8 phys_port_cnt; + + /** + * The following mandatory functions are used only at device + * registration. Keep functions such as these at the end of this + * structure to avoid cache line misses when accessing struct ib_device + * in fast paths. 
+ */ + int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); }; struct ib_client { @@ -1743,6 +1823,284 @@ int ib_query_port(struct ib_device *device, enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num); +/** + * rdma_start_port - Return the first valid port number for the device + * specified + * + * @device: Device to be checked + * + * Return start port number + */ +static inline u8 rdma_start_port(const struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; +} + +/** + * rdma_end_port - Return the last valid port number for the device + * specified + * + * @device: Device to be checked + * + * Return last port number + */ +static inline u8 rdma_end_port(const struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? + 0 : device->phys_port_cnt; +} + +static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB; +} + +static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE; +} + +static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP; +} + +static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & + (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE); +} + +/** + * rdma_cap_ib_mad - Check if the port of a device supports Infiniband + * Management Datagrams. + * @device: Device to check + * @port_num: Port number to check + * + * Management Datagrams (MAD) are a required part of the InfiniBand + * specification and are supported on all InfiniBand devices. A slightly + * extended version are also supported on OPA interfaces. 
+ * + * Return: true if the port supports sending/receiving of MAD packets. + */ +static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD; +} + +/** + * rdma_cap_opa_mad - Check if the port of device provides support for OPA + * Management Datagrams. + * @device: Device to check + * @port_num: Port number to check + * + * Intel OmniPath devices extend and/or replace the InfiniBand Management + * datagrams with their own versions. These OPA MADs share many but not all of + * the characteristics of InfiniBand MADs. + * + * OPA MADs differ in the following ways: + * + * 1) MADs are variable size up to 2K + * IBTA defined MADs remain fixed at 256 bytes + * 2) OPA SMPs must carry valid PKeys + * 3) OPA SMP packets are a different format + * + * Return: true if the port supports OPA MAD packet formats. + */ +static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) +{ + return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD) + == RDMA_CORE_CAP_OPA_MAD; +} + +/** + * rdma_cap_ib_smi - Check if the port of a device provides an Infiniband + * Subnet Management Agent (SMA) on the Subnet Management Interface (SMI). + * @device: Device to check + * @port_num: Port number to check + * + * Each InfiniBand node is required to provide a Subnet Management Agent + * that the subnet manager can access. Prior to the fabric being fully + * configured by the subnet manager, the SMA is accessed via a well known + * interface called the Subnet Management Interface (SMI). This interface + * uses directed route packets to communicate with the SM to get around the + * chicken and egg problem of the SM needing to know what's on the fabric + * in order to configure the fabric, and needing to configure the fabric in + * order to send packets to the devices on the fabric. 
These directed + * route packets do not need the fabric fully configured in order to reach + * their destination. The SMI is the only method allowed to send + * directed route packets on an InfiniBand fabric. + * + * Return: true if the port provides an SMI. + */ +static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI; +} + +/** + * rdma_cap_ib_cm - Check if the port of device has the capability Infiniband + * Communication Manager. + * @device: Device to check + * @port_num: Port number to check + * + * The InfiniBand Communication Manager is one of many pre-defined General + * Service Agents (GSA) that are accessed via the General Service + * Interface (GSI). It's role is to facilitate establishment of connections + * between nodes as well as other management related tasks for established + * connections. + * + * Return: true if the port supports an IB CM (this does not guarantee that + * a CM is actually running however). + */ +static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM; +} + +/** + * rdma_cap_iw_cm - Check if the port of device has the capability IWARP + * Communication Manager. + * @device: Device to check + * @port_num: Port number to check + * + * Similar to above, but specific to iWARP connections which have a different + * managment protocol than InfiniBand. + * + * Return: true if the port supports an iWARP CM (this does not guarantee that + * a CM is actually running however). + */ +static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM; +} + +/** + * rdma_cap_ib_sa - Check if the port of device has the capability Infiniband + * Subnet Administration. 
+ * @device: Device to check + * @port_num: Port number to check + * + * An InfiniBand Subnet Administration (SA) service is a pre-defined General + * Service Agent (GSA) provided by the Subnet Manager (SM). On InfiniBand + * fabrics, devices should resolve routes to other hosts by contacting the + * SA to query the proper route. + * + * Return: true if the port should act as a client to the fabric Subnet + * Administration interface. This does not imply that the SA service is + * running locally. + */ +static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA; +} + +/** + * rdma_cap_ib_mcast - Check if the port of device has the capability Infiniband + * Multicast. + * @device: Device to check + * @port_num: Port number to check + * + * InfiniBand multicast registration is more complex than normal IPv4 or + * IPv6 multicast registration. Each Host Channel Adapter must register + * with the Subnet Manager when it wishes to join a multicast group. It + * should do so only once regardless of how many queue pairs it subscribes + * to this group. And it should leave the group only after all queue pairs + * attached to the group have been detached. + * + * Return: true if the port must undertake the additional adminstrative + * overhead of registering/unregistering with the SM and tracking of the + * total number of queue pairs attached to the multicast group. + */ +static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num) +{ + return rdma_cap_ib_sa(device, port_num); +} + +/** + * rdma_cap_af_ib - Check if the port of device has the capability + * Native Infiniband Address. + * @device: Device to check + * @port_num: Port number to check + * + * InfiniBand addressing uses a port's GUID + Subnet Prefix to make a default + * GID. RoCE uses a different mechanism, but still generates a GID via + * a prescribed mechanism and port specific data. 
+ * + * Return: true if the port uses a GID address to identify devices on the + * network. + */ +static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB; +} + +/** + * rdma_cap_eth_ah - Check if the port of device has the capability + * Ethernet Address Handle. + * @device: Device to check + * @port_num: Port number to check + * + * RoCE is InfiniBand over Ethernet, and it uses a well defined technique + * to fabricate GIDs over Ethernet/IP specific addresses native to the + * port. Normally, packet headers are generated by the sending host + * adapter, but when sending connectionless datagrams, we must manually + * inject the proper headers for the fabric we are communicating over. + * + * Return: true if we are running as a RoCE port and must force the + * addition of a Global Route Header built from our Ethernet Address + * Handle into our header list for connectionless packets. + */ +static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH; +} + +/** + * rdma_cap_read_multi_sge - Check if the port of device has the capability + * RDMA Read Multiple Scatter-Gather Entries. + * @device: Device to check + * @port_num: Port number to check + * + * iWARP has a restriction that RDMA READ requests may only have a single + * Scatter/Gather Entry (SGE) in the work request. + * + * NOTE: although the linux kernel currently assumes all devices are either + * single SGE RDMA READ devices or identical SGE maximums for RDMA READs and + * WRITEs, according to Tom Talpey, this is not accurate. There are some + * devices out there that support more than a single SGE on RDMA READ + * requests, but do not support the same number of SGEs as they do on + * RDMA WRITE requests. 
The linux kernel would need rearchitecting to + * support these imbalanced READ/WRITE SGEs allowed devices. So, for now, + * suffice with either the device supports the same READ/WRITE SGEs, or + * it only gets one READ sge. + * + * Return: true for any device that allows more than one SGE in RDMA READ + * requests. + */ +static inline bool rdma_cap_read_multi_sge(struct ib_device *device, + u8 port_num) +{ + return !(device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP); +} + +/** + * rdma_max_mad_size - Return the max MAD size required by this RDMA Port. + * + * @device: Device + * @port_num: Port number + * + * This MAD size includes the MAD headers and MAD payload. No other headers + * are included. + * + * Return the max MAD size required by the Port. Will return 0 if the port + * does not support MADs + */ +static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num) +{ + return device->port_immutable[port_num].max_mad_size; +} + int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); @@ -1799,8 +2157,9 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); * @ah_attr: Returned attributes that can be used when creating an address * handle for replying to the message. */ -int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, - struct ib_grh *grh, struct ib_ah_attr *ah_attr); +int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, + const struct ib_wc *wc, const struct ib_grh *grh, + struct ib_ah_attr *ah_attr); /** * ib_create_ah_from_wc - Creates an address handle associated with the @@ -1814,8 +2173,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, * The address handle is used to reference a local or global destination * in all UD QP post sends. 
*/ -struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, - struct ib_grh *grh, u8 port_num); +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, + const struct ib_grh *grh, u8 port_num); /** * ib_modify_ah - Modifies the address vector associated with an address @@ -2011,16 +2370,15 @@ static inline int ib_post_recv(struct ib_qp *qp, * asynchronous event not associated with a completion occurs on the CQ. * @cq_context: Context associated with the CQ returned to the user via * the associated completion and event handlers. - * @cqe: The minimum size of the CQ. - * @comp_vector - Completion vector used to signal completion events. - * Must be >= 0 and < context->num_comp_vectors. + * @cq_attr: The attributes the CQ should be created upon. * * Users can examine the cq structure to determine the actual CQ size. */ struct ib_cq *ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), - void *cq_context, int cqe, int comp_vector); + void *cq_context, + const struct ib_cq_init_attr *cq_attr); /** * ib_resize_cq - Modifies the capacity of the CQ. diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h index 1017e0b..036bd27 100644 --- a/include/rdma/iw_cm.h +++ b/include/rdma/iw_cm.h @@ -91,6 +91,7 @@ struct iw_cm_id { /* Used by provider to add and remove refs on IW cm_id */ void (*add_ref)(struct iw_cm_id *); void (*rem_ref)(struct iw_cm_id *); + u8 tos; }; struct iw_cm_conn_param { diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h new file mode 100644 index 0000000..29063e8 --- /dev/null +++ b/include/rdma/opa_smi.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#if !defined(OPA_SMI_H) +#define OPA_SMI_H + +#include <rdma/ib_mad.h> +#include <rdma/ib_smi.h> + +#define OPA_SMP_LID_DATA_SIZE 2016 +#define OPA_SMP_DR_DATA_SIZE 1872 +#define OPA_SMP_MAX_PATH_HOPS 64 + +#define OPA_SMI_CLASS_VERSION 0x80 + +#define OPA_LID_PERMISSIVE cpu_to_be32(0xFFFFFFFF) + +struct opa_smp { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + u8 hop_ptr; + u8 hop_cnt; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 mkey; + union { + struct { + uint8_t data[OPA_SMP_LID_DATA_SIZE]; + } lid; + struct { + __be32 dr_slid; + __be32 dr_dlid; + u8 initial_path[OPA_SMP_MAX_PATH_HOPS]; + u8 return_path[OPA_SMP_MAX_PATH_HOPS]; + u8 reserved[8]; + u8 data[OPA_SMP_DR_DATA_SIZE]; + } dr; + } route; +} __packed; + + +static inline u8 +opa_get_smp_direction(struct opa_smp *smp) +{ + return ib_get_smp_direction((struct ib_smp *)smp); +} + +static inline u8 *opa_get_smp_data(struct opa_smp *smp) +{ + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + return smp->route.dr.data; + + return smp->route.lid.data; +} + +static inline size_t opa_get_smp_data_size(struct opa_smp *smp) +{ + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + return sizeof(smp->route.dr.data); + + return sizeof(smp->route.lid.data); +} + +static inline size_t opa_get_smp_header_size(struct opa_smp *smp) +{ + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + return sizeof(*smp) - sizeof(smp->route.dr.data); + + return sizeof(*smp) - sizeof(smp->route.lid.data); +} + +#endif /* OPA_SMI_H */ diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 1ed2088..c92522c 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -62,6 +62,8 @@ enum rdma_cm_event_type { RDMA_CM_EVENT_TIMEWAIT_EXIT }; +__attribute_const__ const char *rdma_event_msg(enum rdma_cm_event_type event); + enum rdma_port_space { RDMA_PS_SDP = 0x0001, RDMA_PS_IPOIB = 0x0002, diff --git a/include/scsi/srp.h 
b/include/scsi/srp.h index 1ae84db..5be834d 100644 --- a/include/scsi/srp.h +++ b/include/scsi/srp.h @@ -42,6 +42,7 @@ */ #include <linux/types.h> +#include <scsi/scsi.h> enum { SRP_LOGIN_REQ = 0x00, @@ -179,7 +180,7 @@ struct srp_tsk_mgmt { u8 reserved1[6]; u64 tag; u8 reserved2[4]; - __be64 lun __attribute__((packed)); + struct scsi_lun lun; u8 reserved3[2]; u8 tsk_mgmt_func; u8 reserved4; @@ -200,7 +201,7 @@ struct srp_cmd { u8 data_in_desc_cnt; u64 tag; u8 reserved2[4]; - __be64 lun __attribute__((packed)); + struct scsi_lun lun; u8 reserved3; u8 task_attr; u8 reserved4; @@ -265,7 +266,7 @@ struct srp_aer_req { __be32 req_lim_delta; u64 tag; u32 reserved2; - __be64 lun; + struct scsi_lun lun; __be32 sense_data_len; u32 reserved3; u8 sense_data[0]; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index b513e66..978841e 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -91,6 +91,7 @@ enum { enum { IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, + IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ, IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, IB_USER_VERBS_EX_CMD_DESTROY_FLOW, }; @@ -222,6 +223,8 @@ struct ib_uverbs_ex_query_device_resp { __u32 comp_mask; __u32 response_length; struct ib_uverbs_odp_caps odp_caps; + __u64 timestamp_mask; + __u64 hca_core_clock; /* in KHZ */ }; struct ib_uverbs_query_port { @@ -353,11 +356,27 @@ struct ib_uverbs_create_cq { __u64 driver_data[0]; }; +struct ib_uverbs_ex_create_cq { + __u64 user_handle; + __u32 cqe; + __u32 comp_vector; + __s32 comp_channel; + __u32 comp_mask; + __u32 flags; + __u32 reserved; +}; + struct ib_uverbs_create_cq_resp { __u32 cq_handle; __u32 cqe; }; +struct ib_uverbs_ex_create_cq_resp { + struct ib_uverbs_create_cq_resp base; + __u32 comp_mask; + __u32 response_length; +}; + struct ib_uverbs_resize_cq { __u64 response; __u32 cq_handle; diff --git a/net/9p/trans_rdma.c 
b/net/9p/trans_rdma.c index 3533d2a..37a78d2 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -648,6 +648,7 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; + struct ib_cq_init_attr cq_attr = {}; /* Parse the transport specific mount options */ err = parse_opts(args, &opts); @@ -705,9 +706,10 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) goto error; /* Create the Completion Queue */ + cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1; rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, cq_event_handler, client, - opts.sq_depth + opts.rq_depth + 1, 0); + &cq_attr); if (IS_ERR(rdma->cq)) goto error; ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 1044337..11b623c 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -40,15 +40,6 @@ #include "rds.h" -char *rds_str_array(char **array, size_t elements, size_t index) -{ - if ((index < elements) && array[index]) - return array[index]; - else - return "unknown"; -} -EXPORT_SYMBOL(rds_str_array); - /* this is just used for stats gathering :/ */ static DEFINE_SPINLOCK(rds_sock_lock); static unsigned long rds_sock_count; diff --git a/net/rds/ib.h b/net/rds/ib.h index c36d713..333611d 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -339,7 +339,6 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest); extern wait_queue_head_t rds_ib_ring_empty_wait; /* ib_send.c */ -char *rds_ib_wc_status_str(enum ib_wc_status status); void rds_ib_xmit_complete(struct rds_connection *conn); int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 8a09ee7..0da2a45 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -39,36 +39,6 @@ #include "rds.h" #include "ib.h" 
-static char *rds_ib_event_type_strings[] = { -#define RDS_IB_EVENT_STRING(foo) \ - [IB_EVENT_##foo] = __stringify(IB_EVENT_##foo) - RDS_IB_EVENT_STRING(CQ_ERR), - RDS_IB_EVENT_STRING(QP_FATAL), - RDS_IB_EVENT_STRING(QP_REQ_ERR), - RDS_IB_EVENT_STRING(QP_ACCESS_ERR), - RDS_IB_EVENT_STRING(COMM_EST), - RDS_IB_EVENT_STRING(SQ_DRAINED), - RDS_IB_EVENT_STRING(PATH_MIG), - RDS_IB_EVENT_STRING(PATH_MIG_ERR), - RDS_IB_EVENT_STRING(DEVICE_FATAL), - RDS_IB_EVENT_STRING(PORT_ACTIVE), - RDS_IB_EVENT_STRING(PORT_ERR), - RDS_IB_EVENT_STRING(LID_CHANGE), - RDS_IB_EVENT_STRING(PKEY_CHANGE), - RDS_IB_EVENT_STRING(SM_CHANGE), - RDS_IB_EVENT_STRING(SRQ_ERR), - RDS_IB_EVENT_STRING(SRQ_LIMIT_REACHED), - RDS_IB_EVENT_STRING(QP_LAST_WQE_REACHED), - RDS_IB_EVENT_STRING(CLIENT_REREGISTER), -#undef RDS_IB_EVENT_STRING -}; - -static char *rds_ib_event_str(enum ib_event_type type) -{ - return rds_str_array(rds_ib_event_type_strings, - ARRAY_SIZE(rds_ib_event_type_strings), type); -}; - /* * Set the selected protocol version */ @@ -243,7 +213,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, static void rds_ib_cq_event_handler(struct ib_event *event, void *data) { rdsdebug("event %u (%s) data %p\n", - event->event, rds_ib_event_str(event->event), data); + event->event, ib_event_msg(event->event), data); } static void rds_ib_qp_event_handler(struct ib_event *event, void *data) @@ -252,7 +222,7 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) struct rds_ib_connection *ic = conn->c_transport_data; rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event, - rds_ib_event_str(event->event)); + ib_event_msg(event->event)); switch (event->event) { case IB_EVENT_COMM_EST: @@ -261,7 +231,7 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) default: rdsdebug("Fatal QP Event %u (%s) " "- connection %pI4->%pI4, reconnecting\n", - event->event, rds_ib_event_str(event->event), + event->event, ib_event_msg(event->event), 
&conn->c_laddr, &conn->c_faddr); rds_conn_drop(conn); break; @@ -277,6 +247,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) struct rds_ib_connection *ic = conn->c_transport_data; struct ib_device *dev = ic->i_cm_id->device; struct ib_qp_init_attr attr; + struct ib_cq_init_attr cq_attr = {}; struct rds_ib_device *rds_ibdev; int ret; @@ -300,9 +271,10 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ic->i_pd = rds_ibdev->pd; ic->i_mr = rds_ibdev->mr; + cq_attr.cqe = ic->i_send_ring.w_nr + 1; ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, rds_ib_cq_event_handler, conn, - ic->i_send_ring.w_nr + 1, 0); + &cq_attr); if (IS_ERR(ic->i_send_cq)) { ret = PTR_ERR(ic->i_send_cq); ic->i_send_cq = NULL; @@ -310,9 +282,10 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto out; } + cq_attr.cqe = ic->i_recv_ring.w_nr; ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler, rds_ib_cq_event_handler, conn, - ic->i_recv_ring.w_nr, 0); + &cq_attr); if (IS_ERR(ic->i_recv_cq)) { ret = PTR_ERR(ic->i_recv_cq); ic->i_recv_cq = NULL; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 1b981a4..cac5b45 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -956,7 +956,7 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic, while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", (unsigned long long)wc.wr_id, wc.status, - rds_ib_wc_status_str(wc.status), wc.byte_len, + ib_wc_status_msg(wc.status), wc.byte_len, be32_to_cpu(wc.ex.imm_data)); rds_ib_stats_inc(s_ib_rx_cq_event); @@ -978,7 +978,7 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic, "status %u (%s), disconnecting and " "reconnecting\n", &conn->c_faddr, wc.status, - rds_ib_wc_status_str(wc.status)); + ib_wc_status_msg(wc.status)); } /* diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index bd3825d..5d0a704 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -39,40 +39,6 @@ #include 
"rds.h" #include "ib.h" -static char *rds_ib_wc_status_strings[] = { -#define RDS_IB_WC_STATUS_STR(foo) \ - [IB_WC_##foo] = __stringify(IB_WC_##foo) - RDS_IB_WC_STATUS_STR(SUCCESS), - RDS_IB_WC_STATUS_STR(LOC_LEN_ERR), - RDS_IB_WC_STATUS_STR(LOC_QP_OP_ERR), - RDS_IB_WC_STATUS_STR(LOC_EEC_OP_ERR), - RDS_IB_WC_STATUS_STR(LOC_PROT_ERR), - RDS_IB_WC_STATUS_STR(WR_FLUSH_ERR), - RDS_IB_WC_STATUS_STR(MW_BIND_ERR), - RDS_IB_WC_STATUS_STR(BAD_RESP_ERR), - RDS_IB_WC_STATUS_STR(LOC_ACCESS_ERR), - RDS_IB_WC_STATUS_STR(REM_INV_REQ_ERR), - RDS_IB_WC_STATUS_STR(REM_ACCESS_ERR), - RDS_IB_WC_STATUS_STR(REM_OP_ERR), - RDS_IB_WC_STATUS_STR(RETRY_EXC_ERR), - RDS_IB_WC_STATUS_STR(RNR_RETRY_EXC_ERR), - RDS_IB_WC_STATUS_STR(LOC_RDD_VIOL_ERR), - RDS_IB_WC_STATUS_STR(REM_INV_RD_REQ_ERR), - RDS_IB_WC_STATUS_STR(REM_ABORT_ERR), - RDS_IB_WC_STATUS_STR(INV_EECN_ERR), - RDS_IB_WC_STATUS_STR(INV_EEC_STATE_ERR), - RDS_IB_WC_STATUS_STR(FATAL_ERR), - RDS_IB_WC_STATUS_STR(RESP_TIMEOUT_ERR), - RDS_IB_WC_STATUS_STR(GENERAL_ERR), -#undef RDS_IB_WC_STATUS_STR -}; - -char *rds_ib_wc_status_str(enum ib_wc_status status) -{ - return rds_str_array(rds_ib_wc_status_strings, - ARRAY_SIZE(rds_ib_wc_status_strings), status); -} - /* * Convert IB-specific error message to RDS error message and call core * completion handler. 
@@ -293,7 +259,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) while (ib_poll_cq(cq, 1, &wc) > 0) { rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", (unsigned long long)wc.wr_id, wc.status, - rds_ib_wc_status_str(wc.status), wc.byte_len, + ib_wc_status_msg(wc.status), wc.byte_len, be32_to_cpu(wc.ex.imm_data)); rds_ib_stats_inc(s_ib_tx_cq_event); @@ -344,7 +310,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rds_ib_conn_error(conn, "send completion on %pI4 had status " "%u (%s), disconnecting and reconnecting\n", &conn->c_faddr, wc.status, - rds_ib_wc_status_str(wc.status)); + ib_wc_status_msg(wc.status)); } } } @@ -605,6 +571,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, } rds_message_addref(rm); + rm->data.op_dmasg = 0; + rm->data.op_dmaoff = 0; ic->i_data_op = &rm->data; /* Finalize the header */ @@ -658,7 +626,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, send = &ic->i_sends[pos]; first = send; prev = NULL; - scat = &ic->i_data_op->op_sg[sg]; + scat = &ic->i_data_op->op_sg[rm->data.op_dmasg]; i = 0; do { unsigned int len = 0; @@ -680,17 +648,20 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, /* Set up the data, if present */ if (i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]) { - len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); + len = min(RDS_FRAG_SIZE, + ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff); send->s_wr.num_sge = 2; - send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off; + send->s_sge[1].addr = ib_sg_dma_address(dev, scat); + send->s_sge[1].addr += rm->data.op_dmaoff; send->s_sge[1].length = len; bytes_sent += len; - off += len; - if (off == ib_sg_dma_len(dev, scat)) { + rm->data.op_dmaoff += len; + if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) { scat++; - off = 0; + rm->data.op_dmasg++; + rm->data.op_dmaoff = 0; } } diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 
a6c2bea..8f486fa 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -179,6 +179,7 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr, void *context) { struct ib_device *dev = rds_iwdev->dev; + struct ib_cq_init_attr cq_attr = {}; unsigned int send_size, recv_size; int ret; @@ -198,9 +199,10 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr, attr->sq_sig_type = IB_SIGNAL_REQ_WR; attr->qp_type = IB_QPT_RC; + cq_attr.cqe = send_size; attr->send_cq = ib_create_cq(dev, send_cq_handler, rds_iw_cq_event_handler, - context, send_size, 0); + context, &cq_attr); if (IS_ERR(attr->send_cq)) { ret = PTR_ERR(attr->send_cq); attr->send_cq = NULL; @@ -208,9 +210,10 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr, goto out; } + cq_attr.cqe = recv_size; attr->recv_cq = ib_create_cq(dev, recv_cq_handler, rds_iw_cq_event_handler, - context, recv_size, 0); + context, &cq_attr); if (IS_ERR(attr->recv_cq)) { ret = PTR_ERR(attr->recv_cq); attr->recv_cq = NULL; diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 1383478..334fe98 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -581,6 +581,8 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; rds_message_addref(rm); + rm->data.op_dmasg = 0; + rm->data.op_dmaoff = 0; ic->i_rm = rm; /* Finalize the header */ @@ -622,7 +624,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, send = &ic->i_sends[pos]; first = send; prev = NULL; - scat = &rm->data.op_sg[sg]; + scat = &rm->data.op_sg[rm->data.op_dmasg]; sent = 0; i = 0; @@ -656,10 +658,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, send = &ic->i_sends[pos]; - len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); + len = min(RDS_FRAG_SIZE, + ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff); rds_iw_xmit_populate_wr(ic, send, pos, - ib_sg_dma_address(dev, scat) + 
off, len, - send_flags); + ib_sg_dma_address(dev, scat) + rm->data.op_dmaoff, len, + send_flags); /* * We want to delay signaling completions just enough to get @@ -687,10 +690,11 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, &send->s_wr, send->s_wr.num_sge, send->s_wr.next); sent += len; - off += len; - if (off == ib_sg_dma_len(dev, scat)) { + rm->data.op_dmaoff += len; + if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) { scat++; - off = 0; + rm->data.op_dmaoff = 0; + rm->data.op_dmasg++; } add_header: diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 6cd9d1d..2082408 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -37,34 +37,6 @@ static struct rdma_cm_id *rds_rdma_listen_id; -static char *rds_cm_event_strings[] = { -#define RDS_CM_EVENT_STRING(foo) \ - [RDMA_CM_EVENT_##foo] = __stringify(RDMA_CM_EVENT_##foo) - RDS_CM_EVENT_STRING(ADDR_RESOLVED), - RDS_CM_EVENT_STRING(ADDR_ERROR), - RDS_CM_EVENT_STRING(ROUTE_RESOLVED), - RDS_CM_EVENT_STRING(ROUTE_ERROR), - RDS_CM_EVENT_STRING(CONNECT_REQUEST), - RDS_CM_EVENT_STRING(CONNECT_RESPONSE), - RDS_CM_EVENT_STRING(CONNECT_ERROR), - RDS_CM_EVENT_STRING(UNREACHABLE), - RDS_CM_EVENT_STRING(REJECTED), - RDS_CM_EVENT_STRING(ESTABLISHED), - RDS_CM_EVENT_STRING(DISCONNECTED), - RDS_CM_EVENT_STRING(DEVICE_REMOVAL), - RDS_CM_EVENT_STRING(MULTICAST_JOIN), - RDS_CM_EVENT_STRING(MULTICAST_ERROR), - RDS_CM_EVENT_STRING(ADDR_CHANGE), - RDS_CM_EVENT_STRING(TIMEWAIT_EXIT), -#undef RDS_CM_EVENT_STRING -}; - -static char *rds_cm_event_str(enum rdma_cm_event_type type) -{ - return rds_str_array(rds_cm_event_strings, - ARRAY_SIZE(rds_cm_event_strings), type); -}; - int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { @@ -74,7 +46,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, int ret = 0; rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id, - event->event, rds_cm_event_str(event->event)); + event->event, 
rdma_event_msg(event->event)); if (cm_id->device->node_type == RDMA_NODE_RNIC) trans = &rds_iw_transport; @@ -139,7 +111,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, default: /* things like device disconnect? */ printk(KERN_ERR "RDS: unknown event %u (%s)!\n", - event->event, rds_cm_event_str(event->event)); + event->event, rdma_event_msg(event->event)); break; } @@ -148,7 +120,7 @@ out: mutex_unlock(&conn->c_cm_lock); rdsdebug("id %p event %u (%s) handling ret %d\n", cm_id, event->event, - rds_cm_event_str(event->event), ret); + rdma_event_msg(event->event), ret); return ret; } diff --git a/net/rds/rds.h b/net/rds/rds.h index 0d41155..cc54985 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -363,6 +363,8 @@ struct rds_message { unsigned int op_active:1; unsigned int op_nents; unsigned int op_count; + unsigned int op_dmasg; + unsigned int op_dmaoff; struct scatterlist *op_sg; } data; }; @@ -575,7 +577,6 @@ struct rds_statistics { }; /* af_rds.c */ -char *rds_str_array(char **array, size_t elements, size_t index); void rds_sock_addref(struct rds_sock *rs); void rds_sock_put(struct rds_sock *rs); void rds_wake_sk_sleep(struct rds_sock *rs); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index dff0481..d234521 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -128,8 +128,8 @@ frwr_sendcompletion(struct ib_wc *wc) /* WARNING: Only wr_id and status are reliable at this point */ r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; - dprintk("RPC: %s: frmr %p (stale), status %d\n", - __func__, r, wc->status); + dprintk("RPC: %s: frmr %p (stale), status %s (%d)\n", + __func__, r, ib_wc_status_msg(wc->status), wc->status); r->r.frmr.fr_state = FRMR_IS_STALE; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index f9f13a3..86b4416 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -117,8 +117,8 @@ 
static void rdma_build_arg_xdr(struct svc_rqst *rqstp, static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) { - if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == - RDMA_TRANSPORT_IWARP) + if (!rdma_cap_read_multi_sge(xprt->sc_cm_id->device, + xprt->sc_cm_id->port_num)) return 1; else return min_t(int, sge_count, xprt->sc_max_sge); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f609c1c..f4cfa76 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -175,8 +175,8 @@ void svc_rdma_put_req_map(struct svc_rdma_req_map *map) static void cq_event_handler(struct ib_event *event, void *context) { struct svc_xprt *xprt = context; - dprintk("svcrdma: received CQ event id=%d, context=%p\n", - event->event, context); + dprintk("svcrdma: received CQ event %s (%d), context=%p\n", + ib_event_msg(event->event), event->event, context); set_bit(XPT_CLOSE, &xprt->xpt_flags); } @@ -191,8 +191,9 @@ static void qp_event_handler(struct ib_event *event, void *context) case IB_EVENT_COMM_EST: case IB_EVENT_SQ_DRAINED: case IB_EVENT_QP_LAST_WQE_REACHED: - dprintk("svcrdma: QP event %d received for QP=%p\n", - event->event, event->element.qp); + dprintk("svcrdma: QP event %s (%d) received for QP=%p\n", + ib_event_msg(event->event), event->event, + event->element.qp); break; /* These are considered fatal events */ case IB_EVENT_PATH_MIG_ERR: @@ -201,9 +202,10 @@ static void qp_event_handler(struct ib_event *event, void *context) case IB_EVENT_QP_ACCESS_ERR: case IB_EVENT_DEVICE_FATAL: default: - dprintk("svcrdma: QP ERROR event %d received for QP=%p, " + dprintk("svcrdma: QP ERROR event %s (%d) received for QP=%p, " "closing transport\n", - event->event, event->element.qp); + ib_event_msg(event->event), event->event, + event->element.qp); set_bit(XPT_CLOSE, &xprt->xpt_flags); break; } @@ -402,7 +404,8 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) for 
(i = 0; i < ret; i++) { wc = &wc_a[i]; if (wc->status != IB_WC_SUCCESS) { - dprintk("svcrdma: sq wc err status %d\n", + dprintk("svcrdma: sq wc err status %s (%d)\n", + ib_wc_status_msg(wc->status), wc->status); /* Close the transport */ @@ -616,7 +619,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " - "event=%d\n", cma_id, cma_id->context, event->event); + "event = %s (%d)\n", cma_id, cma_id->context, + rdma_event_msg(event->event), event->event); handle_connect_req(cma_id, event->param.conn.initiator_depth); break; @@ -636,7 +640,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, default: dprintk("svcrdma: Unexpected event on listening endpoint %p, " - "event=%d\n", cma_id, event->event); + "event = %s (%d)\n", cma_id, + rdma_event_msg(event->event), event->event); break; } @@ -669,7 +674,8 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, break; case RDMA_CM_EVENT_DEVICE_REMOVAL: dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, " - "event=%d\n", cma_id, xprt, event->event); + "event = %s (%d)\n", cma_id, xprt, + rdma_event_msg(event->event), event->event); if (xprt) { set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); @@ -677,7 +683,8 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, break; default: dprintk("svcrdma: Unexpected event on DTO endpoint %p, " - "event=%d\n", cma_id, event->event); + "event = %s (%d)\n", cma_id, + rdma_event_msg(event->event), event->event); break; } return 0; @@ -848,10 +855,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct svcxprt_rdma *listen_rdma; struct svcxprt_rdma *newxprt = NULL; struct rdma_conn_param conn_param; + struct ib_cq_init_attr cq_attr = {}; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; int uninitialized_var(dma_mr_acc); - int need_dma_mr; + int need_dma_mr = 0; int ret; int i; @@ -900,22 +908,22 
@@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk("svcrdma: error creating PD for connect request\n"); goto errout; } + cq_attr.cqe = newxprt->sc_sq_depth; newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device, sq_comp_handler, cq_event_handler, newxprt, - newxprt->sc_sq_depth, - 0); + &cq_attr); if (IS_ERR(newxprt->sc_sq_cq)) { dprintk("svcrdma: error creating SQ CQ for connect request\n"); goto errout; } + cq_attr.cqe = newxprt->sc_max_requests; newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device, rq_comp_handler, cq_event_handler, newxprt, - newxprt->sc_max_requests, - 0); + &cq_attr); if (IS_ERR(newxprt->sc_rq_cq)) { dprintk("svcrdma: error creating RQ CQ for connect request\n"); goto errout; @@ -985,35 +993,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* * Determine if a DMA MR is required and if so, what privs are required */ - switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) { - case RDMA_TRANSPORT_IWARP: - newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; - if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { - need_dma_mr = 1; - dma_mr_acc = - (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE); - } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { - need_dma_mr = 1; - dma_mr_acc = IB_ACCESS_LOCAL_WRITE; - } else - need_dma_mr = 0; - break; - case RDMA_TRANSPORT_IB: - if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { - need_dma_mr = 1; - dma_mr_acc = IB_ACCESS_LOCAL_WRITE; - } else if (!(devattr.device_cap_flags & - IB_DEVICE_LOCAL_DMA_LKEY)) { - need_dma_mr = 1; - dma_mr_acc = IB_ACCESS_LOCAL_WRITE; - } else - need_dma_mr = 0; - break; - default: + if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device, + newxprt->sc_cm_id->port_num) && + !rdma_ib_or_roce(newxprt->sc_cm_id->device, + newxprt->sc_cm_id->port_num)) goto errout; + + if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) || + !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { + 
need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + if (rdma_protocol_iwarp(newxprt->sc_cm_id->device, + newxprt->sc_cm_id->port_num) && + !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) + dma_mr_acc |= IB_ACCESS_REMOTE_WRITE; } + if (rdma_protocol_iwarp(newxprt->sc_cm_id->device, + newxprt->sc_cm_id->port_num)) + newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; + /* Create the DMA MR if needed, otherwise, use the DMA LKEY */ if (need_dma_mr) { /* Register all of physical memory */ diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 4870d27..52df265 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -105,32 +105,6 @@ rpcrdma_run_tasklet(unsigned long data) static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); -static const char * const async_event[] = { - "CQ error", - "QP fatal error", - "QP request error", - "QP access error", - "communication established", - "send queue drained", - "path migration successful", - "path mig error", - "device fatal error", - "port active", - "port error", - "LID change", - "P_key change", - "SM change", - "SRQ error", - "SRQ limit reached", - "last WQE reached", - "client reregister", - "GID change", -}; - -#define ASYNC_MSG(status) \ - ((status) < ARRAY_SIZE(async_event) ? 
\ - async_event[(status)] : "unknown async error") - static void rpcrdma_schedule_tasklet(struct list_head *sched_list) { @@ -148,7 +122,7 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) struct rpcrdma_ep *ep = context; pr_err("RPC: %s: %s on device %s ep %p\n", - __func__, ASYNC_MSG(event->event), + __func__, ib_event_msg(event->event), event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; @@ -163,7 +137,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) struct rpcrdma_ep *ep = context; pr_err("RPC: %s: %s on device %s ep %p\n", - __func__, ASYNC_MSG(event->event), + __func__, ib_event_msg(event->event), event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; @@ -172,35 +146,6 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) } } -static const char * const wc_status[] = { - "success", - "local length error", - "local QP operation error", - "local EE context operation error", - "local protection error", - "WR flushed", - "memory management operation error", - "bad response error", - "local access error", - "remote invalid request error", - "remote access error", - "remote operation error", - "transport retry counter exceeded", - "RNR retry counter exceeded", - "local RDD violation error", - "remove invalid RD request", - "operation aborted", - "invalid EE context number", - "invalid EE context state", - "fatal error", - "response timeout error", - "general error", -}; - -#define COMPLETION_MSG(status) \ - ((status) < ARRAY_SIZE(wc_status) ? 
\ - wc_status[(status)] : "unexpected completion error") - static void rpcrdma_sendcq_process_wc(struct ib_wc *wc) { @@ -209,7 +154,7 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) pr_err("RPC: %s: SEND: %s\n", - __func__, COMPLETION_MSG(wc->status)); + __func__, ib_wc_status_msg(wc->status)); } else { struct rpcrdma_mw *r; @@ -302,7 +247,7 @@ out_schedule: out_fail: if (wc->status != IB_WC_WR_FLUSH_ERR) pr_err("RPC: %s: rep %p: %s\n", - __func__, rep, COMPLETION_MSG(wc->status)); + __func__, rep, ib_wc_status_msg(wc->status)); rep->rr_len = ~0U; goto out_schedule; } @@ -386,31 +331,6 @@ rpcrdma_flush_cqs(struct rpcrdma_ep *ep) rpcrdma_sendcq_process_wc(&wc); } -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -static const char * const conn[] = { - "address resolved", - "address error", - "route resolved", - "route error", - "connect request", - "connect response", - "connect error", - "unreachable", - "rejected", - "established", - "disconnected", - "device removal", - "multicast join", - "multicast error", - "address change", - "timewait exit", -}; - -#define CONNECTION_MSG(status) \ - ((status) < ARRAY_SIZE(conn) ? 
\ - conn[(status)] : "unrecognized connection error") -#endif - static int rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) { @@ -476,7 +396,7 @@ connected: default: dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n", __func__, sap, rpc_get_port(sap), ep, - CONNECTION_MSG(event->event)); + rdma_event_msg(event->event)); break; } @@ -724,6 +644,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, { struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_cq *sendcq, *recvcq; + struct ib_cq_init_attr cq_attr = {}; int rc, err; /* check provider's send/recv wr limits */ @@ -771,9 +692,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); + cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, - rpcrdma_cq_async_error_upcall, ep, - ep->rep_attr.cap.max_send_wr + 1, 0); + rpcrdma_cq_async_error_upcall, ep, &cq_attr); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", @@ -788,9 +709,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, goto out2; } + cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, - rpcrdma_cq_async_error_upcall, ep, - ep->rep_attr.cap.max_recv_wr + 1, 0); + rpcrdma_cq_async_error_upcall, ep, &cq_attr); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", |