diff options
Diffstat (limited to 'drivers/infiniband')
102 files changed, 2834 insertions, 1329 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index f80da50..746cdf5 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -457,8 +457,8 @@ static void resolve_cb(int status, struct sockaddr *src_addr, complete(&((struct resolve_cb_context *)context)->comp); } -int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, - u16 *vlan_id) +int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, + u8 *dmac, u16 *vlan_id) { int ret = 0; struct rdma_dev_addr dev_addr; @@ -472,13 +472,8 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, } sgid_addr, dgid_addr; - ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid); - if (ret) - return ret; - - ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid); - if (ret) - return ret; + rdma_gid2ip(&sgid_addr._sockaddr, sgid); + rdma_gid2ip(&dgid_addr._sockaddr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); @@ -512,10 +507,8 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) struct sockaddr_in6 _sockaddr_in6; } gid_addr; - ret = rdma_gid2ip(&gid_addr._sockaddr, sgid); + rdma_gid2ip(&gid_addr._sockaddr, sgid); - if (ret) - return ret; memset(&dev_addr, 0, sizeof(dev_addr)); ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); if (ret) diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index f6d2961..c7dcfe4 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -54,7 +54,7 @@ static DEFINE_SPINLOCK(ib_agent_port_list_lock); static LIST_HEAD(ib_agent_port_list); static struct ib_agent_port_private * -__ib_get_agent_port(struct ib_device *device, int port_num) +__ib_get_agent_port(const struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; @@ -67,7 +67,7 @@ __ib_get_agent_port(struct ib_device *device, int port_num) } static struct ib_agent_port_private * -ib_get_agent_port(struct ib_device *device, int port_num) +ib_get_agent_port(const struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; unsigned long flags; @@ -78,9 +78,9 @@ ib_get_agent_port(struct ib_device *device, int port_num) return entry; } -void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, - struct ib_wc *wc, struct ib_device *device, - int port_num, int qpn) +void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *grh, + const struct ib_wc *wc, const struct ib_device *device, + int port_num, int qpn, size_t resp_mad_len, bool opa) { struct ib_agent_port_private *port_priv; struct ib_mad_agent *agent; @@ -106,15 +106,20 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, return; } + if (opa && mad_hdr->base_version != OPA_MGMT_BASE_VERSION) + resp_mad_len = IB_MGMT_MAD_SIZE; + send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0, - IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_KERNEL); + IB_MGMT_MAD_HDR, + resp_mad_len - IB_MGMT_MAD_HDR, + GFP_KERNEL, + mad_hdr->base_version); if (IS_ERR(send_buf)) { dev_err(&device->dev, "ib_create_send_mad error\n"); goto err1; } - memcpy(send_buf->mad, mad, sizeof *mad); + memcpy(send_buf->mad, mad_hdr, resp_mad_len); send_buf->ah = ah; if (device->node_type == RDMA_NODE_IB_SWITCH) { @@ -156,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num) goto error1; } - if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND) { + if (rdma_cap_ib_smi(device, port_num)) { /* Obtain send only MAD agent for SMI QP */ port_priv->agent[0] = ib_register_mad_agent(device, port_num, IB_QPT_SMI, NULL, 0, diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h index 6669287..65f92be 100644 --- a/drivers/infiniband/core/agent.h +++ b/drivers/infiniband/core/agent.h @@ -44,8 +44,8 @@ extern int ib_agent_port_open(struct ib_device *device, int port_num); extern int ib_agent_port_close(struct ib_device *device, int port_num); -extern void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, - struct ib_wc *wc, struct ib_device *device, - int port_num, int qpn); +extern void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *grh, + const struct ib_wc *wc, const struct ib_device *device, + int port_num, int qpn, size_t resp_mad_len, bool opa); #endif /* __AGENT_H_ */ diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 80f6cf2..871da83 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -58,17 +58,6 @@ struct ib_update_work { u8 port_num; }; -static inline int start_port(struct ib_device *device) -{ - return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; -} - -static inline int end_port(struct ib_device *device) -{ - return (device->node_type == RDMA_NODE_IB_SWITCH) ? - 0 : device->phys_port_cnt; -} - int ib_get_cached_gid(struct ib_device *device, u8 port_num, int index, @@ -78,12 +67,12 @@ int ib_get_cached_gid(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.gid_cache[port_num - start_port(device)]; + cache = device->cache.gid_cache[port_num - rdma_start_port(device)]; if (index < 0 || index >= cache->table_len) ret = -EINVAL; @@ -96,10 +85,10 @@ int ib_get_cached_gid(struct ib_device *device, } EXPORT_SYMBOL(ib_get_cached_gid); -int ib_find_cached_gid(struct ib_device *device, - union ib_gid *gid, - u8 *port_num, - u16 *index) +int ib_find_cached_gid(struct ib_device *device, + const union ib_gid *gid, + u8 *port_num, + u16 *index) { struct ib_gid_cache *cache; unsigned long flags; @@ -112,11 +101,11 @@ int ib_find_cached_gid(struct ib_device *device, read_lock_irqsave(&device->cache.lock, flags); - for (p = 0; p <= end_port(device) - start_port(device); ++p) { + for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) { cache = device->cache.gid_cache[p]; for (i = 0; i < cache->table_len; ++i) { if (!memcmp(gid, &cache->table[i], sizeof *gid)) { - *port_num = p + start_port(device); + *port_num = p + rdma_start_port(device); if (index) *index = i; ret = 0; @@ -140,12 +129,12 @@ int ib_get_cached_pkey(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - start_port(device)]; + cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; if (index < 0 || index >= cache->table_len) ret = -EINVAL; @@ -169,12 +158,12 @@ int ib_find_cached_pkey(struct ib_device *device, int ret = -ENOENT; int partial_ix = -1; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - start_port(device)]; + cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; *index = -1; @@ -209,12 +198,12 @@ int ib_find_exact_cached_pkey(struct ib_device *device, int i; int ret = -ENOENT; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - cache = device->cache.pkey_cache[port_num - start_port(device)]; + cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; *index = -1; @@ -238,11 +227,11 @@ int ib_get_cached_lmc(struct ib_device *device, unsigned long flags; int ret = 0; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); - *lmc = device->cache.lmc_cache[port_num - start_port(device)]; + *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)]; read_unlock_irqrestore(&device->cache.lock, flags); return ret; @@ -303,13 +292,13 @@ static void ib_cache_update(struct ib_device *device, write_lock_irq(&device->cache.lock); - old_pkey_cache = device->cache.pkey_cache[port - start_port(device)]; - old_gid_cache = device->cache.gid_cache [port - start_port(device)]; + old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)]; + old_gid_cache = device->cache.gid_cache [port - rdma_start_port(device)]; - device->cache.pkey_cache[port - start_port(device)] = pkey_cache; - device->cache.gid_cache [port - start_port(device)] = gid_cache; + device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache; + device->cache.gid_cache [port - rdma_start_port(device)] = gid_cache; - device->cache.lmc_cache[port - start_port(device)] = tprops->lmc; + device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc; write_unlock_irq(&device->cache.lock); @@ -363,14 +352,14 @@ static void ib_cache_setup_one(struct ib_device *device) device->cache.pkey_cache = kmalloc(sizeof *device->cache.pkey_cache * - (end_port(device) - start_port(device) + 1), GFP_KERNEL); + (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); device->cache.gid_cache = kmalloc(sizeof *device->cache.gid_cache * - (end_port(device) - start_port(device) + 1), GFP_KERNEL); + (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache * - (end_port(device) - - start_port(device) + 1), + (rdma_end_port(device) - + rdma_start_port(device) + 1), GFP_KERNEL); if (!device->cache.pkey_cache || !device->cache.gid_cache || @@ -380,10 +369,10 @@ static void ib_cache_setup_one(struct ib_device *device) goto err; } - for (p = 0; p <= end_port(device) - start_port(device); ++p) { + for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) { device->cache.pkey_cache[p] = NULL; device->cache.gid_cache [p] = NULL; - ib_cache_update(device, p + start_port(device)); + ib_cache_update(device, p + rdma_start_port(device)); } INIT_IB_EVENT_HANDLER(&device->cache.event_handler, @@ -394,7 +383,7 @@ static void ib_cache_setup_one(struct ib_device *device) return; err_cache: - for (p = 0; p <= end_port(device) - start_port(device); ++p) { + for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) { kfree(device->cache.pkey_cache[p]); kfree(device->cache.gid_cache[p]); } @@ -412,7 +401,7 @@ static void ib_cache_cleanup_one(struct ib_device *device) ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); - for (p = 0; p <= end_port(device) - start_port(device); ++p) { + for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) { kfree(device->cache.pkey_cache[p]); kfree(device->cache.gid_cache[p]); } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index e28a494..dbddddd 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -267,7 +267,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_ATOMIC); + GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); @@ -297,7 +298,8 @@ static int cm_alloc_response_msg(struct cm_port *port, m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_ATOMIC); + GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); @@ -437,39 +439,38 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) return cm_id_priv; } -static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask) +static void cm_mask_copy(u32 *dst, const u32 *src, const u32 *mask) { int i; - for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++) - ((unsigned long *) dst)[i] = ((unsigned long *) src)[i] & - ((unsigned long *) mask)[i]; + for (i = 0; i < IB_CM_COMPARE_SIZE; i++) + dst[i] = src[i] & mask[i]; } static int cm_compare_data(struct ib_cm_compare_data *src_data, struct ib_cm_compare_data *dst_data) { - u8 src[IB_CM_COMPARE_SIZE]; - u8 dst[IB_CM_COMPARE_SIZE]; + u32 src[IB_CM_COMPARE_SIZE]; + u32 dst[IB_CM_COMPARE_SIZE]; if (!src_data || !dst_data) return 0; cm_mask_copy(src, src_data->data, dst_data->mask); cm_mask_copy(dst, dst_data->data, src_data->mask); - return memcmp(src, dst, IB_CM_COMPARE_SIZE); + return memcmp(src, dst, sizeof(src)); } -static int cm_compare_private_data(u8 *private_data, +static int cm_compare_private_data(u32 *private_data, struct ib_cm_compare_data *dst_data) { - u8 src[IB_CM_COMPARE_SIZE]; + u32 src[IB_CM_COMPARE_SIZE]; if (!dst_data) return 0; cm_mask_copy(src, private_data, dst_data->mask); - return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE); + return memcmp(src, dst_data->data, sizeof(src)); } /* @@ -538,7 +539,7 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) static struct cm_id_private * cm_find_listen(struct ib_device *device, __be64 service_id, - u8 *private_data) + u32 *private_data) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; @@ -862,6 +863,7 @@ retest: cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); break; case IB_CM_REQ_SENT: + case IB_CM_MRA_REQ_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, @@ -880,7 +882,6 @@ retest: NULL, 0, NULL, 0); } break; - case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); @@ -953,7 +954,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, cm_mask_copy(cm_id_priv->compare_data->data, compare_data->data, compare_data->mask); memcpy(cm_id_priv->compare_data->mask, compare_data->mask, - IB_CM_COMPARE_SIZE); + sizeof(compare_data->mask)); } cm_id->state = IB_CM_LISTEN; @@ -3760,11 +3761,9 @@ static void cm_add_one(struct ib_device *ib_device) }; unsigned long flags; int ret; + int count = 0; u8 i; - if (rdma_node_get_transport(ib_device->node_type) != RDMA_TRANSPORT_IB) - return; - cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * ib_device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) @@ -3783,6 +3782,9 @@ static void cm_add_one(struct ib_device *ib_device) set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= ib_device->phys_port_cnt; i++) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + port = kzalloc(sizeof *port, GFP_KERNEL); if (!port) goto error1; @@ -3809,7 +3811,13 @@ static void cm_add_one(struct ib_device *ib_device) ret = ib_modify_port(ib_device, i, 0, &port_modify); if (ret) goto error3; + + count++; } + + if (!count) + goto free; + ib_set_client_data(ib_device, &cm_client, cm_dev); write_lock_irqsave(&cm.device_lock, flags); @@ -3825,11 +3833,15 @@ error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; while (--i) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } +free: device_unregister(cm_dev->device); kfree(cm_dev); } @@ -3853,6 +3865,9 @@ static void cm_remove_one(struct ib_device *ib_device) write_unlock_irqrestore(&cm.device_lock, flags); for (i = 1; i <= ib_device->phys_port_cnt; i++) { + if (!rdma_cap_ib_cm(ib_device, i)) + continue; + port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index be068f4..8b76f0e 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -103,7 +103,7 @@ struct cm_req_msg { /* local ACK timeout:5, rsvd:3 */ u8 alt_offset139; - u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE]; + u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; } __attribute__ ((packed)); @@ -801,7 +801,7 @@ struct cm_sidr_req_msg { __be16 rsvd; __be64 service_id; - u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE]; + u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; } __attribute__ ((packed)); struct cm_sidr_rep_msg { diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d570030..143ded2 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -65,6 +65,34 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 +static const char * const cma_events[] = { + [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", + [RDMA_CM_EVENT_ADDR_ERROR] = "address error", + [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", + [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", + [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", + [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", + [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", + [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", + [RDMA_CM_EVENT_REJECTED] = "rejected", + [RDMA_CM_EVENT_ESTABLISHED] = "established", + [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", + [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", + [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", + [RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error", + [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", + [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", +}; + +const char *rdma_event_msg(enum rdma_cm_event_type event) +{ + size_t index = event; + + return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? + cma_events[index] : "unrecognized event"; +} +EXPORT_SYMBOL(rdma_event_msg); + static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); @@ -349,18 +377,35 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a return ret; } +static inline int cma_validate_port(struct ib_device *device, u8 port, + union ib_gid *gid, int dev_type) +{ + u8 found_port; + int ret = -ENODEV; + + if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) + return ret; + + if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) + return ret; + + ret = ib_find_cached_gid(device, gid, &found_port, NULL); + if (port != found_port) + return -ENODEV; + + return ret; +} + static int cma_acquire_dev(struct rdma_id_private *id_priv, struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; - union ib_gid gid, iboe_gid; + union ib_gid gid, iboe_gid, *gidp; int ret = -ENODEV; - u8 port, found_port; - enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? - IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; + u8 port; - if (dev_ll != IB_LINK_LAYER_INFINIBAND && + if (dev_addr->dev_type != ARPHRD_INFINIBAND && id_priv->id.ps == RDMA_PS_IPOIB) return -EINVAL; @@ -370,41 +415,36 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, memcpy(&gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof gid); - if (listen_id_priv && - rdma_port_get_link_layer(listen_id_priv->id.device, - listen_id_priv->id.port_num) == dev_ll) { + + if (listen_id_priv) { cma_dev = listen_id_priv->cma_dev; port = listen_id_priv->id.port_num; - if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) - ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, - &found_port, NULL); - else - ret = ib_find_cached_gid(cma_dev->device, &gid, - &found_port, NULL); + gidp = rdma_protocol_roce(cma_dev->device, port) ? + &iboe_gid : &gid; - if (!ret && (port == found_port)) { - id_priv->id.port_num = found_port; + ret = cma_validate_port(cma_dev->device, port, gidp, + dev_addr->dev_type); + if (!ret) { + id_priv->id.port_num = port; goto out; } } + list_for_each_entry(cma_dev, &dev_list, list) { for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { if (listen_id_priv && listen_id_priv->cma_dev == cma_dev && listen_id_priv->id.port_num == port) continue; - if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { - if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) - ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL); - else - ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL); - - if (!ret && (port == found_port)) { - id_priv->id.port_num = found_port; - goto out; - } + + gidp = rdma_protocol_roce(cma_dev->device, port) ? + &iboe_gid : &gid; + + ret = cma_validate_port(cma_dev->device, port, gidp, + dev_addr->dev_type); + if (!ret) { + id_priv->id.port_num = port; + goto out; } } } @@ -435,10 +475,10 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) pkey = ntohs(addr->sib_pkey); list_for_each_entry(cur_dev, &dev_list, list) { - if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) - continue; - for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + if (!rdma_cap_af_ib(cur_dev->device, p)) + continue; + if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) continue; @@ -633,10 +673,9 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, if (ret) goto out; - if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) - == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) - == IB_LINK_LAYER_ETHERNET) { + BUG_ON(id_priv->cma_dev->device != id_priv->id.device); + + if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) { ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL); if (ret) @@ -700,11 +739,10 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, int ret; u16 pkey; - if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) == - IB_LINK_LAYER_INFINIBAND) - pkey = ib_addr_get_pkey(dev_addr); - else + if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) pkey = 0xffff; + else + pkey = ib_addr_get_pkey(dev_addr); ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, pkey, &qp_attr->pkey_index); @@ -735,8 +773,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int ret = 0; id_priv = container_of(id, struct rdma_id_private, id); - switch (rdma_node_get_transport(id_priv->id.device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); else @@ -745,19 +782,15 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { if (!id_priv->cm_id.iw) { qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, qp_attr_mask); - break; - default: + } else ret = -ENOSYS; - break; - } return ret; } @@ -845,33 +878,49 @@ static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; ib = (struct sockaddr_ib *) &id->route.addr.src_addr; ib->sib_family = listen_ib->sib_family; - ib->sib_pkey = path->pkey; - ib->sib_flowinfo = path->flow_label; - memcpy(&ib->sib_addr, &path->sgid, 16); + if (path) { + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->sgid, 16); + } else { + ib->sib_pkey = listen_ib->sib_pkey; + ib->sib_flowinfo = listen_ib->sib_flowinfo; + ib->sib_addr = listen_ib->sib_addr; + } ib->sib_sid = listen_ib->sib_sid; ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); ib->sib_scope_id = listen_ib->sib_scope_id; - ib = (struct sockaddr_ib *) &id->route.addr.dst_addr; - ib->sib_family = listen_ib->sib_family; - ib->sib_pkey = path->pkey; - ib->sib_flowinfo = path->flow_label; - memcpy(&ib->sib_addr, &path->dgid, 16); + if (path) { + ib = (struct sockaddr_ib *) &id->route.addr.dst_addr; + ib->sib_family = listen_ib->sib_family; + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->dgid, 16); + } +} + +static __be16 ss_get_port(const struct sockaddr_storage *ss) +{ + if (ss->ss_family == AF_INET) + return ((struct sockaddr_in *)ss)->sin_port; + else if (ss->ss_family == AF_INET6) + return ((struct sockaddr_in6 *)ss)->sin6_port; + BUG(); } static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, struct cma_hdr *hdr) { - struct sockaddr_in *listen4, *ip4; + struct sockaddr_in *ip4; - listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr; ip4 = (struct sockaddr_in *) &id->route.addr.src_addr; - ip4->sin_family = listen4->sin_family; + ip4->sin_family = AF_INET; ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr; - ip4->sin_port = listen4->sin_port; + ip4->sin_port = ss_get_port(&listen_id->route.addr.src_addr); ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr; - ip4->sin_family = listen4->sin_family; + ip4->sin_family = AF_INET; ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr; ip4->sin_port = hdr->port; } @@ -879,16 +928,15 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_i static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, struct cma_hdr *hdr) { - struct sockaddr_in6 *listen6, *ip6; + struct sockaddr_in6 *ip6; - listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr; ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr; - ip6->sin6_family = listen6->sin6_family; + ip6->sin6_family = AF_INET6; ip6->sin6_addr = hdr->dst_addr.ip6; - ip6->sin6_port = listen6->sin6_port; + ip6->sin6_port = ss_get_port(&listen_id->route.addr.src_addr); ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr; - ip6->sin6_family = listen6->sin6_family; + ip6->sin6_family = AF_INET6; ip6->sin6_addr = hdr->src_addr.ip6; ip6->sin6_port = hdr->port; } @@ -898,9 +946,11 @@ static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id { struct cma_hdr *hdr; - if ((listen_id->route.addr.src_addr.ss_family == AF_IB) && - (ib_event->event == IB_CM_REQ_RECEIVED)) { - cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path); + if (listen_id->route.addr.src_addr.ss_family == AF_IB) { + if (ib_event->event == IB_CM_REQ_RECEIVED) + cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path); + else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) + cma_save_ib_info(id, listen_id, NULL); return 0; } @@ -928,13 +978,9 @@ static inline int cma_user_data_offset(struct rdma_id_private *id_priv) static void cma_cancel_route(struct rdma_id_private *id_priv) { - switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) { - case IB_LINK_LAYER_INFINIBAND: + if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); - break; - default: - break; } } @@ -1006,17 +1052,12 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) mc = container_of(id_priv->mc_list.next, struct cma_multicast, list); list_del(&mc->list); - switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) { - case IB_LINK_LAYER_INFINIBAND: + if (rdma_cap_ib_mcast(id_priv->cma_dev->device, + id_priv->id.port_num)) { ib_sa_free_multicast(mc->multicast.ib); kfree(mc); - break; - case IB_LINK_LAYER_ETHERNET: + } else kref_put(&mc->mcref, release_mc); - break; - default: - break; - } } } @@ -1037,17 +1078,12 @@ void rdma_destroy_id(struct rdma_cm_id *id) mutex_unlock(&id_priv->handler_mutex); if (id_priv->cma_dev) { - switch (rdma_node_get_transport(id_priv->id.device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) ib_destroy_cm_id(id_priv->cm_id.ib); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.iw) iw_destroy_cm_id(id_priv->cm_id.iw); - break; - default: - break; } cma_leave_mc_groups(id_priv); cma_release_dev(id_priv); @@ -1593,6 +1629,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) if (IS_ERR(id)) return PTR_ERR(id); + id->tos = id_priv->tos; id_priv->cm_id.iw = id; memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), @@ -1625,8 +1662,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct rdma_cm_id *id; int ret; - if (cma_family(id_priv) == AF_IB && - rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB) + if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, @@ -1967,26 +2003,15 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) return -EINVAL; atomic_inc(&id_priv->refcount); - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: - switch (rdma_port_get_link_layer(id->device, id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ret = cma_resolve_ib_route(id_priv, timeout_ms); - break; - case IB_LINK_LAYER_ETHERNET: - ret = cma_resolve_iboe_route(id_priv); - break; - default: - ret = -ENOSYS; - } - break; - case RDMA_TRANSPORT_IWARP: + if (rdma_cap_ib_sa(id->device, id->port_num)) + ret = cma_resolve_ib_route(id_priv, timeout_ms); + else if (rdma_protocol_roce(id->device, id->port_num)) + ret = cma_resolve_iboe_route(id_priv); + else if (rdma_protocol_iwarp(id->device, id->port_num)) ret = cma_resolve_iw_route(id_priv, timeout_ms); - break; - default: + else ret = -ENOSYS; - break; - } + if (ret) goto err; @@ -2028,7 +2053,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) mutex_lock(&lock); list_for_each_entry(cur_dev, &dev_list, list) { if (cma_family(id_priv) == AF_IB && - rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) + !rdma_cap_ib_cm(cur_dev->device, 1)) continue; if (!cma_dev) @@ -2060,7 +2085,7 @@ port_found: goto out; id_priv->id.route.addr.dev_addr.dev_type = - (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ? + (rdma_protocol_ib(cma_dev->device, p)) ? ARPHRD_INFINIBAND : ARPHRD_ETHER; rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); @@ -2537,18 +2562,15 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) id_priv->backlog = backlog; if (id->device) { - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, 1)) { ret = cma_ib_listen(id_priv); if (ret) goto err; - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, 1)) { ret = cma_iw_listen(id_priv, backlog); if (ret) goto err; - break; - default: + } else { ret = -ENOSYS; goto err; } @@ -2840,6 +2862,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, if (IS_ERR(cm_id)) return PTR_ERR(cm_id); + cm_id->tos = id_priv->tos; id_priv->cm_id.iw = cm_id; memcpy(&cm_id->local_addr, cma_src_addr(id_priv), @@ -2884,20 +2907,15 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) ret = cma_connect_iw(id_priv, conn_param); - break; - default: + else ret = -ENOSYS; - break; - } if (ret) goto err; @@ -3000,8 +3018,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) id_priv->srq = conn_param->srq; } - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) { if (conn_param) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, @@ -3017,14 +3034,10 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) else ret = cma_rep_recv(id_priv); } - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) ret = cma_accept_iw(id_priv, conn_param); - break; - default: + else ret = -ENOSYS; - break; - } if (ret) goto reject; @@ -3068,8 +3081,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, if (!id_priv->cm_id.ib) return -EINVAL; - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, private_data, private_data_len); @@ -3077,15 +3089,12 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); - break; - default: + } else ret = -ENOSYS; - break; - } + return ret; } EXPORT_SYMBOL(rdma_reject); @@ -3099,22 +3108,18 @@ int rdma_disconnect(struct rdma_cm_id *id) if (!id_priv->cm_id.ib) return -EINVAL; - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: + if (rdma_cap_ib_cm(id->device, id->port_num)) { ret = cma_modify_qp_err(id_priv); if (ret) goto out; /* Initiate or respond to a disconnect. */ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); - break; - case RDMA_TRANSPORT_IWARP: + } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); - break; - default: + } else ret = -EINVAL; - break; - } + out: return ret; } @@ -3360,24 +3365,13 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, list_add(&mc->list, &id_priv->mc_list); spin_unlock(&id_priv->lock); - switch (rdma_node_get_transport(id->device->node_type)) { - case RDMA_TRANSPORT_IB: - switch (rdma_port_get_link_layer(id->device, id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ret = cma_join_ib_multicast(id_priv, mc); - break; - case IB_LINK_LAYER_ETHERNET: - kref_init(&mc->mcref); - ret = cma_iboe_join_multicast(id_priv, mc); - break; - default: - ret = -EINVAL; - } - break; - default: + if (rdma_protocol_roce(id->device, id->port_num)) { + kref_init(&mc->mcref); + ret = cma_iboe_join_multicast(id_priv, mc); + } else if (rdma_cap_ib_mcast(id->device, id->port_num)) + ret = cma_join_ib_multicast(id_priv, mc); + else ret = -ENOSYS; - break; - } if (ret) { spin_lock_irq(&id_priv->lock); @@ -3405,19 +3399,15 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, be16_to_cpu(mc->multicast.ib->rec.mlid)); - if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { - switch (rdma_port_get_link_layer(id->device, id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - break; - case IB_LINK_LAYER_ETHERNET: - kref_put(&mc->mcref, release_mc); - break; - default: - break; - } - } + + BUG_ON(id_priv->cma_dev->device != id->device); + + if (rdma_cap_ib_mcast(id->device, id->port_num)) { + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + } else if (rdma_protocol_roce(id->device, id->port_num)) + kref_put(&mc->mcref, release_mc); + return; } } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 18c1ece..9567756 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -92,7 +92,8 @@ static int ib_device_check_mandatory(struct ib_device *device) IB_MANDATORY_FUNC(poll_cq), IB_MANDATORY_FUNC(req_notify_cq), IB_MANDATORY_FUNC(get_dma_mr), - IB_MANDATORY_FUNC(dereg_mr) + IB_MANDATORY_FUNC(dereg_mr), + IB_MANDATORY_FUNC(get_port_immutable) }; int i; @@ -151,18 +152,6 @@ static int alloc_name(char *name) return 0; } -static int start_port(struct ib_device *device) -{ - return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; -} - - -static int end_port(struct ib_device *device) -{ - return (device->node_type == RDMA_NODE_IB_SWITCH) ? - 0 : device->phys_port_cnt; -} - /** * ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate @@ -222,42 +211,49 @@ static int add_client_context(struct ib_device *device, struct ib_client *client return 0; } -static int read_port_table_lengths(struct ib_device *device) +static int verify_immutable(const struct ib_device *dev, u8 port) { - struct ib_port_attr *tprops = NULL; - int num_ports, ret = -ENOMEM; - u8 port_index; - - tprops = kmalloc(sizeof *tprops, GFP_KERNEL); - if (!tprops) - goto out; - - num_ports = end_port(device) - start_port(device) + 1; + return WARN_ON(!rdma_cap_ib_mad(dev, port) && + rdma_max_mad_size(dev, port) != 0); +} - device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports, - GFP_KERNEL); - device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports, - GFP_KERNEL); - if (!device->pkey_tbl_len || !device->gid_tbl_len) +static int read_port_immutable(struct ib_device *device) +{ + int ret = -ENOMEM; + u8 start_port = rdma_start_port(device); + u8 end_port = rdma_end_port(device); + u8 port; + + /** + * device->port_immutable is indexed directly by the port number to make + * access to this data as efficient as possible. + * + * Therefore port_immutable is declared as a 1 based array with + * potential empty slots at the beginning. + */ + device->port_immutable = kzalloc(sizeof(*device->port_immutable) + * (end_port + 1), + GFP_KERNEL); + if (!device->port_immutable) goto err; - for (port_index = 0; port_index < num_ports; ++port_index) { - ret = ib_query_port(device, port_index + start_port(device), - tprops); + for (port = start_port; port <= end_port; ++port) { + ret = device->get_port_immutable(device, port, + &device->port_immutable[port]); if (ret) goto err; - device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len; - device->gid_tbl_len[port_index] = tprops->gid_tbl_len; + + if (verify_immutable(device, port)) { + ret = -EINVAL; + goto err; + } } ret = 0; goto out; - err: - kfree(device->gid_tbl_len); - kfree(device->pkey_tbl_len); + kfree(device->port_immutable); out: - kfree(tprops); return ret; } @@ -294,9 +290,9 @@ int ib_register_device(struct ib_device *device, spin_lock_init(&device->event_handler_lock); spin_lock_init(&device->client_data_lock); - ret = read_port_table_lengths(device); + ret = read_port_immutable(device); if (ret) { - printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n", + printk(KERN_WARNING "Couldn't create per port immutable data %s\n", device->name); goto out; } @@ -305,8 +301,7 @@ int ib_register_device(struct ib_device *device, if (ret) { printk(KERN_WARNING "Couldn't register device %s with driver model\n", device->name); - kfree(device->gid_tbl_len); - kfree(device->pkey_tbl_len); + kfree(device->port_immutable); goto out; } @@ -348,9 +343,6 @@ void ib_unregister_device(struct ib_device *device) list_del(&device->core_list); - kfree(device->gid_tbl_len); - kfree(device->pkey_tbl_len); - mutex_unlock(&device_mutex); ib_device_unregister_sysfs(device); @@ -558,7 +550,11 @@ EXPORT_SYMBOL(ib_dispatch_event); int ib_query_device(struct ib_device *device, struct ib_device_attr *device_attr) { - return device->query_device(device, device_attr); + struct ib_udata uhw = {.outlen = 0, .inlen = 0}; + + memset(device_attr, 0, sizeof(*device_attr)); + + return device->query_device(device, device_attr, &uhw); } EXPORT_SYMBOL(ib_query_device); @@ -575,7 +571,7 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; return device->query_port(device, port_num, port_attr); @@ -653,7 +649,7 @@ int ib_modify_port(struct ib_device *device, if (!device->modify_port) return -ENOSYS; - if (port_num < start_port(device) || port_num > end_port(device)) + if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; return device->modify_port(device, port_num, port_modify_mask, @@ -676,8 +672,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, union ib_gid tmp_gid; int ret, port, i; - for (port = start_port(device); port <= end_port(device); ++port) { - for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) { + for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { + for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { ret = ib_query_gid(device, port, i, &tmp_gid); if (ret) return ret; @@ -709,7 +705,7 @@ int ib_find_pkey(struct ib_device *device, u16 tmp_pkey; int partial_ix = -1; - for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { + for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) { ret = ib_query_pkey(device, port_num, i, &tmp_pkey); if (ret) return ret; diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index b85ddbc..e6ffa2e 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -33,7 +33,7 @@ #include "iwpm_util.h" -static const char iwpm_ulib_name[] = "iWarpPortMapperUser"; +static const char iwpm_ulib_name[IWPM_ULIBNAME_SIZE] = "iWarpPortMapperUser"; static int iwpm_ulib_version = 3; static int iwpm_user_pid = IWPM_PID_UNDEFINED; static atomic_t echo_nlmsg_seq; @@ -468,7 +468,8 @@ add_mapping_response_exit: } EXPORT_SYMBOL(iwpm_add_mapping_cb); -/* netlink attribute policy for the response to add and query mapping request */ +/* netlink attribute policy for the response to add and query mapping request + * and response with remote address info */ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = { [IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 }, [IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) }, @@ -559,6 +560,76 @@ query_mapping_response_exit: } EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); +/* + * iwpm_remote_info_cb - Process a port mapper message, containing + * the remote connecting peer address info + */ +int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX]; + struct sockaddr_storage *local_sockaddr, *remote_sockaddr; + struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr; + struct iwpm_remote_info *rem_info; + const char *msg_type; + u8 nl_client; + int ret = -EINVAL; + + msg_type = "Remote Mapping info"; + if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX, + resp_query_policy, nltb, msg_type)) + return ret; + + nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); + if (!iwpm_valid_client(nl_client)) { + pr_info("%s: Invalid port mapper client = %d\n", + __func__, nl_client); + return ret; + } + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + + local_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]); + remote_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]); + mapped_loc_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]); + mapped_rem_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]); + + if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family || + mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) { + pr_info("%s: Sockaddr family doesn't match the requested one\n", + __func__); + return ret; + } + rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC); + if (!rem_info) { + pr_err("%s: Unable to allocate a remote info\n", __func__); + ret = -ENOMEM; + return ret; + } + memcpy(&rem_info->mapped_loc_sockaddr, mapped_loc_sockaddr, + sizeof(struct sockaddr_storage)); + memcpy(&rem_info->remote_sockaddr, remote_sockaddr, + sizeof(struct sockaddr_storage)); + memcpy(&rem_info->mapped_rem_sockaddr, mapped_rem_sockaddr, + sizeof(struct sockaddr_storage)); + rem_info->nl_client = nl_client; + + iwpm_add_remote_info(rem_info); + + iwpm_print_sockaddr(local_sockaddr, + "remote_info: Local sockaddr:"); + iwpm_print_sockaddr(mapped_loc_sockaddr, + "remote_info: Mapped local sockaddr:"); + iwpm_print_sockaddr(remote_sockaddr, + "remote_info: Remote sockaddr:"); + iwpm_print_sockaddr(mapped_rem_sockaddr, + "remote_info: Mapped remote sockaddr:"); + return ret; +} +EXPORT_SYMBOL(iwpm_remote_info_cb); + /* netlink attribute policy for the received request for mapping info */ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { [IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING, diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 69e9f84..a626795 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -33,8 +33,10 @@ #include "iwpm_util.h" -#define IWPM_HASH_BUCKET_SIZE 512 -#define IWPM_HASH_BUCKET_MASK (IWPM_HASH_BUCKET_SIZE - 1) +#define IWPM_MAPINFO_HASH_SIZE 512 +#define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) +#define IWPM_REMINFO_HASH_SIZE 64 +#define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) static LIST_HEAD(iwpm_nlmsg_req_list); static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); @@ -42,31 +44,49 @@ static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); static struct hlist_head *iwpm_hash_bucket; static DEFINE_SPINLOCK(iwpm_mapinfo_lock); +static struct hlist_head *iwpm_reminfo_bucket; +static DEFINE_SPINLOCK(iwpm_reminfo_lock); + static DEFINE_MUTEX(iwpm_admin_lock); static struct iwpm_admin_data iwpm_admin; int iwpm_init(u8 nl_client) { + int ret = 0; if (iwpm_valid_client(nl_client)) return -EINVAL; mutex_lock(&iwpm_admin_lock); if (atomic_read(&iwpm_admin.refcount) == 0) { - iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE * + iwpm_hash_bucket = kzalloc(IWPM_MAPINFO_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); if (!iwpm_hash_bucket) { - mutex_unlock(&iwpm_admin_lock); + ret = -ENOMEM; pr_err("%s Unable to create mapinfo hash table\n", __func__); - return -ENOMEM; + goto init_exit; + } + iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE * + sizeof(struct hlist_head), GFP_KERNEL); + if (!iwpm_reminfo_bucket) { + kfree(iwpm_hash_bucket); + ret = -ENOMEM; + pr_err("%s Unable to create reminfo hash table\n", __func__); + goto init_exit; } } atomic_inc(&iwpm_admin.refcount); +init_exit: mutex_unlock(&iwpm_admin_lock); - iwpm_set_valid(nl_client, 1); - return 0; + if (!ret) { + iwpm_set_valid(nl_client, 1); + pr_debug("%s: Mapinfo and reminfo tables are created\n", + __func__); + } + return ret; } EXPORT_SYMBOL(iwpm_init); static void free_hash_bucket(void); +static void free_reminfo_bucket(void); int iwpm_exit(u8 nl_client) { @@ -81,7 +101,8 @@ int iwpm_exit(u8 nl_client) } if (atomic_dec_and_test(&iwpm_admin.refcount)) { free_hash_bucket(); - pr_debug("%s: Mapinfo hash table is destroyed\n", __func__); + free_reminfo_bucket(); + pr_debug("%s: Resources are destroyed\n", __func__); } mutex_unlock(&iwpm_admin_lock); iwpm_set_valid(nl_client, 0); @@ -89,7 +110,7 @@ int iwpm_exit(u8 nl_client) } EXPORT_SYMBOL(iwpm_exit); -static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *, +static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *, struct sockaddr_storage *); int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, @@ -99,9 +120,10 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, struct hlist_head *hash_bucket_head; struct iwpm_mapping_info *map_info; unsigned long flags; + int ret = -EINVAL; if (!iwpm_valid_client(nl_client)) - return -EINVAL; + return ret; map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL); if (!map_info) { pr_err("%s: Unable to allocate a mapping info\n", __func__); @@ -115,13 +137,16 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - hash_bucket_head = get_hash_bucket_head( + hash_bucket_head = get_mapinfo_hash_bucket( &map_info->local_sockaddr, &map_info->mapped_sockaddr); - hlist_add_head(&map_info->hlist_node, hash_bucket_head); + if (hash_bucket_head) { + hlist_add_head(&map_info->hlist_node, hash_bucket_head); + ret = 0; + } } spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); - return 0; + return ret; } EXPORT_SYMBOL(iwpm_create_mapinfo); @@ -136,9 +161,12 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - hash_bucket_head = get_hash_bucket_head( + hash_bucket_head = get_mapinfo_hash_bucket( local_sockaddr, mapped_local_addr); + if (!hash_bucket_head) + goto remove_mapinfo_exit; + hlist_for_each_entry_safe(map_info, tmp_hlist_node, hash_bucket_head, hlist_node) { @@ -152,6 +180,7 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, } } } +remove_mapinfo_exit: spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); return ret; } @@ -166,7 +195,7 @@ static void free_hash_bucket(void) /* remove all the mapinfo data from the list */ spin_lock_irqsave(&iwpm_mapinfo_lock, flags); - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { hlist_for_each_entry_safe(map_info, tmp_hlist_node, &iwpm_hash_bucket[i], hlist_node) { @@ -180,6 +209,96 @@ static void free_hash_bucket(void) spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); } +static void free_reminfo_bucket(void) +{ + struct hlist_node *tmp_hlist_node; + struct iwpm_remote_info *rem_info; + unsigned long flags; + int i; + + /* remove all the remote info from the list */ + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + for (i = 0; i < IWPM_REMINFO_HASH_SIZE; i++) { + hlist_for_each_entry_safe(rem_info, tmp_hlist_node, + &iwpm_reminfo_bucket[i], hlist_node) { + + hlist_del_init(&rem_info->hlist_node); + kfree(rem_info); + } + } + /* free the hash list */ + kfree(iwpm_reminfo_bucket); + iwpm_reminfo_bucket = NULL; + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); +} + +static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage *, + struct sockaddr_storage *); + +void iwpm_add_remote_info(struct iwpm_remote_info *rem_info) +{ + struct hlist_head *hash_bucket_head; + unsigned long flags; + + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + if (iwpm_reminfo_bucket) { + hash_bucket_head = get_reminfo_hash_bucket( + &rem_info->mapped_loc_sockaddr, + &rem_info->mapped_rem_sockaddr); + if (hash_bucket_head) + hlist_add_head(&rem_info->hlist_node, hash_bucket_head); + } + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); +} + +int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, + struct sockaddr_storage *mapped_rem_addr, + struct sockaddr_storage *remote_addr, + u8 nl_client) +{ + struct hlist_node *tmp_hlist_node; + struct hlist_head *hash_bucket_head; + struct iwpm_remote_info *rem_info = NULL; + unsigned long flags; + int ret = -EINVAL; + + if (!iwpm_valid_client(nl_client)) { + pr_info("%s: Invalid client = %d\n", __func__, nl_client); + return ret; + } + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + if (iwpm_reminfo_bucket) { + hash_bucket_head = get_reminfo_hash_bucket( + mapped_loc_addr, + mapped_rem_addr); + if (!hash_bucket_head) + goto get_remote_info_exit; + hlist_for_each_entry_safe(rem_info, tmp_hlist_node, + hash_bucket_head, hlist_node) { + + if (!iwpm_compare_sockaddr(&rem_info->mapped_loc_sockaddr, + mapped_loc_addr) && + !iwpm_compare_sockaddr(&rem_info->mapped_rem_sockaddr, + mapped_rem_addr)) { + + memcpy(remote_addr, &rem_info->remote_sockaddr, + sizeof(struct sockaddr_storage)); + iwpm_print_sockaddr(remote_addr, + "get_remote_info: Remote sockaddr:"); + + hlist_del_init(&rem_info->hlist_node); + kfree(rem_info); + ret = 0; + break; + } + } + } +get_remote_info_exit: + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); + return ret; +} +EXPORT_SYMBOL(iwpm_get_remote_info); + struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, u8 nl_client, gfp_t gfp) { @@ -409,31 +528,54 @@ static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr) return hash; } -static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage - *local_sockaddr, - struct sockaddr_storage - *mapped_sockaddr) +static int get_hash_bucket(struct sockaddr_storage *a_sockaddr, + struct sockaddr_storage *b_sockaddr, u32 *hash) { - u32 local_hash, mapped_hash, hash; + u32 a_hash, b_hash; - if (local_sockaddr->ss_family == AF_INET) { - local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr); - mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr); + if (a_sockaddr->ss_family == AF_INET) { + a_hash = iwpm_ipv4_jhash((struct sockaddr_in *) a_sockaddr); + b_hash = iwpm_ipv4_jhash((struct sockaddr_in *) b_sockaddr); - } else if (local_sockaddr->ss_family == AF_INET6) { - local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr); - mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr); + } else if (a_sockaddr->ss_family == AF_INET6) { + a_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) a_sockaddr); + b_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) b_sockaddr); } else { pr_err("%s: Invalid sockaddr family\n", __func__); - return NULL; + return -EINVAL; } - if (local_hash == mapped_hash) /* if port mapper isn't available */ - hash = local_hash; + if (a_hash == b_hash) /* if port mapper isn't available */ + *hash = a_hash; else - hash = jhash_2words(local_hash, mapped_hash, 0); + *hash = jhash_2words(a_hash, b_hash, 0); + return 0; +} + +static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage + *local_sockaddr, struct sockaddr_storage + *mapped_sockaddr) +{ + u32 hash; + int ret; - return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK]; + ret = get_hash_bucket(local_sockaddr, mapped_sockaddr, &hash); + if (ret) + return NULL; + return &iwpm_hash_bucket[hash & IWPM_MAPINFO_HASH_MASK]; +} + +static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage + *mapped_loc_sockaddr, struct sockaddr_storage + *mapped_rem_sockaddr) +{ + u32 hash; + int ret; + + ret = get_hash_bucket(mapped_loc_sockaddr, mapped_rem_sockaddr, &hash); + if (ret) + return NULL; + return &iwpm_reminfo_bucket[hash & IWPM_REMINFO_HASH_MASK]; } static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) @@ -512,7 +654,7 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid) } skb_num++; spin_lock_irqsave(&iwpm_mapinfo_lock, flags); - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { hlist_for_each_entry(map_info, &iwpm_hash_bucket[i], hlist_node) { if (map_info->nl_client != nl_client) @@ -595,7 +737,7 @@ int iwpm_mapinfo_available(void) spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { if (!hlist_empty(&iwpm_hash_bucket[i])) { full_bucket = 1; break; diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 9777c86..ee2d9ff 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -76,6 +76,14 @@ struct iwpm_mapping_info { u8 nl_client; }; +struct iwpm_remote_info { + struct hlist_node hlist_node; + struct sockaddr_storage remote_sockaddr; + struct sockaddr_storage mapped_loc_sockaddr; + struct sockaddr_storage mapped_rem_sockaddr; + u8 nl_client; +}; + struct iwpm_admin_data { atomic_t refcount; atomic_t nlmsg_seq; @@ -128,6 +136,13 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); int iwpm_get_nlmsg_seq(void); /** + * iwpm_add_reminfo - Add remote address info of the connecting peer + * to the remote info hash table + * @reminfo: The remote info to be added + */ +void iwpm_add_remote_info(struct iwpm_remote_info *reminfo); + +/** * iwpm_valid_client - Check if the port mapper client is valid * @nl_client: The index of the netlink client * diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 74c30f4..a4b1466 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3,6 +3,7 @@ * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -44,6 +45,7 @@ #include "mad_priv.h" #include "mad_rmpp.h" #include "smi.h" +#include "opa_smi.h" #include "agent.h" MODULE_LICENSE("Dual BSD/GPL"); @@ -59,8 +61,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); -static struct kmem_cache *ib_mad_cache; - static struct list_head ib_mad_port_list; static u32 ib_mad_client_id = 0; @@ -73,7 +73,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, static void remove_mad_reg_req(struct ib_mad_agent_private *priv); static struct ib_mad_agent_private *find_mad_agent( struct ib_mad_port_private *port_priv, - struct ib_mad *mad); + const struct ib_mad_hdr *mad); static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); @@ -179,12 +179,12 @@ static int is_vendor_method_in_use( return 0; } -int ib_response_mad(struct ib_mad *mad) +int ib_response_mad(const struct ib_mad_hdr *hdr) { - return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) || - (mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) || - ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) && - (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP))); + return ((hdr->method & IB_MGMT_METHOD_RESP) || + (hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) || + ((hdr->mgmt_class == IB_MGMT_CLASS_BM) && + (hdr->attr_mod & IB_BM_ATTR_MOD_RESP))); } EXPORT_SYMBOL(ib_response_mad); @@ -717,6 +717,32 @@ static void build_smp_wc(struct ib_qp *qp, wc->port_num = port_num; } +static size_t mad_priv_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_mad_private) + mp->mad_size; +} + +static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags) +{ + size_t size = sizeof(struct ib_mad_private) + mad_size; + struct ib_mad_private *ret = kzalloc(size, flags); + + if (ret) + ret->mad_size = mad_size; + + return ret; +} + +static size_t port_mad_size(const struct ib_mad_port_private *port_priv) +{ + return rdma_max_mad_size(port_priv->device, port_priv->port_num); +} + +static size_t mad_priv_dma_size(const struct ib_mad_private *mp) +{ + return sizeof(struct ib_grh) + mp->mad_size; +} + /* * Return 0 if SMP is to be sent * Return 1 if SMP was consumed locally (whether or not solicited) @@ -727,6 +753,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, { int ret = 0; struct ib_smp *smp = mad_send_wr->send_buf.mad; + struct opa_smp *opa_smp = (struct opa_smp *)smp; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; @@ -736,6 +763,11 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, u8 port_num; struct ib_wc mad_wc; struct ib_send_wr *send_wr = &mad_send_wr->send_wr; + size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); + u16 out_mad_pkey_index = 0; + u16 drslid; + bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); if (device->node_type == RDMA_NODE_IB_SWITCH && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) @@ -749,19 +781,48 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, * If we are at the start of the LID routed part, don't update the * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. */ - if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == - IB_LID_PERMISSIVE && - smi_handle_dr_smp_send(smp, device->node_type, port_num) == - IB_SMI_DISCARD) { - ret = -EINVAL; - dev_err(&device->dev, "Invalid directed route\n"); - goto out; - } + if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) { + u32 opa_drslid; + + if ((opa_get_smp_direction(opa_smp) + ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == + OPA_LID_PERMISSIVE && + opa_smi_handle_dr_smp_send(opa_smp, device->node_type, + port_num) == IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid directed route\n"); + goto out; + } + opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); + if (opa_drslid != OPA_LID_PERMISSIVE && + opa_drslid & 0xffff0000) { + ret = -EINVAL; + dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", + opa_drslid); + goto out; + } + drslid = (u16)(opa_drslid & 0x0000ffff); - /* Check to post send on QP or process locally */ - if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && - smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) - goto out; + /* Check to post send on QP or process locally */ + if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD && + opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) + goto out; + } else { + if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == + IB_LID_PERMISSIVE && + smi_handle_dr_smp_send(smp, device->node_type, port_num) == + IB_SMI_DISCARD) { + ret = -EINVAL; + dev_err(&device->dev, "Invalid directed route\n"); + goto out; + } + drslid = be16_to_cpu(smp->dr_slid); + + /* Check to post send on QP or process locally */ + if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && + smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) + goto out; + } local = kmalloc(sizeof *local, GFP_ATOMIC); if (!local) { @@ -771,7 +832,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } local->mad_priv = NULL; local->recv_mad_agent = NULL; - mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC); + mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC); if (!mad_priv) { ret = -ENOMEM; dev_err(&device->dev, "No memory for local response MAD\n"); @@ -780,18 +841,25 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } build_smp_wc(mad_agent_priv->agent.qp, - send_wr->wr_id, be16_to_cpu(smp->dr_slid), + send_wr->wr_id, drslid, send_wr->wr.ud.pkey_index, send_wr->wr.ud.port_num, &mad_wc); + if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { + mad_wc.byte_len = mad_send_wr->send_buf.hdr_len + + mad_send_wr->send_buf.data_len + + sizeof(struct ib_grh); + } + /* No GRH for DR SMP */ ret = device->process_mad(device, 0, port_num, &mad_wc, NULL, - (struct ib_mad *)smp, - (struct ib_mad *)&mad_priv->mad); + (const struct ib_mad_hdr *)smp, mad_size, + (struct ib_mad_hdr *)mad_priv->mad, + &mad_size, &out_mad_pkey_index); switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: - if (ib_response_mad(&mad_priv->mad.mad) && + if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) && mad_agent_priv->agent.recv_handler) { local->mad_priv = mad_priv; local->recv_mad_agent = mad_agent_priv; @@ -801,39 +869,43 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, */ atomic_inc(&mad_agent_priv->refcount); } else - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; case IB_MAD_RESULT_SUCCESS: /* Treat like an incoming receive MAD */ port_priv = ib_get_mad_port(mad_agent_priv->agent.device, mad_agent_priv->agent.port_num); if (port_priv) { - memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad)); + memcpy(mad_priv->mad, smp, mad_priv->mad_size); recv_mad_agent = find_mad_agent(port_priv, - &mad_priv->mad.mad); + (const struct ib_mad_hdr *)mad_priv->mad); } if (!port_priv || !recv_mad_agent) { /* * No receiving agent so drop packet and * generate send completion. */ - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); break; } local->mad_priv = mad_priv; local->recv_mad_agent = recv_mad_agent; break; default: - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); kfree(local); ret = -EINVAL; goto out; } local->mad_send_wr = mad_send_wr; + if (opa) { + local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index; + local->return_wc_byte_len = mad_size; + } /* Reference MAD agent until send side of local completion handled */ atomic_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ @@ -847,11 +919,11 @@ out: return ret; } -static int get_pad_size(int hdr_len, int data_len) +static int get_pad_size(int hdr_len, int data_len, size_t mad_size) { int seg_size, pad; - seg_size = sizeof(struct ib_mad) - hdr_len; + seg_size = mad_size - hdr_len; if (data_len && seg_size) { pad = seg_size - data_len % seg_size; return pad == seg_size ? 0 : pad; @@ -870,14 +942,15 @@ static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) } static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, - gfp_t gfp_mask) + size_t mad_size, gfp_t gfp_mask) { struct ib_mad_send_buf *send_buf = &send_wr->send_buf; struct ib_rmpp_mad *rmpp_mad = send_buf->mad; struct ib_rmpp_segment *seg = NULL; int left, seg_size, pad; - send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len; + send_buf->seg_size = mad_size - send_buf->hdr_len; + send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR; seg_size = send_buf->seg_size; pad = send_wr->pad; @@ -910,7 +983,7 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, return 0; } -int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent) +int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent) { return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP); } @@ -920,26 +993,37 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, int rmpp_active, int hdr_len, int data_len, - gfp_t gfp_mask) + gfp_t gfp_mask, + u8 base_version) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; int pad, message_size, ret, size; void *buf; + size_t mad_size; + bool opa; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); - pad = get_pad_size(hdr_len, data_len); + + opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num); + + if (opa && base_version == OPA_MGMT_BASE_VERSION) + mad_size = sizeof(struct opa_mad); + else + mad_size = sizeof(struct ib_mad); + + pad = get_pad_size(hdr_len, data_len, mad_size); message_size = hdr_len + data_len + pad; if (ib_mad_kernel_rmpp_agent(mad_agent)) { - if (!rmpp_active && message_size > sizeof(struct ib_mad)) + if (!rmpp_active && message_size > mad_size) return ERR_PTR(-EINVAL); } else - if (rmpp_active || message_size > sizeof(struct ib_mad)) + if (rmpp_active || message_size > mad_size) return ERR_PTR(-EINVAL); - size = rmpp_active ? hdr_len : sizeof(struct ib_mad); + size = rmpp_active ? hdr_len : mad_size; buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); @@ -954,7 +1038,14 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->mad_agent_priv = mad_agent_priv; mad_send_wr->sg_list[0].length = hdr_len; mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; - mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len; + + /* OPA MADs don't have to be the full 2048 bytes */ + if (opa && base_version == OPA_MGMT_BASE_VERSION && + data_len < mad_size - hdr_len) + mad_send_wr->sg_list[1].length = data_len; + else + mad_send_wr->sg_list[1].length = mad_size - hdr_len; + mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey; mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; @@ -967,7 +1058,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; if (rmpp_active) { - ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask); + ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); if (ret) { kfree(buf); return ERR_PTR(ret); @@ -1237,7 +1328,7 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) recv_wc); priv = container_of(mad_priv_hdr, struct ib_mad_private, header); - kmem_cache_free(ib_mad_cache, priv); + kfree(priv); } } EXPORT_SYMBOL(ib_free_recv_mad); @@ -1324,7 +1415,7 @@ static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class) } static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, - char *oui) + const char *oui) { int i; @@ -1622,13 +1713,13 @@ out: static struct ib_mad_agent_private * find_mad_agent(struct ib_mad_port_private *port_priv, - struct ib_mad *mad) + const struct ib_mad_hdr *mad_hdr) { struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; spin_lock_irqsave(&port_priv->reg_lock, flags); - if (ib_response_mad(mad)) { + if (ib_response_mad(mad_hdr)) { u32 hi_tid; struct ib_mad_agent_private *entry; @@ -1636,7 +1727,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, * Routing is based on high 32 bits of transaction ID * of MAD. */ - hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32; + hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; list_for_each_entry(entry, &port_priv->agent_list, agent_list) { if (entry->agent.hi_tid == hi_tid) { mad_agent = entry; @@ -1648,45 +1739,45 @@ find_mad_agent(struct ib_mad_port_private *port_priv, struct ib_mad_mgmt_method_table *method; struct ib_mad_mgmt_vendor_class_table *vendor; struct ib_mad_mgmt_vendor_class *vendor_class; - struct ib_vendor_mad *vendor_mad; + const struct ib_vendor_mad *vendor_mad; int index; /* * Routing is based on version, class, and method * For "newer" vendor MADs, also based on OUI */ - if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION) + if (mad_hdr->class_version >= MAX_MGMT_VERSION) goto out; - if (!is_vendor_class(mad->mad_hdr.mgmt_class)) { + if (!is_vendor_class(mad_hdr->mgmt_class)) { class = port_priv->version[ - mad->mad_hdr.class_version].class; + mad_hdr->class_version].class; if (!class) goto out; - if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= + if (convert_mgmt_class(mad_hdr->mgmt_class) >= IB_MGMT_MAX_METHODS) goto out; method = class->method_table[convert_mgmt_class( - mad->mad_hdr.mgmt_class)]; + mad_hdr->mgmt_class)]; if (method) - mad_agent = method->agent[mad->mad_hdr.method & + mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } else { vendor = port_priv->version[ - mad->mad_hdr.class_version].vendor; + mad_hdr->class_version].vendor; if (!vendor) goto out; vendor_class = vendor->vendor_class[vendor_class_index( - mad->mad_hdr.mgmt_class)]; + mad_hdr->mgmt_class)]; if (!vendor_class) goto out; /* Find matching OUI */ - vendor_mad = (struct ib_vendor_mad *)mad; + vendor_mad = (const struct ib_vendor_mad *)mad_hdr; index = find_vendor_oui(vendor_class, vendor_mad->oui); if (index == -1) goto out; method = vendor_class->method_table[index]; if (method) { - mad_agent = method->agent[mad->mad_hdr.method & + mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } } @@ -1708,20 +1799,24 @@ out: return mad_agent; } -static int validate_mad(struct ib_mad *mad, u32 qp_num) +static int validate_mad(const struct ib_mad_hdr *mad_hdr, + const struct ib_mad_qp_info *qp_info, + bool opa) { int valid = 0; + u32 qp_num = qp_info->qp->qp_num; /* Make sure MAD base version is understood */ - if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) { - pr_err("MAD received with unsupported base version %d\n", - mad->mad_hdr.base_version); + if (mad_hdr->base_version != IB_MGMT_BASE_VERSION && + (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) { + pr_err("MAD received with unsupported base version %d %s\n", + mad_hdr->base_version, opa ? "(opa)" : ""); goto out; } /* Filter SMI packets sent to other than QP0 */ - if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || - (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || + (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { if (qp_num == 0) valid = 1; } else { @@ -1734,8 +1829,8 @@ out: return valid; } -static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_hdr *mad_hdr) +static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_hdr *mad_hdr) { struct ib_rmpp_mad *rmpp_mad; @@ -1747,16 +1842,16 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); } -static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr, - struct ib_mad_recv_wc *rwc) +static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc) { - return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class == + return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class == rwc->recv_buf.mad->mad_hdr.mgmt_class; } -static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_send_wr_private *wr, - struct ib_mad_recv_wc *rwc ) +static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_send_wr_private *wr, + const struct ib_mad_recv_wc *rwc ) { struct ib_ah_attr attr; u8 send_resp, rcv_resp; @@ -1765,8 +1860,8 @@ static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, u8 port_num = mad_agent_priv->agent.port_num; u8 lmc; - send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad); - rcv_resp = ib_response_mad(rwc->recv_buf.mad); + send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad); + rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr); if (send_resp == rcv_resp) /* both requests, or both responses. GIDs different */ @@ -1811,22 +1906,22 @@ static inline int is_direct(u8 class) } struct ib_mad_send_wr_private* -ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_recv_wc *wc) +ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_recv_wc *wc) { struct ib_mad_send_wr_private *wr; - struct ib_mad *mad; + const struct ib_mad_hdr *mad_hdr; - mad = (struct ib_mad *)wc->recv_buf.mad; + mad_hdr = &wc->recv_buf.mad->mad_hdr; list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { - if ((wr->tid == mad->mad_hdr.tid) && + if ((wr->tid == mad_hdr->tid) && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ - (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) return (wr->status == IB_WC_SUCCESS) ? wr : NULL; } @@ -1836,15 +1931,15 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, * been notified that the send has completed */ list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, wr->send_buf.mad) && - wr->tid == mad->mad_hdr.tid && + if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) && + wr->tid == mad_hdr->tid && wr->timeout && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ - (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) || + (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) /* Verify request has not been canceled */ return (wr->status == IB_WC_SUCCESS) ? wr : NULL; @@ -1879,7 +1974,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } /* Complete corresponding request */ - if (ib_response_mad(mad_recv_wc->recv_buf.mad)) { + if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) { spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { @@ -1924,26 +2019,163 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } } -static bool generate_unmatched_resp(struct ib_mad_private *recv, - struct ib_mad_private *response) +static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv, + const struct ib_mad_qp_info *qp_info, + const struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + enum smi_forward_action retsmi; + struct ib_smp *smp = (struct ib_smp *)recv->mad; + + if (smi_handle_dr_smp_recv(smp, + port_priv->device->node_type, + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (smi_handle_dr_smp_send(smp, + port_priv->device->node_type, + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + smi_get_fwd_port(smp), + qp_info->qp->qp_num, + response->mad_size, + false); + + return IB_SMI_DISCARD; + } + return IB_SMI_HANDLE; +} + +static bool generate_unmatched_resp(const struct ib_mad_private *recv, + struct ib_mad_private *response, + size_t *resp_len, bool opa) { - if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET || - recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) { - memcpy(response, recv, sizeof *response); + const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad; + struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad; + + if (recv_hdr->method == IB_MGMT_METHOD_GET || + recv_hdr->method == IB_MGMT_METHOD_SET) { + memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; response->header.recv_wc.recv_buf.grh = &response->grh; - response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; - response->mad.mad.mad_hdr.status = - cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); - if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION; + resp_hdr->method = IB_MGMT_METHOD_GET_RESP; + resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); + if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + resp_hdr->status |= IB_SMP_DIRECTION; + + if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) { + if (recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_LID_ROUTED || + recv_hdr->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + *resp_len = opa_get_smp_header_size( + (struct opa_smp *)recv->mad); + else + *resp_len = sizeof(struct ib_mad_hdr); + } return true; } else { return false; } } + +static enum smi_action +handle_opa_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + enum smi_forward_action retsmi; + struct opa_smp *smp = (struct opa_smp *)recv->mad; + + if (opa_smi_handle_dr_smp_recv(smp, + port_priv->device->node_type, + port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + retsmi = opa_smi_check_forward_dr_smp(smp); + if (retsmi == IB_SMI_LOCAL) + return IB_SMI_HANDLE; + + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (opa_smi_handle_dr_smp_send(smp, + port_priv->device->node_type, + port_num) == IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + if (opa_smi_check_local_smp(smp, port_priv->device) == + IB_SMI_DISCARD) + return IB_SMI_DISCARD; + + } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + /* forward case for switches */ + memcpy(response, recv, mad_priv_size(response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.opa_mad = + (struct opa_mad *)response->mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + agent_send_response((const struct ib_mad_hdr *)response->mad, + &response->grh, wc, + port_priv->device, + opa_smi_get_fwd_port(smp), + qp_info->qp->qp_num, + recv->header.wc.byte_len, + true); + + return IB_SMI_DISCARD; + } + + return IB_SMI_HANDLE; +} + +static enum smi_action +handle_smi(struct ib_mad_port_private *port_priv, + struct ib_mad_qp_info *qp_info, + struct ib_wc *wc, + int port_num, + struct ib_mad_private *recv, + struct ib_mad_private *response, + bool opa) +{ + struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; + + if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && + mad_hdr->class_version == OPA_SMI_CLASS_VERSION) + return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, + response); + + return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response); +} + static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_wc *wc) { @@ -1954,35 +2186,49 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_agent_private *mad_agent; int port_num; int ret = IB_MAD_RESULT_SUCCESS; + size_t mad_size; + u16 resp_mad_pkey_index = 0; + bool opa; mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; qp_info = mad_list->mad_queue->qp_info; dequeue_mad(mad_list); + opa = rdma_cap_opa_mad(qp_info->port_priv->device, + qp_info->port_priv->port_num); + mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, mad_list); recv = container_of(mad_priv_hdr, struct ib_mad_private, header); ib_dma_unmap_single(port_priv->device, recv->header.mapping, - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), + mad_priv_dma_size(recv), DMA_FROM_DEVICE); /* Setup MAD receive work completion from "normal" work completion */ recv->header.wc = *wc; recv->header.recv_wc.wc = &recv->header.wc; - recv->header.recv_wc.mad_len = sizeof(struct ib_mad); - recv->header.recv_wc.recv_buf.mad = &recv->mad.mad; + + if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) { + recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh); + recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + recv->header.recv_wc.mad_len = sizeof(struct ib_mad); + recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + + recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; recv->header.recv_wc.recv_buf.grh = &recv->grh; if (atomic_read(&qp_info->snoop_count)) snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); /* Validate MAD */ - if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num)) + if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; - response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); + mad_size = recv->mad_size; + response = alloc_mad_private(mad_size, GFP_KERNEL); if (!response) { dev_err(&port_priv->device->dev, "ib_mad_recv_done_handler no memory for response buffer\n"); @@ -1994,69 +2240,43 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, else port_num = port_priv->port_num; - if (recv->mad.mad.mad_hdr.mgmt_class == + if (((struct ib_mad_hdr *)recv->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - enum smi_forward_action retsmi; - - if (smi_handle_dr_smp_recv(&recv->mad.smp, - port_priv->device->node_type, - port_num, - port_priv->device->phys_port_cnt) == - IB_SMI_DISCARD) + if (handle_smi(port_priv, qp_info, wc, port_num, recv, + response, opa) + == IB_SMI_DISCARD) goto out; - - retsmi = smi_check_forward_dr_smp(&recv->mad.smp); - if (retsmi == IB_SMI_LOCAL) - goto local; - - if (retsmi == IB_SMI_SEND) { /* don't forward */ - if (smi_handle_dr_smp_send(&recv->mad.smp, - port_priv->device->node_type, - port_num) == IB_SMI_DISCARD) - goto out; - - if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD) - goto out; - } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { - /* forward case for switches */ - memcpy(response, recv, sizeof(*response)); - response->header.recv_wc.wc = &response->header.wc; - response->header.recv_wc.recv_buf.mad = &response->mad.mad; - response->header.recv_wc.recv_buf.grh = &response->grh; - - agent_send_response(&response->mad.mad, - &response->grh, wc, - port_priv->device, - smi_get_fwd_port(&recv->mad.smp), - qp_info->qp->qp_num); - - goto out; - } } -local: /* Give driver "right of first refusal" on incoming MAD */ if (port_priv->device->process_mad) { ret = port_priv->device->process_mad(port_priv->device, 0, port_priv->port_num, wc, &recv->grh, - &recv->mad.mad, - &response->mad.mad); + (const struct ib_mad_hdr *)recv->mad, + recv->mad_size, + (struct ib_mad_hdr *)response->mad, + &mad_size, &resp_mad_pkey_index); + + if (opa) + wc->pkey_index = resp_mad_pkey_index; + if (ret & IB_MAD_RESULT_SUCCESS) { if (ret & IB_MAD_RESULT_CONSUMED) goto out; if (ret & IB_MAD_RESULT_REPLY) { - agent_send_response(&response->mad.mad, + agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, port_priv->device, port_num, - qp_info->qp->qp_num); + qp_info->qp->qp_num, + mad_size, opa); goto out; } } } - mad_agent = find_mad_agent(port_priv, &recv->mad.mad); + mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); if (mad_agent) { ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* @@ -2065,17 +2285,17 @@ local: */ recv = NULL; } else if ((ret & IB_MAD_RESULT_SUCCESS) && - generate_unmatched_resp(recv, response)) { - agent_send_response(&response->mad.mad, &recv->grh, wc, - port_priv->device, port_num, qp_info->qp->qp_num); + generate_unmatched_resp(recv, response, &mad_size, opa)) { + agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, + port_priv->device, port_num, + qp_info->qp->qp_num, mad_size, opa); } out: /* Post another receive request for this QP */ if (response) { ib_mad_post_receive_mads(qp_info, response); - if (recv) - kmem_cache_free(ib_mad_cache, recv); + kfree(recv); } else ib_mad_post_receive_mads(qp_info, recv); } @@ -2411,7 +2631,8 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv, list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && + if (is_rmpp_data_mad(mad_agent_priv, + mad_send_wr->send_buf.mad) && &mad_send_wr->send_buf == send_buf) return mad_send_wr; } @@ -2468,10 +2689,14 @@ static void local_completions(struct work_struct *work) int free_mad; struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; + bool opa; mad_agent_priv = container_of(work, struct ib_mad_agent_private, local_work); + opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, + mad_agent_priv->qp_info->port_priv->port_num); + spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->local_list)) { local = list_entry(mad_agent_priv->local_list.next, @@ -2481,6 +2706,7 @@ static void local_completions(struct work_struct *work) spin_unlock_irqrestore(&mad_agent_priv->lock, flags); free_mad = 0; if (local->mad_priv) { + u8 base_version; recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { dev_err(&mad_agent_priv->agent.device->dev, @@ -2496,17 +2722,26 @@ static void local_completions(struct work_struct *work) build_smp_wc(recv_mad_agent->agent.qp, (unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), - 0, recv_mad_agent->agent.port_num, &wc); + local->mad_send_wr->send_wr.wr.ud.pkey_index, + recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; - local->mad_priv->header.recv_wc.mad_len = - sizeof(struct ib_mad); + + base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version; + if (opa && base_version == OPA_MGMT_BASE_VERSION) { + local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len; + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); + } else { + local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); + local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); + } + INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); list_add(&local->mad_priv->header.recv_wc.recv_buf.list, &local->mad_priv->header.recv_wc.rmpp_list); local->mad_priv->header.recv_wc.recv_buf.grh = NULL; local->mad_priv->header.recv_wc.recv_buf.mad = - &local->mad_priv->mad.mad; + (struct ib_mad *)local->mad_priv->mad; if (atomic_read(&recv_mad_agent->qp_info->snoop_count)) snoop_recv(recv_mad_agent->qp_info, &local->mad_priv->header.recv_wc, @@ -2534,7 +2769,7 @@ local_send_completion: spin_lock_irqsave(&mad_agent_priv->lock, flags); atomic_dec(&mad_agent_priv->refcount); if (free_mad) - kmem_cache_free(ib_mad_cache, local->mad_priv); + kfree(local->mad_priv); kfree(local); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); @@ -2649,7 +2884,6 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_queue *recv_queue = &qp_info->recv_queue; /* Initialize common scatter list fields */ - sg_list.length = sizeof *mad_priv - sizeof mad_priv->header; sg_list.lkey = (*qp_info->port_priv->mr).lkey; /* Initialize common receive WR fields */ @@ -2663,7 +2897,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, mad_priv = mad; mad = NULL; } else { - mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); + mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), + GFP_ATOMIC); if (!mad_priv) { dev_err(&qp_info->port_priv->device->dev, "No memory for receive buffer\n"); @@ -2671,10 +2906,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, break; } } + sg_list.length = mad_priv_dma_size(mad_priv); sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, &mad_priv->grh, - sizeof *mad_priv - - sizeof mad_priv->header, + mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, sg_list.addr))) { @@ -2698,10 +2933,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, spin_unlock_irqrestore(&recv_queue->lock, flags); ib_dma_unmap_single(qp_info->port_priv->device, mad_priv->header.mapping, - sizeof *mad_priv - - sizeof mad_priv->header, + mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); - kmem_cache_free(ib_mad_cache, mad_priv); + kfree(mad_priv); dev_err(&qp_info->port_priv->device->dev, "ib_post_recv failed: %d\n", ret); break; @@ -2738,10 +2972,9 @@ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) ib_dma_unmap_single(qp_info->port_priv->device, recv->header.mapping, - sizeof(struct ib_mad_private) - - sizeof(struct ib_mad_private_header), + mad_priv_dma_size(recv), DMA_FROM_DEVICE); - kmem_cache_free(ib_mad_cache, recv); + kfree(recv); } qp_info->recv_queue.count = 0; @@ -2922,6 +3155,14 @@ static int ib_mad_port_open(struct ib_device *device, unsigned long flags; char name[sizeof "ib_mad123"]; int has_smi; + struct ib_cq_init_attr cq_attr = {}; + + if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) + return -EFAULT; + + if (WARN_ON(rdma_cap_opa_mad(device, port_num) && + rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE)) + return -EFAULT; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); @@ -2938,13 +3179,14 @@ static int ib_mad_port_open(struct ib_device *device, init_mad_qp(port_priv, &port_priv->qp_info[1]); cq_size = mad_sendq_size + mad_recvq_size; - has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND; + has_smi = rdma_cap_ib_smi(device, port_num); if (has_smi) cq_size *= 2; + cq_attr.cqe = cq_size; port_priv->cq = ib_create_cq(port_priv->device, ib_mad_thread_completion_handler, - NULL, port_priv, cq_size, 0); + NULL, port_priv, &cq_attr); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); @@ -3057,9 +3299,6 @@ static void ib_mad_init_device(struct ib_device *device) { int start, end, i; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - if (device->node_type == RDMA_NODE_IB_SWITCH) { start = 0; end = 0; @@ -3069,6 +3308,9 @@ static void ib_mad_init_device(struct ib_device *device) } for (i = start; i <= end; i++) { + if (!rdma_cap_ib_mad(device, i)) + continue; + if (ib_mad_port_open(device, i)) { dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; @@ -3086,40 +3328,39 @@ error_agent: dev_err(&device->dev, "Couldn't close port %d\n", i); error: - i--; + while (--i >= start) { + if (!rdma_cap_ib_mad(device, i)) + continue; - while (i >= start) { if (ib_agent_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d for agents\n", i); if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); - i--; } } static void ib_mad_remove_device(struct ib_device *device) { - int i, num_ports, cur_port; - - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + int start, end, i; if (device->node_type == RDMA_NODE_IB_SWITCH) { - num_ports = 1; - cur_port = 0; + start = 0; + end = 0; } else { - num_ports = device->phys_port_cnt; - cur_port = 1; + start = 1; + end = device->phys_port_cnt; } - for (i = 0; i < num_ports; i++, cur_port++) { - if (ib_agent_port_close(device, cur_port)) + + for (i = start; i <= end; i++) { + if (!rdma_cap_ib_mad(device, i)) + continue; + + if (ib_agent_port_close(device, i)) dev_err(&device->dev, - "Couldn't close port %d for agents\n", - cur_port); - if (ib_mad_port_close(device, cur_port)) - dev_err(&device->dev, "Couldn't close port %d\n", - cur_port); + "Couldn't close port %d for agents\n", i); + if (ib_mad_port_close(device, i)) + dev_err(&device->dev, "Couldn't close port %d\n", i); } } @@ -3131,45 +3372,25 @@ static struct ib_client mad_client = { static int __init ib_mad_init_module(void) { - int ret; - mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); - ib_mad_cache = kmem_cache_create("ib_mad", - sizeof(struct ib_mad_private), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!ib_mad_cache) { - pr_err("Couldn't create ib_mad cache\n"); - ret = -ENOMEM; - goto error1; - } - INIT_LIST_HEAD(&ib_mad_port_list); if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); - ret = -EINVAL; - goto error2; + return -EINVAL; } return 0; - -error2: - kmem_cache_destroy(ib_mad_cache); -error1: - return ret; } static void __exit ib_mad_cleanup_module(void) { ib_unregister_client(&mad_client); - kmem_cache_destroy(ib_mad_cache); } module_init(ib_mad_init_module); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index d1a0b0e..5be89f9 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -41,6 +41,7 @@ #include <linux/workqueue.h> #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> +#include <rdma/opa_smi.h> #define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */ @@ -56,7 +57,7 @@ /* Registration table sizes */ #define MAX_MGMT_CLASS 80 -#define MAX_MGMT_VERSION 8 +#define MAX_MGMT_VERSION 0x83 #define MAX_MGMT_OUI 8 #define MAX_MGMT_VENDOR_RANGE2 (IB_MGMT_CLASS_VENDOR_RANGE2_END - \ IB_MGMT_CLASS_VENDOR_RANGE2_START + 1) @@ -75,12 +76,9 @@ struct ib_mad_private_header { struct ib_mad_private { struct ib_mad_private_header header; + size_t mad_size; struct ib_grh grh; - union { - struct ib_mad mad; - struct ib_rmpp_mad rmpp_mad; - struct ib_smp smp; - } mad; + u8 mad[0]; } __attribute__ ((packed)); struct ib_rmpp_segment { @@ -150,6 +148,7 @@ struct ib_mad_local_private { struct ib_mad_private *mad_priv; struct ib_mad_agent_private *recv_mad_agent; struct ib_mad_send_wr_private *mad_send_wr; + size_t return_wc_byte_len; }; struct ib_mad_mgmt_method_table { @@ -213,8 +212,8 @@ struct ib_mad_port_private { int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); struct ib_mad_send_wr_private * -ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, - struct ib_mad_recv_wc *mad_recv_wc); +ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, + const struct ib_mad_recv_wc *mad_recv_wc); void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index f37878c..382941b 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Intel Inc. All rights reserved. * Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -67,6 +68,7 @@ struct mad_rmpp_recv { u8 mgmt_class; u8 class_version; u8 method; + u8 base_version; }; static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) @@ -139,7 +141,8 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, hdr_len, - 0, GFP_KERNEL); + 0, GFP_KERNEL, + IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) return; @@ -165,7 +168,8 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, - hdr_len, 0, GFP_KERNEL); + hdr_len, 0, GFP_KERNEL, + IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) ib_destroy_ah(ah); else { @@ -316,6 +320,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, rmpp_recv->mgmt_class = mad_hdr->mgmt_class; rmpp_recv->class_version = mad_hdr->class_version; rmpp_recv->method = mad_hdr->method; + rmpp_recv->base_version = mad_hdr->base_version; return rmpp_recv; error: kfree(rmpp_recv); @@ -431,14 +436,23 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv) { struct ib_rmpp_mad *rmpp_mad; int hdr_size, data_size, pad; + bool opa = rdma_cap_opa_mad(rmpp_recv->agent->qp_info->port_priv->device, + rmpp_recv->agent->qp_info->port_priv->port_num); rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad; hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); - data_size = sizeof(struct ib_rmpp_mad) - hdr_size; - pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); - if (pad > IB_MGMT_RMPP_DATA || pad < 0) - pad = 0; + if (opa && rmpp_recv->base_version == OPA_MGMT_BASE_VERSION) { + data_size = sizeof(struct opa_rmpp_mad) - hdr_size; + pad = OPA_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > OPA_MGMT_RMPP_DATA || pad < 0) + pad = 0; + } else { + data_size = sizeof(struct ib_rmpp_mad) - hdr_size; + pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > IB_MGMT_RMPP_DATA || pad < 0) + pad = 0; + } return hdr_size + rmpp_recv->seg_num * data_size - pad; } @@ -570,13 +584,14 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) if (mad_send_wr->seg_num == 1) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; - paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA - - mad_send_wr->pad; + paylen = (mad_send_wr->send_buf.seg_count * + mad_send_wr->send_buf.seg_rmpp_size) - + mad_send_wr->pad; } if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; - paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad; + paylen = mad_send_wr->send_buf.seg_rmpp_size - mad_send_wr->pad; } rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index fa17b55..1244f02 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -780,8 +780,7 @@ static void mcast_event_handler(struct ib_event_handler *handler, int index; dev = container_of(handler, struct mcast_device, event_handler); - if (rdma_port_get_link_layer(dev->device, event->element.port_num) != - IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_mcast(dev->device, event->element.port_num)) return; index = event->element.port_num - dev->start_port; @@ -808,9 +807,6 @@ static void mcast_add_one(struct ib_device *device) int i; int count = 0; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port, GFP_KERNEL); if (!dev) @@ -824,8 +820,7 @@ static void mcast_add_one(struct ib_device *device) } for (i = 0; i <= dev->end_port - dev->start_port; i++) { - if (rdma_port_get_link_layer(device, dev->start_port + i) != - IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_mcast(device, dev->start_port + i)) continue; port = &dev->port[i]; port->dev = dev; @@ -863,8 +858,7 @@ static void mcast_remove_one(struct ib_device *device) flush_workqueue(mcast_wq); for (i = 0; i <= dev->end_port - dev->start_port; i++) { - if (rdma_port_get_link_layer(device, dev->start_port + i) == - IB_LINK_LAYER_INFINIBAND) { + if (rdma_cap_ib_mcast(device, dev->start_port + i)) { port = &dev->port[i]; deref_port(port); wait_for_completion(&port->comp); diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h new file mode 100644 index 0000000..62d91bf --- /dev/null +++ b/drivers/infiniband/core/opa_smi.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __OPA_SMI_H_ +#define __OPA_SMI_H_ + +#include <rdma/ib_smi.h> +#include <rdma/opa_smi.h> + +#include "smi.h" + +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, u8 node_type, + int port_num, int phys_port_cnt); +int opa_smi_get_fwd_port(struct opa_smp *smp); +extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp); +extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, + u8 node_type, int port_num); + +/* + * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM + * via process_mad + */ +static inline enum smi_action opa_smi_check_local_smp(struct opa_smp *smp, + struct ib_device *device) +{ + /* C14-9:3 -- We're at the end of the DR segment of path */ + /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ + return (device->process_mad && + !opa_get_smp_direction(smp) && + (smp->hop_ptr == smp->hop_cnt + 1)) ? + IB_SMI_HANDLE : IB_SMI_DISCARD; +} + +/* + * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM + * via process_mad + */ +static inline enum smi_action opa_smi_check_local_returning_smp(struct opa_smp *smp, + struct ib_device *device) +{ + /* C14-13:3 -- We're at the end of the DR segment of path */ + /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ + return (device->process_mad && + opa_get_smp_direction(smp) && + !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD; +} + +#endif /* __OPA_SMI_H_ */ diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index c38f030..0fae850 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -450,7 +450,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event struct ib_sa_port *port = &sa_dev->port[event->element.port_num - sa_dev->start_port]; - if (rdma_port_get_link_layer(handler->device, port->port_num) != IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_sa(handler->device, port->port_num)) return; spin_lock_irqsave(&port->ah_lock, flags); @@ -540,7 +540,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->port_num = port_num; ah_attr->static_rate = rec->rate; - force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET; + force_grh = rdma_cap_eth_ah(device, port_num); if (rec->hop_limit > 1 || force_grh) { ah_attr->ah_flags = IB_AH_GRH; @@ -583,7 +583,8 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) query->mad_buf = ib_create_send_mad(query->port->agent, 1, query->sm_ah->pkey_index, 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, - gfp_mask); + gfp_mask, + IB_MGMT_BASE_VERSION); if (IS_ERR(query->mad_buf)) { kref_put(&query->sm_ah->ref, free_sm_ah); return -ENOMEM; @@ -1153,9 +1154,7 @@ static void ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; int s, e, i; - - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + int count = 0; if (device->node_type == RDMA_NODE_IB_SWITCH) s = e = 0; @@ -1175,7 +1174,7 @@ static void ib_sa_add_one(struct ib_device *device) for (i = 0; i <= e - s; ++i) { spin_lock_init(&sa_dev->port[i].ah_lock); - if (rdma_port_get_link_layer(device, i + 1) != IB_LINK_LAYER_INFINIBAND) + if (!rdma_cap_ib_sa(device, i + 1)) continue; sa_dev->port[i].sm_ah = NULL; @@ -1189,8 +1188,13 @@ static void ib_sa_add_one(struct ib_device *device) goto err; INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); + + count++; } + if (!count) + goto free; + ib_set_client_data(device, &sa_client, sa_dev); /* @@ -1204,19 +1208,20 @@ static void ib_sa_add_one(struct ib_device *device) if (ib_register_event_handler(&sa_dev->event_handler)) goto err; - for (i = 0; i <= e - s; ++i) - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) + for (i = 0; i <= e - s; ++i) { + if (rdma_cap_ib_sa(device, i + 1)) update_sm_ah(&sa_dev->port[i].update_task); + } return; err: - while (--i >= 0) - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) + while (--i >= 0) { + if (rdma_cap_ib_sa(device, i + 1)) ib_unregister_mad_agent(sa_dev->port[i].agent); - + } +free: kfree(sa_dev); - return; } @@ -1233,7 +1238,7 @@ static void ib_sa_remove_one(struct ib_device *device) flush_workqueue(ib_wq); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { - if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) { + if (rdma_cap_ib_sa(device, i + 1)) { ib_unregister_mad_agent(sa_dev->port[i].agent); if (sa_dev->port[i].sm_ah) kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 5855e44..368a561 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -5,6 +5,7 @@ * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -38,85 +39,82 @@ #include <rdma/ib_smi.h> #include "smi.h" - -/* - * Fixup a directed route SMP for sending - * Return 0 if the SMP should be discarded - */ -enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, int port_num) +#include "opa_smi.h" + +static enum smi_action __smi_handle_dr_smp_send(u8 node_type, int port_num, + u8 *hop_ptr, u8 hop_cnt, + const u8 *initial_path, + const u8 *return_path, + u8 direction, + bool dr_dlid_is_permissive, + bool dr_slid_is_permissive) { - u8 hop_ptr, hop_cnt; - - hop_ptr = smp->hop_ptr; - hop_cnt = smp->hop_cnt; - /* See section 14.2.2.2, Vol 1 IB spec */ /* C14-6 -- valid hop_cnt values are from 0 to 63 */ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS) return IB_SMI_DISCARD; - if (!ib_get_smp_direction(smp)) { + if (!direction) { /* C14-9:1 */ - if (hop_cnt && hop_ptr == 0) { - smp->hop_ptr++; - return (smp->initial_path[smp->hop_ptr] == + if (hop_cnt && *hop_ptr == 0) { + (*hop_ptr)++; + return (initial_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:2 */ - if (hop_ptr && hop_ptr < hop_cnt) { + if (*hop_ptr && *hop_ptr < hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) return IB_SMI_DISCARD; - /* smp->return_path set when received */ - smp->hop_ptr++; - return (smp->initial_path[smp->hop_ptr] == + /* return_path set when received */ + (*hop_ptr)++; + return (initial_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:3 -- We're at the end of the DR segment of path */ - if (hop_ptr == hop_cnt) { - /* smp->return_path set when received */ - smp->hop_ptr++; + if (*hop_ptr == hop_cnt) { + /* return_path set when received */ + (*hop_ptr)++; return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_dlid == IB_LID_PERMISSIVE ? + dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ /* C14-9:5 -- Fail unreasonable hop pointer */ - return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); + return (*hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } else { /* C14-13:1 */ - if (hop_cnt && hop_ptr == hop_cnt + 1) { - smp->hop_ptr--; - return (smp->return_path[smp->hop_ptr] == + if (hop_cnt && *hop_ptr == hop_cnt + 1) { + (*hop_ptr)--; + return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:2 */ - if (2 <= hop_ptr && hop_ptr <= hop_cnt) { + if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) return IB_SMI_DISCARD; - smp->hop_ptr--; - return (smp->return_path[smp->hop_ptr] == + (*hop_ptr)--; + return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:3 -- at the end of the DR segment of path */ - if (hop_ptr == 1) { - smp->hop_ptr--; + if (*hop_ptr == 1) { + (*hop_ptr)--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_slid == IB_LID_PERMISSIVE ? + dr_slid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */ - if (hop_ptr == 0) + if (*hop_ptr == 0) return IB_SMI_HANDLE; /* C14-13:5 -- Check for unreasonable hop pointer */ @@ -125,105 +123,164 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, } /* - * Adjust information for a received SMP - * Return 0 if the SMP should be dropped + * Fixup a directed route SMP for sending + * Return IB_SMI_DISCARD if the SMP should be discarded */ -enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, - int port_num, int phys_port_cnt) +enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, + u8 node_type, int port_num) { - u8 hop_ptr, hop_cnt; + return __smi_handle_dr_smp_send(node_type, port_num, + &smp->hop_ptr, smp->hop_cnt, + smp->initial_path, + smp->return_path, + ib_get_smp_direction(smp), + smp->dr_dlid == IB_LID_PERMISSIVE, + smp->dr_slid == IB_LID_PERMISSIVE); +} - hop_ptr = smp->hop_ptr; - hop_cnt = smp->hop_cnt; +enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, + u8 node_type, int port_num) +{ + return __smi_handle_dr_smp_send(node_type, port_num, + &smp->hop_ptr, smp->hop_cnt, + smp->route.dr.initial_path, + smp->route.dr.return_path, + opa_get_smp_direction(smp), + smp->route.dr.dr_dlid == + OPA_LID_PERMISSIVE, + smp->route.dr.dr_slid == + OPA_LID_PERMISSIVE); +} +static enum smi_action __smi_handle_dr_smp_recv(u8 node_type, int port_num, + int phys_port_cnt, + u8 *hop_ptr, u8 hop_cnt, + const u8 *initial_path, + u8 *return_path, + u8 direction, + bool dr_dlid_is_permissive, + bool dr_slid_is_permissive) +{ /* See section 14.2.2.2, Vol 1 IB spec */ /* C14-6 -- valid hop_cnt values are from 0 to 63 */ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS) return IB_SMI_DISCARD; - if (!ib_get_smp_direction(smp)) { + if (!direction) { /* C14-9:1 -- sender should have incremented hop_ptr */ - if (hop_cnt && hop_ptr == 0) + if (hop_cnt && *hop_ptr == 0) return IB_SMI_DISCARD; /* C14-9:2 -- intermediate hop */ - if (hop_ptr && hop_ptr < hop_cnt) { + if (*hop_ptr && *hop_ptr < hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) return IB_SMI_DISCARD; - smp->return_path[hop_ptr] = port_num; - /* smp->hop_ptr updated when sending */ - return (smp->initial_path[hop_ptr+1] <= phys_port_cnt ? + return_path[*hop_ptr] = port_num; + /* hop_ptr updated when sending */ + return (initial_path[*hop_ptr+1] <= phys_port_cnt ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:3 -- We're at the end of the DR segment of path */ - if (hop_ptr == hop_cnt) { + if (*hop_ptr == hop_cnt) { if (hop_cnt) - smp->return_path[hop_ptr] = port_num; - /* smp->hop_ptr updated when sending */ + return_path[*hop_ptr] = port_num; + /* hop_ptr updated when sending */ return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_dlid == IB_LID_PERMISSIVE ? + dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ /* C14-9:5 -- fail unreasonable hop pointer */ - return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); + return (*hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } else { /* C14-13:1 */ - if (hop_cnt && hop_ptr == hop_cnt + 1) { - smp->hop_ptr--; - return (smp->return_path[smp->hop_ptr] == + if (hop_cnt && *hop_ptr == hop_cnt + 1) { + (*hop_ptr)--; + return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:2 */ - if (2 <= hop_ptr && hop_ptr <= hop_cnt) { + if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) return IB_SMI_DISCARD; - /* smp->hop_ptr updated when sending */ - return (smp->return_path[hop_ptr-1] <= phys_port_cnt ? + /* hop_ptr updated when sending */ + return (return_path[*hop_ptr-1] <= phys_port_cnt ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:3 -- We're at the end of the DR segment of path */ - if (hop_ptr == 1) { - if (smp->dr_slid == IB_LID_PERMISSIVE) { + if (*hop_ptr == 1) { + if (dr_slid_is_permissive) { /* giving SMP to SM - update hop_ptr */ - smp->hop_ptr--; + (*hop_ptr)--; return IB_SMI_HANDLE; } - /* smp->hop_ptr updated when sending */ + /* hop_ptr updated when sending */ return (node_type == RDMA_NODE_IB_SWITCH ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ /* C14-13:5 -- Check for unreasonable hop pointer */ - return (hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD); + return (*hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } } -enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) +/* + * Adjust information for a received SMP + * Return IB_SMI_DISCARD if the SMP should be dropped + */ +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, + int port_num, int phys_port_cnt) { - u8 hop_ptr, hop_cnt; + return __smi_handle_dr_smp_recv(node_type, port_num, phys_port_cnt, + &smp->hop_ptr, smp->hop_cnt, + smp->initial_path, + smp->return_path, + ib_get_smp_direction(smp), + smp->dr_dlid == IB_LID_PERMISSIVE, + smp->dr_slid == IB_LID_PERMISSIVE); +} - hop_ptr = smp->hop_ptr; - hop_cnt = smp->hop_cnt; +/* + * Adjust information for a received SMP + * Return IB_SMI_DISCARD if the SMP should be dropped + */ +enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, u8 node_type, + int port_num, int phys_port_cnt) +{ + return __smi_handle_dr_smp_recv(node_type, port_num, phys_port_cnt, + &smp->hop_ptr, smp->hop_cnt, + smp->route.dr.initial_path, + smp->route.dr.return_path, + opa_get_smp_direction(smp), + smp->route.dr.dr_dlid == + OPA_LID_PERMISSIVE, + smp->route.dr.dr_slid == + OPA_LID_PERMISSIVE); +} - if (!ib_get_smp_direction(smp)) { +static enum smi_forward_action __smi_check_forward_dr_smp(u8 hop_ptr, u8 hop_cnt, + u8 direction, + bool dr_dlid_is_permissive, + bool dr_slid_is_permissive) +{ + if (!direction) { /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) return IB_SMI_FORWARD; /* C14-9:3 -- at the end of the DR segment of path */ if (hop_ptr == hop_cnt) - return (smp->dr_dlid == IB_LID_PERMISSIVE ? + return (dr_dlid_is_permissive ? IB_SMI_SEND : IB_SMI_LOCAL); /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ @@ -236,10 +293,29 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) /* C14-13:3 -- at the end of the DR segment of path */ if (hop_ptr == 1) - return (smp->dr_slid != IB_LID_PERMISSIVE ? + return (!dr_slid_is_permissive ? IB_SMI_SEND : IB_SMI_LOCAL); } return IB_SMI_LOCAL; + +} + +enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) +{ + return __smi_check_forward_dr_smp(smp->hop_ptr, smp->hop_cnt, + ib_get_smp_direction(smp), + smp->dr_dlid == IB_LID_PERMISSIVE, + smp->dr_slid == IB_LID_PERMISSIVE); +} + +enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp) +{ + return __smi_check_forward_dr_smp(smp->hop_ptr, smp->hop_cnt, + opa_get_smp_direction(smp), + smp->route.dr.dr_dlid == + OPA_LID_PERMISSIVE, + smp->route.dr.dr_slid == + OPA_LID_PERMISSIVE); } /* @@ -251,3 +327,13 @@ int smi_get_fwd_port(struct ib_smp *smp) return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] : smp->return_path[smp->hop_ptr-1]); } + +/* + * Return the forwarding port number from initial_path for outgoing SMP and + * from return_path for returning SMP + */ +int opa_smi_get_fwd_port(struct opa_smp *smp) +{ + return !opa_get_smp_direction(smp) ? smp->route.dr.initial_path[smp->hop_ptr+1] : + smp->route.dr.return_path[smp->hop_ptr-1]; +} diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index cbd0383..ed6b6c8 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -326,6 +326,8 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, int width = (tab_attr->index >> 16) & 0xff; struct ib_mad *in_mad = NULL; struct ib_mad *out_mad = NULL; + size_t mad_size = sizeof(*out_mad); + u16 out_mad_pkey_index = 0; ssize_t ret; if (!p->ibdev->process_mad) @@ -347,7 +349,10 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, in_mad->data[41] = p->port_num; /* PortSelect field */ if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY, - p->port_num, NULL, NULL, in_mad, out_mad) & + p->port_num, NULL, NULL, + (const struct ib_mad_hdr *)in_mad, mad_size, + (struct ib_mad_hdr *)out_mad, &mad_size, + &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { ret = -EINVAL; @@ -456,6 +461,7 @@ static void ib_device_release(struct device *device) { struct ib_device *dev = container_of(device, struct ib_device, dev); + kfree(dev->port_immutable); kfree(dev); } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index f2f6393..62c24b1 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1253,8 +1253,7 @@ static void ib_ucm_add_one(struct ib_device *device) dev_t base; struct ib_ucm_device *ucm_dev; - if (!device->alloc_ucontext || - rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1)) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 45d67e9..ad45469 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -722,26 +722,13 @@ static ssize_t ucma_query_route(struct ucma_file *file, resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.port_num = ctx->cm_id->port_num; - switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) { - case RDMA_TRANSPORT_IB: - switch (rdma_port_get_link_layer(ctx->cm_id->device, - ctx->cm_id->port_num)) { - case IB_LINK_LAYER_INFINIBAND: - ucma_copy_ib_route(&resp, &ctx->cm_id->route); - break; - case IB_LINK_LAYER_ETHERNET: - ucma_copy_iboe_route(&resp, &ctx->cm_id->route); - break; - default: - break; - } - break; - case RDMA_TRANSPORT_IWARP: + + if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_ib_route(&resp, &ctx->cm_id->route); + else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) + ucma_copy_iboe_route(&resp, &ctx->cm_id->route); + else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_iw_route(&resp, &ctx->cm_id->route); - break; - default: - break; - } out: if (copy_to_user((void __user *)(unsigned long)cmd.response, diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 8b8cc6f..40becdb 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -446,7 +446,6 @@ static int ib_umem_odp_map_dma_single_page( int remove_existing_mapping = 0; int ret = 0; - mutex_lock(&umem->odp_data->umem_mutex); /* * Note: we avoid writing if seq is different from the initial seq, to * handle case of a racing notifier. This check also allows us to bail @@ -479,8 +478,6 @@ static int ib_umem_odp_map_dma_single_page( } out: - mutex_unlock(&umem->odp_data->umem_mutex); - /* On Demand Paging - avoid pinning the page */ if (umem->context->invalidate_range || !stored_page) put_page(page); @@ -586,6 +583,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); user_virt += npages << PAGE_SHIFT; + mutex_lock(&umem->odp_data->umem_mutex); for (j = 0; j < npages; ++j) { ret = ib_umem_odp_map_dma_single_page( umem, k, base_virt_addr, local_page_list[j], @@ -594,6 +592,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, break; k++; } + mutex_unlock(&umem->odp_data->umem_mutex); if (ret < 0) { /* Release left over pages when handling errors. */ @@ -633,12 +632,11 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ + mutex_lock(&umem->odp_data->umem_mutex); for (addr = virt; addr < bound; addr += (u64)umem->page_size) { idx = (addr - ib_umem_start(umem)) / PAGE_SIZE; - mutex_lock(&umem->odp_data->umem_mutex); if (umem->odp_data->page_list[idx]) { struct page *page = umem->odp_data->page_list[idx]; - struct page *head_page = compound_head(page); dma_addr_t dma = umem->odp_data->dma_list[idx]; dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK; @@ -646,7 +644,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma & ODP_WRITE_ALLOWED_BIT) + if (dma & ODP_WRITE_ALLOWED_BIT) { + struct page *head_page = compound_head(page); /* * set_page_dirty prefers being called with * the page lock. However, MMU notifiers are @@ -657,13 +656,14 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, * be removed. */ set_page_dirty(head_page); + } /* on demand pinning support */ if (!umem->context->invalidate_range) put_page(page); umem->odp_data->page_list[idx] = NULL; umem->odp_data->dma_list[idx] = 0; } - mutex_unlock(&umem->odp_data->umem_mutex); } + mutex_unlock(&umem->odp_data->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 928cdd2..35567ff 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -99,7 +99,6 @@ struct ib_umad_port { }; struct ib_umad_device { - int start_port, end_port; struct kobject kobj; struct ib_umad_port port[0]; }; @@ -263,20 +262,23 @@ static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, { struct ib_mad_recv_buf *recv_buf; int left, seg_payload, offset, max_seg_payload; + size_t seg_size; - /* We need enough room to copy the first (or only) MAD segment. */ recv_buf = &packet->recv_wc->recv_buf; - if ((packet->length <= sizeof (*recv_buf->mad) && + seg_size = packet->recv_wc->mad_seg_size; + + /* We need enough room to copy the first (or only) MAD segment. */ + if ((packet->length <= seg_size && count < hdr_size(file) + packet->length) || - (packet->length > sizeof (*recv_buf->mad) && - count < hdr_size(file) + sizeof (*recv_buf->mad))) + (packet->length > seg_size && + count < hdr_size(file) + seg_size)) return -EINVAL; if (copy_to_user(buf, &packet->mad, hdr_size(file))) return -EFAULT; buf += hdr_size(file); - seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); + seg_payload = min_t(int, packet->length, seg_size); if (copy_to_user(buf, recv_buf->mad, seg_payload)) return -EFAULT; @@ -293,7 +295,7 @@ static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, return -ENOSPC; } offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); - max_seg_payload = sizeof (struct ib_mad) - offset; + max_seg_payload = seg_size - offset; for (left = packet->length - seg_payload, buf += seg_payload; left; left -= seg_payload, buf += seg_payload) { @@ -426,11 +428,11 @@ static int is_duplicate(struct ib_umad_file *file, * the same TID, reject the second as a duplicate. This is more * restrictive than required by the spec. */ - if (!ib_response_mad((struct ib_mad *) hdr)) { - if (!ib_response_mad((struct ib_mad *) sent_hdr)) + if (!ib_response_mad(hdr)) { + if (!ib_response_mad(sent_hdr)) return 1; continue; - } else if (!ib_response_mad((struct ib_mad *) sent_hdr)) + } else if (!ib_response_mad(sent_hdr)) continue; if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr)) @@ -451,6 +453,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_rmpp_mad *rmpp_mad; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; + u8 base_version; if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; @@ -517,11 +520,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, rmpp_active = 0; } + base_version = ((struct ib_mad_hdr *)&packet->mad.data)->base_version; data_len = count - hdr_size(file) - hdr_len; packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), packet->mad.hdr.pkey_index, rmpp_active, - hdr_len, data_len, GFP_KERNEL); + hdr_len, data_len, GFP_KERNEL, + base_version); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; @@ -1273,16 +1278,10 @@ static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; int s, e, i; + int count = 0; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - - if (device->node_type == RDMA_NODE_IB_SWITCH) - s = e = 0; - else { - s = 1; - e = device->phys_port_cnt; - } + s = rdma_start_port(device); + e = rdma_end_port(device); umad_dev = kzalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), @@ -1292,25 +1291,34 @@ static void ib_umad_add_one(struct ib_device *device) kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); - umad_dev->start_port = s; - umad_dev->end_port = e; - for (i = s; i <= e; ++i) { + if (!rdma_cap_ib_mad(device, i)) + continue; + umad_dev->port[i - s].umad_dev = umad_dev; if (ib_umad_init_port(device, i, umad_dev, &umad_dev->port[i - s])) goto err; + + count++; } + if (!count) + goto free; + ib_set_client_data(device, &umad_client, umad_dev); return; err: - while (--i >= s) - ib_umad_kill_port(&umad_dev->port[i - s]); + while (--i >= s) { + if (!rdma_cap_ib_mad(device, i)) + continue; + ib_umad_kill_port(&umad_dev->port[i - s]); + } +free: kobject_put(&umad_dev->kobj); } @@ -1322,8 +1330,10 @@ static void ib_umad_remove_one(struct ib_device *device) if (!umad_dev) return; - for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) - ib_umad_kill_port(&umad_dev->port[i]); + for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) { + if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) + ib_umad_kill_port(&umad_dev->port[i]); + } kobject_put(&umad_dev->kobj); } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index b716b08..ba365b6 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -259,5 +259,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd); IB_UVERBS_DECLARE_EX_CMD(create_flow); IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); +IB_UVERBS_DECLARE_EX_CMD(create_cq); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a9f0489..bbb02ff 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1330,40 +1330,37 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw, + struct ib_uverbs_ex_create_cq *cmd, + size_t cmd_sz, + int (*cb)(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *udata, + void *context), + void *context) { - struct ib_uverbs_create_cq cmd; - struct ib_uverbs_create_cq_resp resp; - struct ib_udata udata; struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; + struct ib_uverbs_ex_create_cq_resp resp; + struct ib_cq_init_attr attr = {}; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - - if (cmd.comp_vector >= file->device->num_comp_vectors) - return -EINVAL; + if (cmd->comp_vector >= file->device->num_comp_vectors) + return ERR_PTR(-EINVAL); obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class); + init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class); down_write(&obj->uobject.mutex); - if (cmd.comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + if (cmd->comp_channel >= 0) { + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); if (!ev_file) { ret = -EINVAL; goto err; @@ -1376,9 +1373,14 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); - cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, - cmd.comp_vector, - file->ucontext, &udata); + attr.cqe = cmd->cqe; + attr.comp_vector = cmd->comp_vector; + + if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) + attr.flags = cmd->flags; + + cq = file->device->ib_dev->create_cq(file->device->ib_dev, &attr, + file->ucontext, uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; @@ -1397,14 +1399,15 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, goto err_free; memset(&resp, 0, sizeof resp); - resp.cq_handle = obj->uobject.id; - resp.cqe = cq->cqe; + resp.base.cq_handle = obj->uobject.id; + resp.base.cqe = cq->cqe; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + + ret = cb(file, obj, &resp, ucore, context); + if (ret) + goto err_cb; mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); @@ -1414,9 +1417,9 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, up_write(&obj->uobject.mutex); - return in_len; + return obj; -err_copy: +err_cb: idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); err_free: @@ -1428,7 +1431,106 @@ err_file: err: put_uobj_write(&obj->uobject); - return ret; + + return ERR_PTR(ret); +} + +static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *ucore, void *context) +{ + if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) + return -EFAULT; + + return 0; +} + +ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_cq cmd; + struct ib_uverbs_ex_create_cq cmd_ex; + struct ib_uverbs_create_cq_resp resp; + struct ib_udata ucore; + struct ib_udata uhw; + struct ib_ucq_object *obj; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&ucore, buf, cmd.response, sizeof(cmd), sizeof(resp)); + + INIT_UDATA(&uhw, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); + + memset(&cmd_ex, 0, sizeof(cmd_ex)); + cmd_ex.user_handle = cmd.user_handle; + cmd_ex.cqe = cmd.cqe; + cmd_ex.comp_vector = cmd.comp_vector; + cmd_ex.comp_channel = cmd.comp_channel; + + obj = create_cq(file, &ucore, &uhw, &cmd_ex, + offsetof(typeof(cmd_ex), comp_channel) + + sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, + NULL); + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + return in_len; +} + +static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, + struct ib_ucq_object *obj, + struct ib_uverbs_ex_create_cq_resp *resp, + struct ib_udata *ucore, void *context) +{ + if (ib_copy_to_udata(ucore, resp, resp->response_length)) + return -EFAULT; + + return 0; +} + +int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_cq_resp resp; + struct ib_uverbs_ex_create_cq cmd; + struct ib_ucq_object *obj; + int err; + + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + + if (ucore->outlen < (offsetof(typeof(resp), response_length) + + sizeof(resp.response_length))) + return -ENOSPC; + + obj = create_cq(file, ucore, uhw, &cmd, + min(ucore->inlen, sizeof(cmd)), + ib_uverbs_ex_create_cq_cb, NULL); + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + return 0; } ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, @@ -3324,7 +3426,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, if (ucore->outlen < resp.response_length) return -ENOSPC; - err = device->query_device(device, &attr); + memset(&attr, 0, sizeof(attr)); + + err = device->query_device(device, &attr, uhw); if (err) return err; @@ -3348,6 +3452,18 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, #endif resp.response_length += sizeof(resp.odp_caps); + if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask)) + goto end; + + resp.timestamp_mask = attr.timestamp_mask; + resp.response_length += sizeof(resp.timestamp_mask); + + if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock)) + goto end; + + resp.hca_core_clock = attr.hca_core_clock; + resp.response_length += sizeof(resp.hca_core_clock); + end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); if (err) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 88cce9b..f6eef2d 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -124,6 +124,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, + [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, }; static void ib_uverbs_add_one(struct ib_device *device); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f93eb8d..bac3fb4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -48,6 +48,71 @@ #include "core_priv.h" +static const char * const ib_events[] = { + [IB_EVENT_CQ_ERR] = "CQ error", + [IB_EVENT_QP_FATAL] = "QP fatal error", + [IB_EVENT_QP_REQ_ERR] = "QP request error", + [IB_EVENT_QP_ACCESS_ERR] = "QP access error", + [IB_EVENT_COMM_EST] = "communication established", + [IB_EVENT_SQ_DRAINED] = "send queue drained", + [IB_EVENT_PATH_MIG] = "path migration successful", + [IB_EVENT_PATH_MIG_ERR] = "path migration error", + [IB_EVENT_DEVICE_FATAL] = "device fatal error", + [IB_EVENT_PORT_ACTIVE] = "port active", + [IB_EVENT_PORT_ERR] = "port error", + [IB_EVENT_LID_CHANGE] = "LID change", + [IB_EVENT_PKEY_CHANGE] = "P_key change", + [IB_EVENT_SM_CHANGE] = "SM change", + [IB_EVENT_SRQ_ERR] = "SRQ error", + [IB_EVENT_SRQ_LIMIT_REACHED] = "SRQ limit reached", + [IB_EVENT_QP_LAST_WQE_REACHED] = "last WQE reached", + [IB_EVENT_CLIENT_REREGISTER] = "client reregister", + [IB_EVENT_GID_CHANGE] = "GID changed", +}; + +const char *ib_event_msg(enum ib_event_type event) +{ + size_t index = event; + + return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ? + ib_events[index] : "unrecognized event"; +} +EXPORT_SYMBOL(ib_event_msg); + +static const char * const wc_statuses[] = { + [IB_WC_SUCCESS] = "success", + [IB_WC_LOC_LEN_ERR] = "local length error", + [IB_WC_LOC_QP_OP_ERR] = "local QP operation error", + [IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error", + [IB_WC_LOC_PROT_ERR] = "local protection error", + [IB_WC_WR_FLUSH_ERR] = "WR flushed", + [IB_WC_MW_BIND_ERR] = "memory management operation error", + [IB_WC_BAD_RESP_ERR] = "bad response error", + [IB_WC_LOC_ACCESS_ERR] = "local access error", + [IB_WC_REM_INV_REQ_ERR] = "invalid request error", + [IB_WC_REM_ACCESS_ERR] = "remote access error", + [IB_WC_REM_OP_ERR] = "remote operation error", + [IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded", + [IB_WC_RNR_RETRY_EXC_ERR] = "RNR retry counter exceeded", + [IB_WC_LOC_RDD_VIOL_ERR] = "local RDD violation error", + [IB_WC_REM_INV_RD_REQ_ERR] = "remote invalid RD request", + [IB_WC_REM_ABORT_ERR] = "operation aborted", + [IB_WC_INV_EECN_ERR] = "invalid EE context number", + [IB_WC_INV_EEC_STATE_ERR] = "invalid EE context state", + [IB_WC_FATAL_ERR] = "fatal error", + [IB_WC_RESP_TIMEOUT_ERR] = "response timeout error", + [IB_WC_GENERAL_ERR] = "general error", +}; + +const char *ib_wc_status_msg(enum ib_wc_status status) +{ + size_t index = status; + + return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ? + wc_statuses[index] : "unrecognized status"; +} +EXPORT_SYMBOL(ib_wc_status_msg); + __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) { switch (rate) { @@ -192,17 +257,16 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); -int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, - struct ib_grh *grh, struct ib_ah_attr *ah_attr) +int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, + const struct ib_wc *wc, const struct ib_grh *grh, + struct ib_ah_attr *ah_attr) { u32 flow_class; u16 gid_index; int ret; - int is_eth = (rdma_port_get_link_layer(device, port_num) == - IB_LINK_LAYER_ETHERNET); memset(ah_attr, 0, sizeof *ah_attr); - if (is_eth) { + if (rdma_cap_eth_ah(device, port_num)) { if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; @@ -244,8 +308,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, } EXPORT_SYMBOL(ib_init_ah_from_wc); -struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, - struct ib_grh *grh, u8 port_num) +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, + const struct ib_grh *grh, u8 port_num) { struct ib_ah_attr ah_attr; int ret; @@ -871,7 +935,7 @@ int ib_resolve_eth_l2_attrs(struct ib_qp *qp, union ib_gid sgid; if ((*qp_attr_mask & IB_QP_AV) && - (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) { + (rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))) { ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, qp_attr->ah_attr.grh.sgid_index, &sgid); if (ret) @@ -1012,11 +1076,12 @@ EXPORT_SYMBOL(ib_destroy_qp); struct ib_cq *ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), - void *cq_context, int cqe, int comp_vector) + void *cq_context, + const struct ib_cq_init_attr *cq_attr) { struct ib_cq *cq; - cq = device->create_cq(device, cqe, comp_vector, NULL, NULL); + cq = device->create_cq(device, cq_attr, NULL, NULL); if (!IS_ERR(cq)) { cq->device = device; diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index bdf3507..25c3f00 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -63,13 +63,16 @@ #include "c2_provider.h" #include "c2_user.h" -static int c2_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int c2_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct c2_dev *c2dev = to_c2dev(ibdev); pr_debug("%s:%u\n", __func__, __LINE__); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + *props = c2dev->props; return 0; } @@ -286,13 +289,18 @@ static int c2_destroy_qp(struct ib_qp *ib_qp) return 0; } -static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, int vector, +static struct ib_cq *c2_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; struct c2_cq *cq; int err; + if (attr->flags) + return ERR_PTR(-EINVAL); + cq = kmalloc(sizeof(*cq), GFP_KERNEL); if (!cq) { pr_debug("%s: Unable to allocate CQ\n", __func__); @@ -582,9 +590,13 @@ static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) static int c2_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) { pr_debug("%s:%u\n", __func__, __LINE__); return -ENOSYS; @@ -757,6 +769,23 @@ static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev) return netdev; } +static int c2_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = c2_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} + int c2_register_device(struct c2_dev *dev) { int ret = -ENOMEM; @@ -820,6 +849,7 @@ int c2_register_device(struct c2_dev *dev) dev->ibdev.reg_phys_mr = c2_reg_phys_mr; dev->ibdev.reg_user_mr = c2_reg_user_mr; dev->ibdev.dereg_mr = c2_dereg_mr; + dev->ibdev.get_port_immutable = c2_port_immutable; dev->ibdev.alloc_fmr = NULL; dev->ibdev.unmap_fmr = NULL; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 811b24a..b1b7323 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -85,9 +85,13 @@ static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) static int iwch_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) { return -ENOSYS; } @@ -138,10 +142,12 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq) return 0; } -static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int vector, - struct ib_ucontext *ib_context, - struct ib_udata *udata) +static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_context, + struct ib_udata *udata) { + int entries = attr->cqe; struct iwch_dev *rhp; struct iwch_cq *chp; struct iwch_create_cq_resp uresp; @@ -151,6 +157,9 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve size_t resplen; PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); + if (attr->flags) + return ERR_PTR(-EINVAL); + rhp = to_iwch_dev(ibdev); chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) @@ -1145,13 +1154,17 @@ static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) (fw_mic & 0xffff); } -static int iwch_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct iwch_dev *dev; + PDBG("%s ibdev %p\n", __func__, ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + dev = to_iwch_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); @@ -1343,6 +1356,23 @@ static struct device_attribute *iwch_class_attributes[] = { &dev_attr_board_id, }; +static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = iwch_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} + int iwch_register_device(struct iwch_dev *dev) { int ret; @@ -1420,6 +1450,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.post_recv = iwch_post_receive; dev->ibdev.get_protocol_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; + dev->ibdev.get_port_immutable = iwch_port_immutable; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 57176dd..3ad8dc7 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -583,6 +583,22 @@ static void c4iw_record_pm_msg(struct c4iw_ep *ep, sizeof(ep->com.mapped_remote_addr)); } +static int get_remote_addr(struct c4iw_ep *parent_ep, struct c4iw_ep *child_ep) +{ + int ret; + + print_addr(&parent_ep->com, __func__, "get_remote_addr parent_ep "); + print_addr(&child_ep->com, __func__, "get_remote_addr child_ep "); + + ret = iwpm_get_remote_info(&parent_ep->com.mapped_local_addr, + &child_ep->com.mapped_remote_addr, + &child_ep->com.remote_addr, RDMA_NL_C4IW); + if (ret) + PDBG("Unable to find remote peer addr info - err %d\n", ret); + + return ret; +} + static void best_mtu(const unsigned short *mtus, unsigned short mtu, unsigned int *idx, int use_ts, int ipv6) { @@ -675,7 +691,7 @@ static int send_connect(struct c4iw_ep *ep) if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); - opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + opt2 |= T5_ISS_F; } t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); @@ -2042,9 +2058,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) status, status2errno(status)); if (is_neg_adv(status)) { - dev_warn(&dev->rdev.lldi.pdev->dev, - "Connection problems for atid %u status %u (%s)\n", - atid, status, neg_adv_str(status)); + PDBG("%s Connection problems for atid %u status %u (%s)\n", + __func__, atid, status, neg_adv_str(status)); + ep->stats.connect_neg_adv++; + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.neg_adv++; + mutex_unlock(&dev->rdev.stats.lock); return 0; } @@ -2214,7 +2233,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); - opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + opt2 |= T5_ISS_F; rpl5 = (void *)rpl; memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16)); if (peer2peer) @@ -2352,27 +2371,57 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; + + /* + * The mapped_local and mapped_remote addresses get setup with + * the actual 4-tuple. The local address will be based on the + * actual local address of the connection, but on the port number + * of the parent listening endpoint. The remote address is + * setup based on a query to the IWPM since we don't know what it + * originally was before mapping. If no mapping was done, then + * mapped_remote == remote, and mapped_local == local. + */ if (iptype == 4) { struct sockaddr_in *sin = (struct sockaddr_in *) - &child_ep->com.local_addr; + &child_ep->com.mapped_local_addr; + sin->sin_family = PF_INET; sin->sin_port = local_port; sin->sin_addr.s_addr = *(__be32 *)local_ip; - sin = (struct sockaddr_in *)&child_ep->com.remote_addr; + + sin = (struct sockaddr_in *)&child_ep->com.local_addr; + sin->sin_family = PF_INET; + sin->sin_port = ((struct sockaddr_in *) + &parent_ep->com.local_addr)->sin_port; + sin->sin_addr.s_addr = *(__be32 *)local_ip; + + sin = (struct sockaddr_in *)&child_ep->com.mapped_remote_addr; sin->sin_family = PF_INET; sin->sin_port = peer_port; sin->sin_addr.s_addr = *(__be32 *)peer_ip; } else { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &child_ep->com.local_addr; + &child_ep->com.mapped_local_addr; + sin6->sin6_family = PF_INET6; sin6->sin6_port = local_port; memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); - sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr; + + sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_port; + memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); + + sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_remote_addr; sin6->sin6_family = PF_INET6; sin6->sin6_port = peer_port; memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); } + memcpy(&child_ep->com.remote_addr, &child_ep->com.mapped_remote_addr, + sizeof(child_ep->com.remote_addr)); + get_remote_addr(parent_ep, child_ep); + c4iw_get_ep(&parent_ep->com); child_ep->parent_ep = parent_ep; child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); @@ -2520,9 +2569,13 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_tid(t, tid); if (is_neg_adv(req->status)) { - dev_warn(&dev->rdev.lldi.pdev->dev, - "Negative advice on abort - tid %u status %d (%s)\n", - ep->hwtid, req->status, neg_adv_str(req->status)); + PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", + __func__, ep->hwtid, req->status, + neg_adv_str(req->status)); + ep->stats.abort_neg_adv++; + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.neg_adv++; + mutex_unlock(&dev->rdev.stats.lock); return 0; } PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, @@ -3571,7 +3624,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, * TP will ignore any value > 0 for MSS index. */ req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF)); - req->cookie = (unsigned long)skb; + req->cookie = (uintptr_t)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); @@ -3931,9 +3984,11 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } if (is_neg_adv(req->status)) { - dev_warn(&dev->rdev.lldi.pdev->dev, - "Negative advice on abort - tid %u status %d (%s)\n", - ep->hwtid, req->status, neg_adv_str(req->status)); + PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", + __func__, ep->hwtid, req->status, + neg_adv_str(req->status)); + ep->stats.abort_neg_adv++; + dev->rdev.stats.neg_adv++; kfree_skb(skb); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index ab7692a..c7aab48 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -55,7 +55,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_NRES_V(1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (unsigned long) &wr_wait; + res_wr->cookie = (uintptr_t)&wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_RESET; @@ -125,7 +125,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_NRES_V(1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (unsigned long) &wr_wait; + res_wr->cookie = (uintptr_t)&wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_WRITE; @@ -158,10 +158,15 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, cq->gen = 1; cq->gts = rdev->lldi.gts_reg; cq->rdev = rdev; - if (user) { - cq->ugts = (u64)pci_resource_start(rdev->lldi.pdev, 2) + - (cq->cqid << rdev->cqshift); - cq->ugts &= PAGE_MASK; + + cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS, + &cq->bar2_qid, + user ? &cq->bar2_pa : NULL); + if (user && !cq->bar2_va) { + pr_warn(MOD "%s: cqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), cq->cqid); + ret = -EINVAL; + goto err4; } return 0; err4: @@ -859,10 +864,13 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq) return 0; } -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *ib_context, +struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_context, struct ib_udata *udata) { + int entries = attr->cqe; + int vector = attr->comp_vector; struct c4iw_dev *rhp; struct c4iw_cq *chp; struct c4iw_create_cq_resp uresp; @@ -872,6 +880,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, struct c4iw_mm_entry *mm, *mm2; PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); + if (attr->flags) + return ERR_PTR(-EINVAL); rhp = to_c4iw_dev(ibdev); @@ -964,14 +974,13 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, insert_mmap(ucontext, mm); mm2->key = uresp.gts_key; - mm2->addr = chp->cq.ugts; + mm2->addr = chp->cq.bar2_pa; mm2->len = PAGE_SIZE; insert_mmap(ucontext, mm2); } PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n", __func__, chp->cq.cqid, chp, chp->cq.size, - chp->cq.memsize, - (unsigned long long) chp->cq.dma_addr); + chp->cq.memsize, (unsigned long long) chp->cq.dma_addr); return &chp->ibcq; err5: kfree(mm2); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 8fb295e..1a29739 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -93,6 +93,7 @@ static struct ibnl_client_cbs c4iw_nl_cb_table[] = { [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} }; @@ -151,7 +152,7 @@ static int wr_log_show(struct seq_file *seq, void *v) int prev_ts_set = 0; int idx, end; -#define ts2ns(ts) div64_ul((ts) * dev->rdev.lldi.cclk_ps, 1000) +#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000) idx = atomic_read(&dev->rdev.wr_log_idx) & (dev->rdev.wr_log_size - 1); @@ -489,6 +490,7 @@ static int stats_show(struct seq_file *seq, void *v) dev->rdev.stats.act_ofld_conn_fails); seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", dev->rdev.stats.pas_ofld_conn_fails); + seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv); seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird); return 0; } @@ -560,10 +562,13 @@ static int dump_ep(int id, void *p, void *data) cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " "history 0x%lx hwtid %d atid %d " + "conn_na %u abort_na %u " "%pI4:%d/%d <-> %pI4:%d/%d\n", ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, ep->com.flags, ep->com.history, ep->hwtid, ep->atid, + ep->stats.connect_neg_adv, + ep->stats.abort_neg_adv, &lsin->sin_addr, ntohs(lsin->sin_port), ntohs(mapped_lsin->sin_port), &rsin->sin_addr, ntohs(rsin->sin_port), @@ -581,10 +586,13 @@ static int dump_ep(int id, void *p, void *data) cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " "history 0x%lx hwtid %d atid %d " + "conn_na %u abort_na %u " "%pI6:%d/%d <-> %pI6:%d/%d\n", ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, ep->com.flags, ep->com.history, ep->hwtid, ep->atid, + ep->stats.connect_neg_adv, + ep->stats.abort_neg_adv, &lsin6->sin6_addr, ntohs(lsin6->sin6_port), ntohs(mapped_lsin6->sin6_port), &rsin6->sin6_addr, ntohs(rsin6->sin6_port), @@ -765,12 +773,29 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) c4iw_init_dev_ucontext(rdev, &rdev->uctx); /* - * qpshift is the number of bits to shift the qpid left in order - * to get the correct address of the doorbell for that qp. + * This implementation assumes udb_density == ucq_density! Eventually + * we might need to support this but for now fail the open. Also the + * cqid and qpid range must match for now. */ - rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density); + if (rdev->lldi.udb_density != rdev->lldi.ucq_density) { + pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n", + pci_name(rdev->lldi.pdev), rdev->lldi.udb_density, + rdev->lldi.ucq_density); + err = -EINVAL; + goto err1; + } + if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start || + rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) { + pr_err(MOD "%s: unsupported qp and cq id ranges " + "qp start %u size %u cq start %u size %u\n", + pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size, + rdev->lldi.vr->cq.size); + err = -EINVAL; + goto err1; + } + rdev->qpmask = rdev->lldi.udb_density - 1; - rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density); rdev->cqmask = rdev->lldi.ucq_density - 1; PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d " "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x " @@ -784,14 +809,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, rdev->lldi.vr->cq.size); - PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu " - "qpmask 0x%x cqshift %lu cqmask 0x%x\n", + PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p " + "qpmask 0x%x cqmask 0x%x\n", (unsigned)pci_resource_len(rdev->lldi.pdev, 2), - (u64)pci_resource_start(rdev->lldi.pdev, 2), - rdev->lldi.db_reg, - rdev->lldi.gts_reg, - rdev->qpshift, rdev->qpmask, - rdev->cqshift, rdev->cqmask); + (void *)pci_resource_start(rdev->lldi.pdev, 2), + rdev->lldi.db_reg, rdev->lldi.gts_reg, + rdev->qpmask, rdev->cqmask); if (c4iw_num_stags(rdev) == 0) { err = -EINVAL; @@ -1355,7 +1378,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) t4_sq_host_wq_pidx(&qp->wq), t4_sq_wq_size(&qp->wq)); if (ret) { - pr_err(KERN_ERR MOD "%s: Fatal error - " + pr_err(MOD "%s: Fatal error - " "DB overflow recovery failed - " "error syncing SQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.sq.qid); @@ -1371,7 +1394,7 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) t4_rq_wq_size(&qp->wq)); if (ret) { - pr_err(KERN_ERR MOD "%s: Fatal error - " + pr_err(MOD "%s: Fatal error - " "DB overflow recovery failed - " "error syncing RQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.rq.qid); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index d87e165..cc77844 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -137,6 +137,7 @@ struct c4iw_stats { u64 tcam_full; u64 act_ofld_conn_fails; u64 pas_ofld_conn_fails; + u64 neg_adv; }; struct c4iw_hw_queue { @@ -164,9 +165,7 @@ struct wr_log_entry { struct c4iw_rdev { struct c4iw_resource resource; - unsigned long qpshift; u32 qpmask; - unsigned long cqshift; u32 cqmask; struct c4iw_dev_ucontext uctx; struct gen_pool *pbl_pool; @@ -814,6 +813,11 @@ struct c4iw_listen_ep { int backlog; }; +struct c4iw_ep_stats { + unsigned connect_neg_adv; + unsigned abort_neg_adv; +}; + struct c4iw_ep { struct c4iw_ep_common com; struct c4iw_ep *parent_ep; @@ -846,6 +850,7 @@ struct c4iw_ep { unsigned int retry_count; int snd_win; int rcv_win; + struct c4iw_ep_stats stats; }; static inline void print_addr(struct c4iw_ep_common *epc, const char *func, @@ -985,10 +990,10 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int acc, u64 *iova_start); int c4iw_dereg_mr(struct ib_mr *ib_mr); int c4iw_destroy_cq(struct ib_cq *ib_cq); -struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, - int vector, - struct ib_ucontext *ib_context, - struct ib_udata *udata); +struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_context, + struct ib_udata *udata); int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_destroy_qp(struct ib_qp *ib_qp); @@ -1025,6 +1030,9 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; +void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, + enum cxgb4_bar2_qtype qtype, + unsigned int *pbar2_qid, u64 *pbar2_pa); extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); extern int c4iw_wr_log; extern int db_fc_threshold; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 3ef0cf9..cff815b 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, if (i == (num_wqe-1)) { req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | FW_WR_COMPL_F); - req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait; + req->wr.wr_lo = (__force __be64)&wr_wait; } else req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR)); req->wr.wr_mid = cpu_to_be32( @@ -676,12 +676,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) mhp->attr.zbva = 0; mhp->attr.va_fbo = 0; mhp->attr.page_size = 0; - mhp->attr.len = ~0UL; + mhp->attr.len = ~0ULL; mhp->attr.pbl_size = 0; ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, FW_RI_STAG_NSMR, mhp->attr.perms, - mhp->attr.mw_bind_enable, 0, 0, ~0UL, 0, 0, 0); + mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0); if (ret) goto err1; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 66bd6a2..62c816a 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -80,9 +80,13 @@ static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, struct ib_wc *in_wc, - struct ib_grh *in_grh, struct ib_mad *in_mad, - struct ib_mad *out_mad) + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, + struct ib_mad_hdr *out_mad, + size_t *out_mad_size, + u16 *out_mad_pkey_index) { return -ENOSYS; } @@ -301,13 +305,17 @@ static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, return 0; } -static int c4iw_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct c4iw_dev *dev; + PDBG("%s ibdev %p\n", __func__, ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + dev = to_c4iw_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); @@ -465,6 +473,23 @@ static struct device_attribute *c4iw_class_attributes[] = { &dev_attr_board_id, }; +static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = c4iw_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} + int c4iw_register_device(struct c4iw_dev *dev) { int ret; @@ -542,6 +567,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.post_recv = c4iw_post_receive; dev->ibdev.get_protocol_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; + dev->ibdev.get_port_immutable = c4iw_port_immutable; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 15cae5a..6517e12 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -165,6 +165,29 @@ static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, return 0; } +/* + * Determine the BAR2 virtual address and qid. If pbar2_pa is not NULL, + * then this is a user mapping so compute the page-aligned physical address + * for mapping. + */ +void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, + enum cxgb4_bar2_qtype qtype, + unsigned int *pbar2_qid, u64 *pbar2_pa) +{ + u64 bar2_qoffset; + int ret; + + ret = cxgb4_bar2_sge_qregs(rdev->lldi.ports[0], qid, qtype, + pbar2_pa ? 1 : 0, + &bar2_qoffset, pbar2_qid); + if (ret) + return NULL; + + if (pbar2_pa) + *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK; + return rdev->bar2_kva + bar2_qoffset; +} + static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx) @@ -236,25 +259,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = rdev->lldi.db_reg; - wq->gts = rdev->lldi.gts_reg; - if (user || is_t5(rdev->lldi.adapter_type)) { - u32 off; - off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK; - if (user) { - wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off); - } else { - off += 128 * (wq->sq.qid & rdev->qpmask) + 8; - wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off); - } - off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK; - if (user) { - wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off); - } else { - off += 128 * (wq->rq.qid & rdev->qpmask) + 8; - wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off); - } + wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, + &wq->sq.bar2_qid, + user ? &wq->sq.bar2_pa : NULL); + wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS, + &wq->rq.bar2_qid, + user ? &wq->rq.bar2_pa : NULL); + + /* + * User mode must have bar2 access. + */ + if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) { + pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); + goto free_dma; } + wq->rdev = rdev; wq->rq.msn = 1; @@ -275,7 +296,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_NRES_V(2) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (unsigned long) &wr_wait; + res_wr->cookie = (uintptr_t)&wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; @@ -336,10 +357,9 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (ret) goto free_dma; - PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n", + PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n", __func__, wq->sq.qid, wq->rq.qid, wq->db, - (__force unsigned long) wq->sq.udb, - (__force unsigned long) wq->rq.udb); + wq->sq.bar2_va, wq->rq.bar2_va); return 0; free_dma: @@ -1209,7 +1229,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, wqe->flowid_len16 = cpu_to_be32( FW_WR_FLOWID_V(ep->hwtid) | FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); - wqe->cookie = (unsigned long) &ep->com.wr_wait; + wqe->cookie = (uintptr_t)&ep->com.wr_wait; wqe->u.fini.type = FW_RI_TYPE_FINI; ret = c4iw_ofld_send(&rhp->rdev, skb); @@ -1279,7 +1299,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) FW_WR_FLOWID_V(qhp->ep->hwtid) | FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); - wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait; + wqe->cookie = (uintptr_t)&qhp->ep->com.wr_wait; wqe->u.init.type = FW_RI_TYPE_INIT; wqe->u.init.mpareqbit_p2ptype = @@ -1766,11 +1786,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); insert_mmap(ucontext, mm2); mm3->key = uresp.sq_db_gts_key; - mm3->addr = (__force unsigned long) qhp->wq.sq.udb; + mm3->addr = (__force unsigned long)qhp->wq.sq.bar2_pa; mm3->len = PAGE_SIZE; insert_mmap(ucontext, mm3); mm4->key = uresp.rq_db_gts_key; - mm4->addr = (__force unsigned long) qhp->wq.rq.udb; + mm4->addr = (__force unsigned long)qhp->wq.rq.bar2_pa; mm4->len = PAGE_SIZE; insert_mmap(ucontext, mm4); if (mm5) { diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 871cdca..274a7ab 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -33,6 +33,7 @@ #include "t4_hw.h" #include "t4_regs.h" +#include "t4_values.h" #include "t4_msg.h" #include "t4fw_ri_api.h" @@ -290,8 +291,10 @@ struct t4_sq { unsigned long phys_addr; struct t4_swsqe *sw_sq; struct t4_swsqe *oldest_read; - u64 __iomem *udb; + void __iomem *bar2_va; + u64 bar2_pa; size_t memsize; + u32 bar2_qid; u32 qid; u16 in_use; u16 size; @@ -314,8 +317,10 @@ struct t4_rq { dma_addr_t dma_addr; DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_swrqe *sw_rq; - u64 __iomem *udb; + void __iomem *bar2_va; + u64 bar2_pa; size_t memsize; + u32 bar2_qid; u32 qid; u32 msn; u32 rqt_hwaddr; @@ -332,7 +337,6 @@ struct t4_wq { struct t4_sq sq; struct t4_rq rq; void __iomem *db; - void __iomem *gts; struct c4iw_rdev *rdev; int flushed; }; @@ -457,15 +461,18 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, /* Flush host queue memory writes. */ wmb(); - if (t5) { - if (inc == 1 && wqe) { + if (wq->sq.bar2_va) { + if (inc == 1 && wq->sq.bar2_qid == 0 && wqe) { PDBG("%s: WC wq->sq.pidx = %d\n", __func__, wq->sq.pidx); - pio_copy(wq->sq.udb + 7, (void *)wqe); + pio_copy((u64 __iomem *) + (wq->sq.bar2_va + SGE_UDB_WCDOORBELL), + (u64 *)wqe); } else { PDBG("%s: DB wq->sq.pidx = %d\n", __func__, wq->sq.pidx); - writel(PIDX_T5_V(inc), wq->sq.udb); + writel(PIDX_T5_V(inc) | QID_V(wq->sq.bar2_qid), + wq->sq.bar2_va + SGE_UDB_KDOORBELL); } /* Flush user doorbell area writes. */ @@ -481,15 +488,18 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5, /* Flush host queue memory writes. */ wmb(); - if (t5) { - if (inc == 1 && wqe) { + if (wq->rq.bar2_va) { + if (inc == 1 && wq->rq.bar2_qid == 0 && wqe) { PDBG("%s: WC wq->rq.pidx = %d\n", __func__, wq->rq.pidx); - pio_copy(wq->rq.udb + 7, (void *)wqe); + pio_copy((u64 __iomem *) + (wq->rq.bar2_va + SGE_UDB_WCDOORBELL), + (void *)wqe); } else { PDBG("%s: DB wq->rq.pidx = %d\n", __func__, wq->rq.pidx); - writel(PIDX_T5_V(inc), wq->rq.udb); + writel(PIDX_T5_V(inc) | QID_V(wq->rq.bar2_qid), + wq->rq.bar2_va + SGE_UDB_KDOORBELL); } /* Flush user doorbell area writes. */ @@ -534,11 +544,14 @@ struct t4_cq { DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_cqe *sw_queue; void __iomem *gts; + void __iomem *bar2_va; + u64 bar2_pa; + u32 bar2_qid; struct c4iw_rdev *rdev; - u64 ugts; size_t memsize; __be64 bits_type_ts; u32 cqid; + u32 qid_mask; int vector; u16 size; /* including status page */ u16 cidx; @@ -551,6 +564,15 @@ struct t4_cq { unsigned long flags; }; +static inline void write_gts(struct t4_cq *cq, u32 val) +{ + if (cq->bar2_va) + writel(val | INGRESSQID_V(cq->bar2_qid), + cq->bar2_va + SGE_UDB_GTS); + else + writel(val | INGRESSQID_V(cq->cqid), cq->gts); +} + static inline int t4_clear_cq_armed(struct t4_cq *cq) { return test_and_clear_bit(CQ_ARMED, &cq->flags); @@ -562,14 +584,12 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se) set_bit(CQ_ARMED, &cq->flags); while (cq->cidx_inc > CIDXINC_M) { - val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7) | - INGRESSQID_V(cq->cqid); - writel(val, cq->gts); + val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7); + write_gts(cq, val); cq->cidx_inc -= CIDXINC_M; } - val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6) | - INGRESSQID_V(cq->cqid); - writel(val, cq->gts); + val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6); + write_gts(cq, val); cq->cidx_inc = 0; return 0; } @@ -600,9 +620,8 @@ static inline void t4_hwcq_consume(struct t4_cq *cq) if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == CIDXINC_M) { u32 val; - val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7) | - INGRESSQID_V(cq->cqid); - writel(val, cq->gts); + val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7); + write_gts(cq, val); cq->cidx_inc = 0; } if (++cq->cidx == cq->size) { diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 5e53327..343e8daf 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -848,6 +848,8 @@ enum { /* TCP congestion control algorithms */ #define CONG_CNTRL_V(x) ((x) << CONG_CNTRL_S) #define CONG_CNTRL_G(x) (((x) >> CONG_CNTRL_S) & CONG_CNTRL_M) -#define CONG_CNTRL_VALID (1 << 18) +#define T5_ISS_S 18 +#define T5_ISS_V(x) ((x) << T5_ISS_S) +#define T5_ISS_F T5_ISS_V(1U) #endif /* _T4FW_RI_API_H_ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 8cc8375..9b68b17 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -113,10 +113,12 @@ struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) return ret; } -struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, +struct ib_cq *ehca_create_cq(struct ib_device *device, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int cqe = attr->cqe; static const u32 additional_cqe = 20; struct ib_cq *cq; struct ehca_cq *my_cq; @@ -131,6 +133,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, int ipz_rc, i; unsigned long flags; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 9ed4d25..e8b1bb6 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -50,7 +50,8 @@ static unsigned int limit_uint(unsigned int value) return min_t(unsigned int, value, INT_MAX); } -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { int i, ret = 0; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, @@ -71,6 +72,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, }; + if (uhw->inlen || uhw->outlen) + return -EINVAL; + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 22f79af..80e6a3d 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -44,11 +44,15 @@ #include "ehca_classes.h" -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props); +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw); int ehca_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); +enum rdma_protocol_type +ehca_query_protocol(struct ib_device *device, u8 port_num); + int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, struct ehca_sma_attr *attr); @@ -126,7 +130,8 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); -struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, +struct ib_cq *ehca_create_cq(struct ib_device *device, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); @@ -188,9 +193,10 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context); int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); void ehca_poll_eqs(unsigned long data); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index cd8d290..8246418 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -46,6 +46,7 @@ #include <linux/notifier.h> #include <linux/memory.h> +#include <rdma/ib_mad.h> #include "ehca_classes.h" #include "ehca_iverbs.h" #include "ehca_mrmw.h" @@ -431,6 +432,24 @@ init_node_guid1: return ret; } +static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = ehca_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + static int ehca_init_device(struct ehca_shca *shca) { int ret; @@ -510,6 +529,7 @@ static int ehca_init_device(struct ehca_shca *shca) shca->ib_device.process_mad = ehca_process_mad; shca->ib_device.mmap = ehca_mmap; shca->ib_device.dma_ops = &ehca_dma_mapping_ops; + shca->ib_device.get_port_immutable = ehca_port_immutable; if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { shca->ib_device.uverbs_cmd_mask |= @@ -534,6 +554,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) struct ib_cq *ibcq; struct ib_qp *ibqp; struct ib_qp_init_attr qp_init_attr; + struct ib_cq_init_attr cq_attr = {}; int ret; if (sport->ibcq_aqp1) { @@ -541,7 +562,9 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) return -EPERM; } - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0); + cq_attr.cqe = 10; + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), + &cq_attr); if (IS_ERR(ibcq)) { ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); return PTR_ERR(ibcq); diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c index 120aedf..cec1815 100644 --- a/drivers/infiniband/hw/ehca/ehca_mcast.c +++ b/drivers/infiniband/hw/ehca/ehca_mcast.c @@ -77,7 +77,7 @@ int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return -EINVAL; } - memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid)); + memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); interface_id = be64_to_cpu(my_gid.global.interface_id); @@ -114,7 +114,7 @@ int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return -EINVAL; } - memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid)); + memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); interface_id = be64_to_cpu(my_gid.global.interface_id); diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index dba8f9f..12b5bc2 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -140,10 +140,10 @@ struct vertcfl { } __attribute__ ((packed)); static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { - struct ib_perf *in_perf = (struct ib_perf *)in_mad; + const struct ib_perf *in_perf = (const struct ib_perf *)in_mad; struct ib_perf *out_perf = (struct ib_perf *)out_mad; struct ib_class_port_info *poi = (struct ib_class_port_info *)out_perf->data; @@ -187,8 +187,8 @@ static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, /* if request was globally routed, copy route info */ if (in_grh) { - struct vertcfl *vertcfl = - (struct vertcfl *)&in_grh->version_tclass_flow; + const struct vertcfl *vertcfl = + (const struct vertcfl *)&in_grh->version_tclass_flow; memcpy(poi->redirect_gid, in_grh->dgid.raw, sizeof(poi->redirect_gid)); tcslfl->tc = vertcfl->tc; @@ -217,10 +217,17 @@ perf_reply: } int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int ret; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) return IB_MAD_RESULT_FAILURE; diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig index 1d9bb11..8fe54ff 100644 --- a/drivers/infiniband/hw/ipath/Kconfig +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -9,3 +9,6 @@ config INFINIBAND_IPATH as IP-over-InfiniBand as well as with userspace applications (in conjunction with InfiniBand userspace access). For QLogic PCIe QLE based cards, use the QIB driver instead. + + If you have this hardware you will need to boot with PAT disabled + on your x86-64 systems, use the nopat kernel parameter. diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 0416c6c..e9dd911 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -188,7 +188,7 @@ static void send_complete(unsigned long data) /** * ipath_create_cq - create a completion queue * @ibdev: the device this completion queue is attached to - * @entries: the minimum size of the completion queue + * @attr: creation attributes * @context: unused by the InfiniPath driver * @udata: unused by the InfiniPath driver * @@ -197,16 +197,21 @@ static void send_complete(unsigned long data) * * Called by ib_create_cq() in the generic verbs code. */ -struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, +struct ib_cq *ipath_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; struct ipath_ibdev *dev = to_idev(ibdev); struct ipath_cq *cq; struct ipath_cq_wc *wc; struct ib_cq *ret; u32 sz; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 1 || entries > ib_ipath_max_cqes) { ret = ERR_PTR(-EINVAL); goto done; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index bd0caed..2d7e503 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -42,6 +42,9 @@ #include <linux/bitmap.h> #include <linux/slab.h> #include <linux/module.h> +#ifdef CONFIG_X86_64 +#include <asm/pat.h> +#endif #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -395,6 +398,14 @@ static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) unsigned long long addr; u32 bar0 = 0, bar1 = 0; +#ifdef CONFIG_X86_64 + if (WARN(pat_enabled(), + "ipath needs PAT disabled, boot with nopat kernel parameter\n")) { + ret = -ENODEV; + goto bail; + } +#endif + dd = ipath_alloc_devdata(pdev); if (IS_ERR(dd)) { ret = PTR_ERR(dd); @@ -542,6 +553,7 @@ static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dd->ipath_kregbase = __ioremap(addr, len, (_PAGE_NO_CACHE|_PAGE_WRITETHRU)); #else + /* XXX: split this properly to enable on PAT */ dd->ipath_kregbase = ioremap_nocache(addr, len); #endif @@ -587,12 +599,8 @@ static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ret = ipath_enable_wc(dd); - if (ret) { - ipath_dev_err(dd, "Write combining not enabled " - "(err %d): performance may be poor\n", - -ret); + if (ret) ret = 0; - } ipath_verify_pioperf(dd); diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index e08db70..f0f9471 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -463,9 +463,7 @@ struct ipath_devdata { /* offset in HT config space of slave/primary interface block */ u8 ipath_ht_slave_off; /* for write combining settings */ - unsigned long ipath_wc_cookie; - unsigned long ipath_wc_base; - unsigned long ipath_wc_len; + int wc_cookie; /* ref count for each pkey */ atomic_t ipath_pkeyrefs[4]; /* shadow copy of struct page *'s for exp tid pages */ diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index e890e5b..948188e 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -1257,7 +1257,7 @@ static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp, } static int process_subn(struct ib_device *ibdev, int mad_flags, - u8 port_num, struct ib_mad *in_mad, + u8 port_num, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_smp *smp = (struct ib_smp *)out_mad; @@ -1389,7 +1389,7 @@ bail: } static int process_perf(struct ib_device *ibdev, u8 port_num, - struct ib_mad *in_mad, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; @@ -1490,10 +1490,17 @@ bail: * This is called by the ib_mad module. */ int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int ret; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 44ea939..48253b8 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1495,11 +1495,14 @@ bail: return 0; } -static int ipath_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct ipath_ibdev *dev = to_idev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + memset(props, 0, sizeof(*props)); props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | @@ -1980,6 +1983,24 @@ static int disable_timer(struct ipath_devdata *dd) return 0; } +static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = ipath_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + /** * ipath_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -2179,6 +2200,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev->process_mad = ipath_process_mad; dev->mmap = ipath_mmap; dev->dma_ops = &ipath_dma_mapping_ops; + dev->get_port_immutable = ipath_port_immutable; snprintf(dev->node_desc, sizeof(dev->node_desc), IPATH_IDSTR " %s", init_utsname()->nodename); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index ae6cff4..ec167e5 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -701,9 +701,11 @@ static inline void ipath_schedule_send(struct ipath_qp *qp) int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); /* * Compare the lower 24 bits of the two values. @@ -807,7 +809,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector, +struct ib_cq *ipath_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c index 4ad0b93..7b6e4c8 100644 --- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c +++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c @@ -37,7 +37,6 @@ */ #include <linux/pci.h> -#include <asm/mtrr.h> #include <asm/processor.h> #include "ipath_kernel.h" @@ -122,27 +121,14 @@ int ipath_enable_wc(struct ipath_devdata *dd) } if (!ret) { - int cookie; - ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC " - "(addr %llx, len=0x%llx)\n", - (unsigned long long) pioaddr, - (unsigned long long) piolen); - cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); - if (cookie < 0) { - { - dev_info(&dd->pcidev->dev, - "mtrr_add() WC for PIO bufs " - "failed (%d)\n", - cookie); - ret = -EINVAL; - } - } else { - ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, " - "cookie is %d\n", cookie); - dd->ipath_wc_cookie = cookie; - dd->ipath_wc_base = (unsigned long) pioaddr; - dd->ipath_wc_len = (unsigned long) piolen; - } + dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen); + if (dd->wc_cookie < 0) { + ipath_dev_err(dd, "Seting mtrr failed on PIO buffers\n"); + ret = -ENODEV; + } else if (dd->wc_cookie == 0) + ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n"); + else + ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n"); } return ret; @@ -154,16 +140,5 @@ int ipath_enable_wc(struct ipath_devdata *dd) */ void ipath_disable_wc(struct ipath_devdata *dd) { - if (dd->ipath_wc_cookie) { - int r; - ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n"); - r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base, - dd->ipath_wc_len); - if (r < 0) - dev_info(&dd->pcidev->dev, - "mtrr_del(%lx, %lx, %lx) failed: %d\n", - dd->ipath_wc_cookie, dd->ipath_wc_base, - dd->ipath_wc_len, r); - dd->ipath_wc_cookie = 0; /* even on failure */ - } + arch_phys_wc_del(dd->wc_cookie); } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 0176caa..36eb3d0 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -166,10 +166,14 @@ err_buf: return err; } -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, +#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_FLAGS_TIMESTAMP_COMPLETION +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; + int vector = attr->comp_vector; struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_cq *cq; struct mlx4_uar *uar; @@ -178,6 +182,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector if (entries < 1 || entries > dev->dev->caps.max_cqes) return ERR_PTR(-EINVAL); + if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) + return ERR_PTR(-EINVAL); + cq = kmalloc(sizeof *cq, GFP_KERNEL); if (!cq) return ERR_PTR(-ENOMEM); @@ -188,6 +195,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector spin_lock_init(&cq->lock); cq->resize_buf = NULL; cq->resize_umem = NULL; + cq->create_flags = attr->flags; INIT_LIST_HEAD(&cq->send_qp_list); INIT_LIST_HEAD(&cq->recv_qp_list); @@ -231,7 +239,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector vector = dev->eq_table[vector % ibdev->num_comp_vectors]; err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, vector, 0, 0); + cq->db.dma, &cq->mcq, vector, 0, + !!(cq->create_flags & IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); if (err) goto err_dbmap; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9cd2b00..3e2dee4 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -111,8 +111,9 @@ __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) } int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad) + int port, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const void *in_mad, void *response_mad) { struct mlx4_cmd_mailbox *inmailbox, *outmailbox; void *inbox; @@ -220,7 +221,7 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can * synthesize LID change, Client-Rereg, GID change, and P_Key change events. */ -static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, +static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad *mad, u16 prev_lid) { struct ib_port_info *pinfo; @@ -356,7 +357,7 @@ static void node_desc_override(struct ib_device *dev, } } -static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad) +static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, const struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; struct ib_mad_send_buf *send_buf; @@ -366,7 +367,8 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma if (agent) { send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) return; /* @@ -722,8 +724,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, } static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { u16 slid, prev_lid = 0; int err; @@ -825,8 +827,8 @@ static void edit_counter(struct mlx4_counter *cnt, } static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_ib_dev *dev = to_mdev(ibdev); @@ -866,9 +868,17 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); + switch (rdma_port_get_link_layer(ibdev, port_num)) { case IB_LINK_LAYER_INFINIBAND: return ib_process_mad(ibdev, mad_flags, port_num, in_wc, @@ -1773,6 +1783,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, int create_tun, struct mlx4_ib_demux_pv_ctx *ctx) { int ret, cq_size; + struct ib_cq_init_attr cq_attr = {}; if (ctx->state != DEMUX_PV_STATE_DOWN) return -EEXIST; @@ -1801,8 +1812,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, if (ctx->has_smi) cq_size *= 2; + cq_attr.cqe = cq_size; ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, - NULL, ctx, cq_size, 0); + NULL, ctx, &cq_attr); if (IS_ERR(ctx->cq)) { ret = PTR_ERR(ctx->cq); pr_err("Couldn't create tunnel CQ (%d)\n", ret); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 57070c5..166da78 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -132,14 +132,35 @@ static int num_ib_ports(struct mlx4_dev *dev) } static int mlx4_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) + struct ib_device_attr *props, + struct ib_udata *uhw) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; int have_ib_ports; + struct mlx4_uverbs_ex_query_device cmd; + struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0}; + struct mlx4_clock_params clock_params; + if (uhw->inlen) { + if (uhw->inlen < sizeof(cmd)) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + } + + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) @@ -229,7 +250,24 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; + props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL; + props->timestamp_mask = 0xFFFFFFFFFFFFULL; + + err = mlx4_get_internal_clock_params(dev->dev, &clock_params); + if (err) + goto out; + + if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) { + resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE; + resp.response_length += sizeof(resp.hca_core_clock_offset); + resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP; + } + if (uhw->outlen) { + err = ib_copy_to_udata(uhw, &resp, resp.response_length); + if (err) + goto out; + } out: kfree(in_mad); kfree(out_mad); @@ -712,8 +750,24 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) dev->dev->caps.num_uars, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; - } else + } else if (vma->vm_pgoff == 3) { + struct mlx4_clock_params params; + int ret = mlx4_get_internal_clock_params(dev->dev, ¶ms); + + if (ret) + return ret; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, + (pci_resource_start(dev->dev->persist->pdev, + params.bar) + + params.offset) + >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + } else { return -EINVAL; + } return 0; } @@ -758,6 +812,7 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx4_ib_xrcd *xrcd; + struct ib_cq_init_attr cq_attr = {}; int err; if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) @@ -777,7 +832,8 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, goto err2; } - xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0); + cq_attr.cqe = 1; + xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr); if (IS_ERR(xrcd->cq)) { err = PTR_ERR(xrcd->cq); goto err3; @@ -1185,7 +1241,6 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, &mflow->reg_id[i].id); if (err) goto err_create_flow; - i++; if (is_bonded) { /* Application always sees one port so the mirror rule * must be on port #2 @@ -1200,6 +1255,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, j++; } + i++; } if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) { @@ -1207,7 +1263,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, &mflow->reg_id[i].id); if (err) goto err_create_flow; - i++; + if (is_bonded) { flow_attr->port = 2; err = mlx4_ib_tunnel_steer_add(qp, flow_attr, @@ -1218,6 +1274,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, j++; } /* function to create mirror rule */ + i++; } return &mflow->ibflow; @@ -1569,8 +1626,7 @@ static void reset_gids_task(struct work_struct *work) MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) - pr_warn(KERN_WARNING - "set port %d command failed\n", gw->port); + pr_warn("set port %d command failed\n", gw->port); } mlx4_free_cmd_mailbox(dev, mailbox); @@ -2115,6 +2171,29 @@ static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) kfree(ibdev->eq_table); } +static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = mlx4_ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + + if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + else + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; @@ -2242,6 +2321,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; + ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; if (!mlx4_is_slave(ibdev->dev)) { ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; @@ -2279,6 +2359,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); } + ibdev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ); + mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index fce39343..7933adf 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -110,6 +110,7 @@ struct mlx4_ib_cq { struct mutex resize_mutex; struct ib_umem *umem; struct ib_umem *resize_umem; + int create_flags; /* List of qps that it serves.*/ struct list_head send_qp_list; struct list_head recv_qp_list; @@ -555,6 +556,21 @@ struct mlx4_ib_qp_tunnel_init_attr { u8 port; }; +struct mlx4_uverbs_ex_query_device { + __u32 comp_mask; + __u32 reserved; +}; + +enum query_device_resp_mask { + QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0, +}; + +struct mlx4_uverbs_ex_query_device_resp { + __u32 comp_mask; + __u32 response_length; + __u64 hca_core_clock_offset; +}; + static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); @@ -668,7 +684,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); -struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, +struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); int mlx4_ib_destroy_cq(struct ib_cq *cq); @@ -706,11 +723,13 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad); + int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad); int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2ee6b10..09fbae6 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -736,10 +736,13 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) mlx5_db_free(dev->mdev, &cq->db); } -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; + int vector = attr->comp_vector; struct mlx5_create_cq_mbox_in *cqb = NULL; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_cq *cq; @@ -750,6 +753,9 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, int eqn; int err; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 0) return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9cf9a37..8e45714 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -41,8 +41,8 @@ enum { }; int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad) + u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad) { u8 op_modifier = 0; @@ -58,11 +58,18 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, } int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { u16 slid; int err; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 57c9809..c6cb26e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -63,7 +63,8 @@ static char mlx5_version[] = DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; static int mlx5_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) + struct ib_device_attr *props, + struct ib_udata *uhw) { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; @@ -74,6 +75,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, int max_sq_sg; u64 flags; + if (uhw->inlen || uhw->outlen) + return -EINVAL; + gen = &dev->mdev->caps.gen; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); @@ -910,6 +914,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev) struct mlx5_general_caps *gen; int err = -ENOMEM; int port; + struct ib_udata uhw = {.inlen = 0, .outlen = 0}; gen = &dev->mdev->caps.gen; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); @@ -920,7 +925,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev) if (!dprops) goto out; - err = mlx5_ib_query_device(&dev->ib_dev, dprops); + err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); goto out; @@ -971,6 +976,7 @@ static int create_umr_res(struct mlx5_ib_dev *dev) struct ib_cq *cq; struct ib_qp *qp; struct ib_mr *mr; + struct ib_cq_init_attr cq_attr = {}; int ret; attr = kzalloc(sizeof(*attr), GFP_KERNEL); @@ -994,8 +1000,9 @@ static int create_umr_res(struct mlx5_ib_dev *dev) goto error_1; } - cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128, - 0); + cq_attr.cqe = 128; + cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, + &cq_attr); if (IS_ERR(cq)) { mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); ret = PTR_ERR(cq); @@ -1087,6 +1094,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; + struct ib_cq_init_attr cq_attr = {.cqe = 1}; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); @@ -1100,7 +1108,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->p0->uobject = NULL; atomic_set(&devr->p0->usecnt, 0); - devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL); + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); if (IS_ERR(devr->c0)) { ret = PTR_ERR(devr->c0); goto error1; @@ -1182,6 +1190,24 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) mlx5_ib_dealloc_pd(devr->p0); } +static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = mlx5_ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -1285,6 +1311,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; + dev->ib_dev.get_port_immutable = mlx5_port_immutable; mlx5_ib_internal_query_odp_caps(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index dff1cfc..178314e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -525,8 +525,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, - u8 port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad); + u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad); struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, struct mlx5_ib_ah *ah); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); @@ -556,8 +556,9 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length); -struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata); int mlx5_ib_destroy_cq(struct ib_cq *cq); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); @@ -586,8 +587,10 @@ int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int mlx5_ib_unmap_fmr(struct list_head *fmr_list); int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 4d7024b..d35f62d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1392,7 +1392,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= gen->port[port - 1].gid_table_len) { - pr_err(KERN_ERR "sgid_index (%u) too large. max is %d\n", + pr_err("sgid_index (%u) too large. max is %d\n", ah->grh.sgid_index, gen->port[port - 1].gid_table_len); return -EINVAL; } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 9d3e5c1..c7f49bb 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1858,8 +1858,8 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn) } int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad) + int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad) { struct mthca_mailbox *inmailbox, *outmailbox; void *inbox; diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index f952244..d2e5b19 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -312,8 +312,8 @@ int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, struct mthca_mailbox *mailbox); int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn); int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad); + int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const void *in_mad, void *response_mad); int mthca_READ_MGM(struct mthca_dev *dev, int index, struct mthca_mailbox *mailbox); int mthca_WRITE_MGM(struct mthca_dev *dev, int index, diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7e6a6d6..4393a022 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -576,10 +576,11 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad); + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); int mthca_create_agents(struct mthca_dev *dev); void mthca_free_agents(struct mthca_dev *dev); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 8881fa3..6b2418b 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -104,7 +104,7 @@ static void update_sm_ah(struct mthca_dev *dev, */ static void smp_snoop(struct ib_device *ibdev, u8 port_num, - struct ib_mad *mad, + const struct ib_mad *mad, u16 prev_lid) { struct ib_event event; @@ -160,7 +160,7 @@ static void node_desc_override(struct ib_device *dev, static void forward_trap(struct mthca_dev *dev, u8 port_num, - struct ib_mad *mad) + const struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; struct ib_mad_send_buf *send_buf; @@ -170,7 +170,8 @@ static void forward_trap(struct mthca_dev *dev, if (agent) { send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) return; /* @@ -195,15 +196,21 @@ static void forward_trap(struct mthca_dev *dev, int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int err; u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 8edb28a..15d0644 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -77,7 +77,6 @@ s64 mthca_make_profile(struct mthca_dev *dev, u64 mem_base, mem_avail; s64 total_size = 0; struct mthca_resource *profile; - struct mthca_resource tmp; int i, j; profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL); @@ -136,11 +135,8 @@ s64 mthca_make_profile(struct mthca_dev *dev, */ for (i = MTHCA_RES_NUM; i > 0; --i) for (j = 1; j < i; ++j) { - if (profile[j].size > profile[j - 1].size) { - tmp = profile[j]; - profile[j] = profile[j - 1]; - profile[j - 1] = tmp; - } + if (profile[j].size > profile[j - 1].size) + swap(profile[j], profile[j - 1]); } for (i = 0; i < MTHCA_RES_NUM; ++i) { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 415f8e1..93ae51d 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -57,14 +57,17 @@ static void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } -static int mthca_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; struct mthca_dev *mdev = to_mdev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) @@ -641,16 +644,20 @@ static int mthca_destroy_qp(struct ib_qp *qp) return 0; } -static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; struct mthca_create_cq ucmd; struct mthca_cq *cq; int nent; int err; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) return ERR_PTR(-EINVAL); @@ -1244,6 +1251,24 @@ out: return err; } +static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = mthca_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + int mthca_register_device(struct mthca_dev *dev) { int ret; @@ -1323,6 +1348,7 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; + dev->ib_dev.get_port_immutable = mthca_port_immutable; if (dev->mthca_flags & MTHCA_FLAG_FMR) { dev->ib_dev.alloc_fmr = mthca_alloc_fmr; diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 3b2a6dc..9f9d5c5 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -116,6 +116,7 @@ static struct ibnl_client_cbs nes_nl_cb_table[] = { [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6f09a72..9047af4 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -596,27 +596,52 @@ static void nes_form_reg_msg(struct nes_vnic *nesvnic, memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE); } +static void record_sockaddr_info(struct sockaddr_storage *addr_info, + nes_addr_t *ip_addr, u16 *port_num) +{ + struct sockaddr_in *in_addr = (struct sockaddr_in *)addr_info; + + if (in_addr->sin_family == AF_INET) { + *ip_addr = ntohl(in_addr->sin_addr.s_addr); + *port_num = ntohs(in_addr->sin_port); + } +} + /* * nes_record_pm_msg - Save the received mapping info */ static void nes_record_pm_msg(struct nes_cm_info *cm_info, struct iwpm_sa_data *pm_msg) { - struct sockaddr_in *mapped_loc_addr = - (struct sockaddr_in *)&pm_msg->mapped_loc_addr; - struct sockaddr_in *mapped_rem_addr = - (struct sockaddr_in *)&pm_msg->mapped_rem_addr; - - if (mapped_loc_addr->sin_family == AF_INET) { - cm_info->mapped_loc_addr = - ntohl(mapped_loc_addr->sin_addr.s_addr); - cm_info->mapped_loc_port = ntohs(mapped_loc_addr->sin_port); - } - if (mapped_rem_addr->sin_family == AF_INET) { - cm_info->mapped_rem_addr = - ntohl(mapped_rem_addr->sin_addr.s_addr); - cm_info->mapped_rem_port = ntohs(mapped_rem_addr->sin_port); - } + record_sockaddr_info(&pm_msg->mapped_loc_addr, + &cm_info->mapped_loc_addr, &cm_info->mapped_loc_port); + + record_sockaddr_info(&pm_msg->mapped_rem_addr, + &cm_info->mapped_rem_addr, &cm_info->mapped_rem_port); +} + +/* + * nes_get_reminfo - Get the address info of the remote connecting peer + */ +static int nes_get_remote_addr(struct nes_cm_node *cm_node) +{ + struct sockaddr_storage mapped_loc_addr, mapped_rem_addr; + struct sockaddr_storage remote_addr; + int ret; + + nes_create_sockaddr(htonl(cm_node->mapped_loc_addr), + htons(cm_node->mapped_loc_port), &mapped_loc_addr); + nes_create_sockaddr(htonl(cm_node->mapped_rem_addr), + htons(cm_node->mapped_rem_port), &mapped_rem_addr); + + ret = iwpm_get_remote_info(&mapped_loc_addr, &mapped_rem_addr, + &remote_addr, RDMA_NL_NES); + if (ret) + nes_debug(NES_DBG_CM, "Unable to find remote peer address info\n"); + else + record_sockaddr_info(&remote_addr, &cm_node->rem_addr, + &cm_node->rem_port); + return ret; } /** @@ -1566,9 +1591,14 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, return NULL; /* set our node specific transport info */ - cm_node->loc_addr = cm_info->loc_addr; + if (listener) { + cm_node->loc_addr = listener->loc_addr; + cm_node->loc_port = listener->loc_port; + } else { + cm_node->loc_addr = cm_info->loc_addr; + cm_node->loc_port = cm_info->loc_port; + } cm_node->rem_addr = cm_info->rem_addr; - cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; cm_node->mapped_loc_addr = cm_info->mapped_loc_addr; @@ -1586,6 +1616,8 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, &cm_node->loc_addr, cm_node->loc_port, &cm_node->rem_addr, cm_node->rem_port); cm_node->listener = listener; + if (listener) + cm_node->tos = listener->tos; cm_node->netdev = nesvnic->netdev; cm_node->cm_id = cm_info->cm_id; memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); @@ -2151,6 +2183,7 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->state = NES_CM_STATE_ESTABLISHED; if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + nes_get_remote_addr(cm_node); handle_rcv_mpa(cm_node, skb); } else { /* rcvd ACK only */ dev_kfree_skb_any(skb); @@ -2907,6 +2940,9 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT); + nesqp->nesqp_context->misc2 |= cpu_to_le32( + cm_node->tos << NES_QPCONTEXT_MISC2_TOS_SHIFT); + nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16); nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32( @@ -3581,6 +3617,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->ord_size = 1; cm_node->apbvt_set = apbvt_set; + cm_node->tos = cm_id->tos; nesqp->cm_node = cm_node; cm_node->nesqp = nesqp; nes_add_ref(&nesqp->ibqp); @@ -3635,6 +3672,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) } cm_id->provider_data = cm_node; + cm_node->tos = cm_id->tos; if (!cm_node->reused_node) { if (nes_create_mapinfo(&cm_info)) diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index f522cf6..32a6420 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -303,6 +303,7 @@ struct nes_cm_listener { int backlog; enum nes_cm_listener_state listener_state; u32 reused_node; + u8 tos; }; /* per connection node and node state information */ @@ -352,6 +353,7 @@ struct nes_cm_node { struct list_head reset_entry; struct nes_qp *nesqp; atomic_t passive_state; + u8 tos; }; /* structure for client or CM to fill when making CM api calls. */ diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index c0d0296..fbc43e5 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -512,12 +512,16 @@ static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl) /** * nes_query_device */ -static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props) +static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_ib_device *nesibdev = nesvnic->nesibdev; + if (uhw->inlen || uhw->outlen) + return -EINVAL; + memset(props, 0, sizeof(*props)); memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6); @@ -606,7 +610,6 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr return 0; } - /** * nes_query_pkey */ @@ -1527,10 +1530,12 @@ static int nes_destroy_qp(struct ib_qp *ibqp) /** * nes_create_cq */ -static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, - struct ib_ucontext *context, struct ib_udata *udata) +static struct ib_cq *nes_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) { + int entries = attr->cqe; u64 u64temp; struct nes_vnic *nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; @@ -1550,6 +1555,9 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, unsigned long flags; int ret; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries > nesadapter->max_cqe) return ERR_PTR(-EINVAL); @@ -3222,8 +3230,10 @@ static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) * nes_process_mad */ static int nes_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { nes_debug(NES_DBG_INIT, "\n"); return -ENOSYS; @@ -3828,6 +3838,22 @@ static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_ return 0; } +static int nes_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = nes_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} /** * nes_init_ofa_device @@ -3928,6 +3954,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->reject = nes_reject; nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; + nesibdev->ibdev.get_port_immutable = nes_port_immutable; return nesibdev; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index c9780d9..b396344 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -40,7 +40,7 @@ #include <be_roce.h> #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DRV_VERSION "10.4.205.0u" +#define OCRDMA_ROCE_DRV_VERSION "10.6.0.0" #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" @@ -515,6 +515,8 @@ static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev, memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); if (rdma_is_multicast_addr(&in6)) rdma_get_mcast_mac(&in6, mac_addr); + else if (rdma_link_local_addr(&in6)) + rdma_get_ll_mac(&in6, mac_addr); else memcpy(mac_addr, ah_attr->dmac, ETH_ALEN); return 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index d812904..4bafa15 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -56,7 +56,13 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, vlan_tag = attr->vlan_id; if (!vlan_tag || (vlan_tag > 0xFFF)) vlan_tag = dev->pvid; - if (vlan_tag && (vlan_tag < 0x1000)) { + if (vlan_tag || dev->pfc_state) { + if (!vlan_tag) { + pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", + dev->id); + pr_err("ocrdma%d:Using VLAN 0 for this connection\n", + dev->id); + } eth.eth_type = cpu_to_be16(0x8100); eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT; @@ -121,7 +127,9 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) goto av_conf_err; } - if (pd->uctx) { + if ((pd->uctx) && + (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && + (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) { status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid, attr->dmac, &attr->vlan_id); if (status) { @@ -196,12 +204,19 @@ int ocrdma_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int status; struct ocrdma_dev *dev; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_PERF_MGMT: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 726a87c..cf366fe 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -42,7 +42,9 @@ int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *); int ocrdma_process_mad(struct ib_device *, int process_mad_flags, u8 port_num, - struct ib_wc *in_wc, - struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); #endif /* __OCRDMA_AH_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 0c9e959..47615ff 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -933,12 +933,18 @@ static irqreturn_t ocrdma_irq_handler(int irq, void *handle) struct ocrdma_eqe eqe; struct ocrdma_eqe *ptr; u16 cq_id; + u8 mcode; int budget = eq->cq_cnt; do { ptr = ocrdma_get_eqe(eq); eqe = *ptr; ocrdma_le32_to_cpu(&eqe, sizeof(eqe)); + mcode = (eqe.id_valid & OCRDMA_EQE_MAJOR_CODE_MASK) + >> OCRDMA_EQE_MAJOR_CODE_SHIFT; + if (mcode == OCRDMA_MAJOR_CODE_SENTINAL) + pr_err("EQ full on eqid = 0x%x, eqe = 0x%x\n", + eq->q.id, eqe.id_valid); if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0) break; @@ -1434,27 +1440,30 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev) struct ocrdma_alloc_pd_range_rsp *rsp; /* Pre allocate the DPP PDs */ - cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD_RANGE, sizeof(*cmd)); - if (!cmd) - return -ENOMEM; - cmd->pd_count = dev->attr.max_dpp_pds; - cmd->enable_dpp_rsvd |= OCRDMA_ALLOC_PD_ENABLE_DPP; - status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); - if (status) - goto mbx_err; - rsp = (struct ocrdma_alloc_pd_range_rsp *)cmd; - - if ((rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_DPP) && rsp->pd_count) { - dev->pd_mgr->dpp_page_index = rsp->dpp_page_pdid >> - OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT; - dev->pd_mgr->pd_dpp_start = rsp->dpp_page_pdid & - OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK; - dev->pd_mgr->max_dpp_pd = rsp->pd_count; - pd_bitmap_size = BITS_TO_LONGS(rsp->pd_count) * sizeof(long); - dev->pd_mgr->pd_dpp_bitmap = kzalloc(pd_bitmap_size, - GFP_KERNEL); + if (dev->attr.max_dpp_pds) { + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD_RANGE, + sizeof(*cmd)); + if (!cmd) + return -ENOMEM; + cmd->pd_count = dev->attr.max_dpp_pds; + cmd->enable_dpp_rsvd |= OCRDMA_ALLOC_PD_ENABLE_DPP; + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + rsp = (struct ocrdma_alloc_pd_range_rsp *)cmd; + + if (!status && (rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_DPP) && + rsp->pd_count) { + dev->pd_mgr->dpp_page_index = rsp->dpp_page_pdid >> + OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT; + dev->pd_mgr->pd_dpp_start = rsp->dpp_page_pdid & + OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK; + dev->pd_mgr->max_dpp_pd = rsp->pd_count; + pd_bitmap_size = + BITS_TO_LONGS(rsp->pd_count) * sizeof(long); + dev->pd_mgr->pd_dpp_bitmap = kzalloc(pd_bitmap_size, + GFP_KERNEL); + } + kfree(cmd); } - kfree(cmd); cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD_RANGE, sizeof(*cmd)); if (!cmd) @@ -1462,10 +1471,8 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev) cmd->pd_count = dev->attr.max_pd - dev->attr.max_dpp_pds; status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); - if (status) - goto mbx_err; rsp = (struct ocrdma_alloc_pd_range_rsp *)cmd; - if (rsp->pd_count) { + if (!status && rsp->pd_count) { dev->pd_mgr->pd_norm_start = rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK; dev->pd_mgr->max_normal_pd = rsp->pd_count; @@ -1473,15 +1480,13 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev) dev->pd_mgr->pd_norm_bitmap = kzalloc(pd_bitmap_size, GFP_KERNEL); } + kfree(cmd); if (dev->pd_mgr->pd_norm_bitmap || dev->pd_mgr->pd_dpp_bitmap) { /* Enable PD resource manager */ dev->pd_mgr->pd_prealloc_valid = true; - } else { - return -ENOMEM; + return 0; } -mbx_err: - kfree(cmd); return status; } @@ -2406,7 +2411,7 @@ int ocrdma_mbx_query_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, struct ocrdma_query_qp *cmd; struct ocrdma_query_qp_rsp *rsp; - cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_QP, sizeof(*cmd)); + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_QP, sizeof(*rsp)); if (!cmd) return status; cmd->qp_id = qp->id; @@ -2428,7 +2433,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, int status; struct ib_ah_attr *ah_attr = &attrs->ah_attr; union ib_gid sgid, zgid; - u32 vlan_id; + u32 vlan_id = 0xFFFF; u8 mac_addr[6]; struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); @@ -2468,12 +2473,22 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); if (attr_mask & IB_QP_VID) { vlan_id = attrs->vlan_id; + } else if (dev->pfc_state) { + vlan_id = 0; + pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", + dev->id); + pr_err("ocrdma%d:Using VLAN 0 for this connection\n", + dev->id); + } + + if (vlan_id < 0x1000) { cmd->params.vlan_dmac_b4_to_b5 |= vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; cmd->params.rnt_rc_sl_fl |= (dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT; } + return 0; } @@ -2519,8 +2534,10 @@ static int ocrdma_set_qp_params(struct ocrdma_qp *qp, cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID; } if (attr_mask & IB_QP_PATH_MTU) { - if (attrs->path_mtu < IB_MTU_256 || + if (attrs->path_mtu < IB_MTU_512 || attrs->path_mtu > IB_MTU_4096) { + pr_err("ocrdma%d: IB MTU %d is not supported\n", + dev->id, ib_mtu_enum_to_int(attrs->path_mtu)); status = -EINVAL; goto pmtu_err; } @@ -3147,9 +3164,9 @@ void ocrdma_cleanup_hw(struct ocrdma_dev *dev) ocrdma_free_pd_pool(dev); ocrdma_mbx_delete_ah_tbl(dev); - /* cleanup the eqs */ - ocrdma_destroy_eqs(dev); - /* cleanup the control path */ ocrdma_destroy_mq(dev); + + /* cleanup the eqs */ + ocrdma_destroy_eqs(dev); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 7a2b59a..8a1398b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -30,6 +30,7 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_addr.h> +#include <rdma/ib_mad.h> #include <linux/netdevice.h> #include <net/addrconf.h> @@ -202,6 +203,24 @@ static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device, return IB_LINK_LAYER_ETHERNET; } +static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = ocrdma_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + static int ocrdma_register_device(struct ocrdma_dev *dev) { strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); @@ -286,6 +305,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.dma_device = &dev->nic_info.pdev->dev; dev->ibdev.process_mad = ocrdma_process_mad; + dev->ibdev.get_port_immutable = ocrdma_port_immutable; if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 243c87c..02ad0ae 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -1176,6 +1176,8 @@ struct ocrdma_query_qp_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; struct ocrdma_qp_params params; + u32 dpp_credits_cqid; + u32 rbq_id; }; enum { @@ -1624,12 +1626,19 @@ struct ocrdma_delete_ah_tbl_rsp { enum { OCRDMA_EQE_VALID_SHIFT = 0, OCRDMA_EQE_VALID_MASK = BIT(0), + OCRDMA_EQE_MAJOR_CODE_MASK = 0x0E, + OCRDMA_EQE_MAJOR_CODE_SHIFT = 0x01, OCRDMA_EQE_FOR_CQE_MASK = 0xFFFE, OCRDMA_EQE_RESOURCE_ID_SHIFT = 16, OCRDMA_EQE_RESOURCE_ID_MASK = 0xFFFF << OCRDMA_EQE_RESOURCE_ID_SHIFT, }; +enum major_code { + OCRDMA_MAJOR_CODE_COMPLETION = 0x00, + OCRDMA_MAJOR_CODE_SENTINAL = 0x01 +}; + struct ocrdma_eqe { u32 id_valid; }; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 8771755..5bb61eb 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -61,10 +61,14 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port, return 0; } -int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) +int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, + struct ib_udata *uhw) { struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + memset(attr, 0, sizeof *attr); memcpy(&attr->fw_ver, &dev->attr.fw_ver[0], min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver))); @@ -365,7 +369,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, if (!pd) return ERR_PTR(-ENOMEM); - if (udata && uctx) { + if (udata && uctx && dev->attr.max_dpp_pds) { pd->dpp_enabled = ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; pd->num_dpp_qp = @@ -375,7 +379,12 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, if (dev->pd_mgr->pd_prealloc_valid) { status = ocrdma_get_pd_num(dev, pd); - return (status == 0) ? pd : ERR_PTR(status); + if (status == 0) { + return pd; + } else { + kfree(pd); + return ERR_PTR(status); + } } retry: @@ -679,7 +688,6 @@ err: ocrdma_release_ucontext_pd(uctx); } else { status = _ocrdma_dealloc_pd(dev, pd); - kfree(pd); } exit: return ERR_PTR(status); @@ -1000,10 +1008,12 @@ err: return status; } -struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, +struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_ctx, struct ib_udata *udata) { + int entries = attr->cqe; struct ocrdma_cq *cq; struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); struct ocrdma_ucontext *uctx = NULL; @@ -1011,6 +1021,9 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, int status; struct ocrdma_create_cq_ureq ureq; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (udata) { if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return ERR_PTR(-EFAULT); @@ -1721,18 +1734,20 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) struct ocrdma_qp *qp; struct ocrdma_dev *dev; struct ib_qp_attr attrs; - int attr_mask = IB_QP_STATE; + int attr_mask; unsigned long flags; qp = get_ocrdma_qp(ibqp); dev = get_ocrdma_dev(ibqp->device); - attrs.qp_state = IB_QPS_ERR; pd = qp->pd; /* change the QP state to ERROR */ - _ocrdma_modify_qp(ibqp, &attrs, attr_mask); - + if (qp->state != OCRDMA_QPS_RST) { + attrs.qp_state = IB_QPS_ERR; + attr_mask = IB_QP_STATE; + _ocrdma_modify_qp(ibqp, &attrs, attr_mask); + } /* ensure that CQEs for newly created QP (whose id may be same with * one which just getting destroyed are same), dont get * discarded until the old CQEs are discarded. diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index b8f7853..b15c608 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -36,11 +36,15 @@ int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); -int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props); +int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props, + struct ib_udata *uhw); int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); int ocrdma_modify_port(struct ib_device *, u8 port, int mask, struct ib_port_modify *props); +enum rdma_protocol_type +ocrdma_query_protocol(struct ib_device *device, u8 port_num); + void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid); int ocrdma_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid); @@ -56,8 +60,10 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *, struct ib_ucontext *, struct ib_udata *); int ocrdma_dealloc_pd(struct ib_pd *pd); -struct ib_cq *ocrdma_create_cq(struct ib_device *, int entries, int vector, - struct ib_ucontext *, struct ib_udata *); +struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata); int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); int ocrdma_destroy_cq(struct ib_cq *); diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index ffd48bf..7df16f7 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -903,7 +903,7 @@ struct qib_devdata { /* PCI Device ID (here for NodeInfo) */ u16 deviceid; /* for write combining settings */ - unsigned long wc_cookie; + int wc_cookie; unsigned long wc_base; unsigned long wc_len; @@ -1136,7 +1136,6 @@ extern struct qib_devdata *qib_lookup(int unit); extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; -extern unsigned qib_wc_pat; extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); int init_chip_wc_pat(struct qib_devdata *dd, u32); diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c index ab4e11c..2b45d0b 100644 --- a/drivers/infiniband/hw/qib/qib_cq.c +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -203,7 +203,7 @@ static void send_complete(struct kthread_work *work) /** * qib_create_cq - create a completion queue * @ibdev: the device this completion queue is attached to - * @entries: the minimum size of the completion queue + * @attr: creation attributes * @context: unused by the QLogic_IB driver * @udata: user data for libibverbs.so * @@ -212,16 +212,21 @@ static void send_complete(struct kthread_work *work) * * Called by ib_create_cq() in the generic verbs code. */ -struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, struct ib_ucontext *context, +struct ib_cq *qib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata) { + int entries = attr->cqe; struct qib_ibdev *dev = to_idev(ibdev); struct qib_cq *cq; struct qib_cq_wc *wc; struct ib_cq *ret; u32 sz; + if (attr->flags) + return ERR_PTR(-EINVAL); + if (entries < 1 || entries > ib_qib_max_cqes) { ret = ERR_PTR(-EINVAL); goto done; diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 9ea6c44..7258818 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -835,7 +835,8 @@ static int mmap_piobufs(struct vm_area_struct *vma, vma->vm_flags &= ~VM_MAYREAD; vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - if (qib_wc_pat) + /* We used PAT if wc_cookie == 0 */ + if (!dd->wc_cookie) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 0d2ba59..4b927809 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3315,11 +3315,9 @@ static int init_6120_variables(struct qib_devdata *dd) qib_6120_config_ctxts(dd); qib_set_ctxtcnt(dd); - if (qib_wc_pat) { - ret = init_chip_wc_pat(dd, 0); - if (ret) - goto bail; - } + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; set_6120_baseaddrs(dd); /* set chip access pointers now */ ret = 0; diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 22affda..00b2af2 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4126,11 +4126,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd) qib_7220_config_ctxts(dd); qib_set_ctxtcnt(dd); /* needed for PAT setup */ - if (qib_wc_pat) { - ret = init_chip_wc_pat(dd, 0); - if (ret) - goto bail; - } + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; set_7220_baseaddrs(dd); /* set chip access pointers now */ ret = 0; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ef97b71..6c8ff10 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -5502,7 +5502,8 @@ static void try_7322_ipg(struct qib_pportdata *ppd) goto retry; send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) goto retry; @@ -6429,6 +6430,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) unsigned features, pidx, sbufcnt; int ret, mtu; u32 sbufs, updthresh; + resource_size_t vl15off; /* pport structs are contiguous, allocated after devdata */ ppd = (struct qib_pportdata *)(dd + 1); @@ -6677,29 +6679,27 @@ static int qib_init_7322_variables(struct qib_devdata *dd) qib_7322_config_ctxts(dd); qib_set_ctxtcnt(dd); - if (qib_wc_pat) { - resource_size_t vl15off; - /* - * We do not set WC on the VL15 buffers to avoid - * a rare problem with unaligned writes from - * interrupt-flushed store buffers, so we need - * to map those separately here. We can't solve - * this for the rarely used mtrr case. - */ - ret = init_chip_wc_pat(dd, 0); - if (ret) - goto bail; + /* + * We do not set WC on the VL15 buffers to avoid + * a rare problem with unaligned writes from + * interrupt-flushed store buffers, so we need + * to map those separately here. We can't solve + * this for the rarely used mtrr case. + */ + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; - /* vl15 buffers start just after the 4k buffers */ - vl15off = dd->physaddr + (dd->piobufbase >> 32) + - dd->piobcnt4k * dd->align4k; - dd->piovl15base = ioremap_nocache(vl15off, - NUM_VL15_BUFS * dd->align4k); - if (!dd->piovl15base) { - ret = -ENOMEM; - goto bail; - } + /* vl15 buffers start just after the 4k buffers */ + vl15off = dd->physaddr + (dd->piobufbase >> 32) + + dd->piobcnt4k * dd->align4k; + dd->piovl15base = ioremap_nocache(vl15off, + NUM_VL15_BUFS * dd->align4k); + if (!dd->piovl15base) { + ret = -ENOMEM; + goto bail; } + qib_7322_set_baseaddrs(dd); /* set chip access pointers now */ ret = 0; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 2ee3695..7e00470 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -91,15 +91,6 @@ MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); unsigned qib_cc_table_size; module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984"); -/* - * qib_wc_pat parameter: - * 0 is WC via MTRR - * 1 is WC via PAT - * If PAT initialization fails, code reverts back to MTRR - */ -unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ -module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); -MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); static void verify_interrupt(unsigned long); @@ -1377,8 +1368,7 @@ static void cleanup_device_data(struct qib_devdata *dd) spin_unlock(&dd->pport[pidx].cc_shadow_lock); } - if (!qib_wc_pat) - qib_disable_wc(dd); + qib_disable_wc(dd); if (dd->pioavailregs_dma) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, @@ -1547,14 +1537,12 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto bail; } - if (!qib_wc_pat) { - ret = qib_enable_wc(dd); - if (ret) { - qib_dev_err(dd, - "Write combining not enabled (err %d): performance may be poor\n", - -ret); - ret = 0; - } + ret = qib_enable_wc(dd); + if (ret) { + qib_dev_err(dd, + "Write combining not enabled (err %d): performance may be poor\n", + -ret); + ret = 0; } qib_verify_pioperf(dd); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 395f404..05e3242 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -83,7 +83,8 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) return; send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, - IB_MGMT_MAD_DATA, GFP_ATOMIC); + IB_MGMT_MAD_DATA, GFP_ATOMIC, + IB_MGMT_BASE_VERSION); if (IS_ERR(send_buf)) return; @@ -1854,7 +1855,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, } static int process_subn(struct ib_device *ibdev, int mad_flags, - u8 port, struct ib_mad *in_mad, + u8 port, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_smp *smp = (struct ib_smp *)out_mad; @@ -2006,7 +2007,7 @@ bail: } static int process_perf(struct ib_device *ibdev, u8 port, - struct ib_mad *in_mad, + const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; @@ -2299,7 +2300,7 @@ static int check_cc_key(struct qib_ibport *ibp, } static int process_cc(struct ib_device *ibdev, int mad_flags, - u8 port, struct ib_mad *in_mad, + u8 port, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; @@ -2400,12 +2401,19 @@ bail: * This is called by the ib_mad module. */ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad) + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) { int ret; struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + BUG_ON(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad)); switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 4a35998..a05d1a3 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1550,12 +1550,14 @@ full: } } -static int qib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) +static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) { struct qib_devdata *dd = dd_from_ibdev(ibdev); struct qib_ibdev *dev = to_idev(ibdev); + if (uhw->inlen || uhw->outlen) + return -EINVAL; memset(props, 0, sizeof(*props)); props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | @@ -2040,6 +2042,24 @@ static void init_ibport(struct qib_pportdata *ppd) RCU_INIT_POINTER(ibp->qp1, NULL); } +static int qib_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = qib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + /** * qib_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -2227,6 +2247,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->process_mad = qib_process_mad; ibdev->mmap = qib_mmap; ibdev->dma_ops = &qib_dma_mapping_ops; + ibdev->get_port_immutable = qib_port_immutable; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), "Intel Infiniband HCA %s", init_utsname()->nodename); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index bfc8948..1635572 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -872,8 +872,10 @@ void qib_cap_mask_chg(struct qib_ibport *ibp); void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, struct ib_mad *out_mad); + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); int qib_create_agents(struct qib_ibdev *dev); void qib_free_agents(struct qib_ibdev *dev); @@ -1007,8 +1009,9 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); -struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, - int comp_vector, struct ib_ucontext *context, +struct ib_cq *qib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, struct ib_udata *udata); int qib_destroy_cq(struct ib_cq *ibcq); diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c index 81b225f..edd0ddb 100644 --- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c +++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c @@ -116,21 +116,10 @@ int qib_enable_wc(struct qib_devdata *dd) } if (!ret) { - int cookie; - - cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); - if (cookie < 0) { - { - qib_devinfo(dd->pcidev, - "mtrr_add() WC for PIO bufs failed (%d)\n", - cookie); - ret = -EINVAL; - } - } else { - dd->wc_cookie = cookie; - dd->wc_base = (unsigned long) pioaddr; - dd->wc_len = (unsigned long) piolen; - } + dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen); + if (dd->wc_cookie < 0) + /* use error from routine */ + ret = dd->wc_cookie; } return ret; @@ -142,18 +131,7 @@ int qib_enable_wc(struct qib_devdata *dd) */ void qib_disable_wc(struct qib_devdata *dd) { - if (dd->wc_cookie) { - int r; - - r = mtrr_del(dd->wc_cookie, dd->wc_base, - dd->wc_len); - if (r < 0) - qib_devinfo(dd->pcidev, - "mtrr_del(%lx, %lx, %lx) failed: %d\n", - dd->wc_cookie, dd->wc_base, - dd->wc_len, r); - dd->wc_cookie = 0; /* even on failure */ - } + arch_phys_wc_del(dd->wc_cookie); } /** diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 0d0f986..34c49b8 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -300,6 +300,22 @@ static struct notifier_block usnic_ib_inetaddr_notifier = { }; /* End of inet section*/ +static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = usnic_ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + + return 0; +} + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { @@ -383,6 +399,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq; us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; + us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; if (ib_register_device(&us_ibdev->ib_dev, NULL)) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 53bd6a2..7df4382 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -248,7 +248,8 @@ enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, } int usnic_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props) + struct ib_device_attr *props, + struct ib_udata *uhw) { struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); union ib_gid gid; @@ -257,6 +258,9 @@ int usnic_ib_query_device(struct ib_device *ibdev, int qp_per_vf; usnic_dbg("\n"); + if (uhw->inlen || uhw->outlen) + return -EINVAL; + mutex_lock(&us_ibdev->usdev_lock); us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); @@ -570,13 +574,17 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return status; } -struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, - struct ib_udata *udata) +struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) { struct ib_cq *cq; usnic_dbg("\n"); + if (attr->flags) + return ERR_PTR(-EINVAL); + cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) return ERR_PTR(-EBUSY); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index bb864f5..0bd04ef 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -24,9 +24,12 @@ enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, u8 port_num); int usnic_ib_query_device(struct ib_device *ibdev, - struct ib_device_attr *props); + struct ib_device_attr *props, + struct ib_udata *uhw); int usnic_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); +enum rdma_protocol_type +usnic_ib_query_protocol(struct ib_device *device, u8 port_num); int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); @@ -44,9 +47,10 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, int usnic_ib_destroy_qp(struct ib_qp *qp); int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, - int vector, struct ib_ucontext *context, - struct ib_udata *udata); +struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); int usnic_ib_destroy_cq(struct ib_cq *cq); struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 417de1f..cb2337f 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -472,11 +472,10 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) return ERR_PTR(-ENOMEM); pd->domain = domain = iommu_domain_alloc(&pci_bus_type); - if (IS_ERR_OR_NULL(domain)) { - usnic_err("Failed to allocate IOMMU domain with err %ld\n", - PTR_ERR(pd->domain)); + if (!domain) { + usnic_err("Failed to allocate IOMMU domain"); kfree(pd); - return ERR_PTR(domain ? PTR_ERR(domain) : -ENOMEM); + return ERR_PTR(-ENOMEM); } iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 56959ad..cf32a778 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -386,8 +386,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i rx->rx_ring[i].mapping, GFP_KERNEL)) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); - ret = -ENOMEM; - goto err_count; + ret = -ENOMEM; + goto err_count; } ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i); if (ret) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 9e1b203..da149c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1128,7 +1128,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) { struct ipoib_neigh_table *ntbl = &priv->ntbl; struct ipoib_neigh_hash *htbl; - struct ipoib_neigh **buckets; + struct ipoib_neigh __rcu **buckets; u32 size; clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); @@ -1146,7 +1146,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) htbl->size = size; htbl->mask = (size - 1); htbl->buckets = buckets; - ntbl->htbl = htbl; + RCU_INIT_POINTER(ntbl->htbl, htbl); htbl->ntbl = ntbl; atomic_set(&ntbl->entries, 0); @@ -1685,9 +1685,7 @@ static void ipoib_add_one(struct ib_device *device) struct net_device *dev; struct ipoib_dev_priv *priv; int s, e, p; - - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; + int count = 0; dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); if (!dev_list) @@ -1704,15 +1702,21 @@ static void ipoib_add_one(struct ib_device *device) } for (p = s; p <= e; ++p) { - if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND) + if (!rdma_protocol_ib(device, p)) continue; dev = ipoib_add_port("ib%d", device, p); if (!IS_ERR(dev)) { priv = netdev_priv(dev); list_add_tail(&priv->list, dev_list); + count++; } } + if (!count) { + kfree(dev_list); + return; + } + ib_set_client_data(device, &ipoib_client, dev_list); } @@ -1721,9 +1725,6 @@ static void ipoib_remove_one(struct ib_device *device) struct ipoib_dev_priv *priv, *tmp; struct list_head *dev_list; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) - return; - dev_list = ib_get_client_data(device, &ipoib_client); if (!dev_list) return; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index e5cc430..9e6ee82 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -141,6 +141,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD }; + struct ib_cq_init_attr cq_attr = {}; int ret, size; int i; @@ -178,14 +179,17 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) } else goto out_free_wq; - priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); + cq_attr.cqe = size; + priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, + dev, &cq_attr); if (IS_ERR(priv->recv_cq)) { printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); goto out_cm_dev_cleanup; } + cq_attr.cqe = ipoib_sendq_size; priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL, - dev, ipoib_sendq_size, 0); + dev, &cq_attr); if (IS_ERR(priv->send_cq)) { printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); goto out_free_recv_cq; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index cc2dd35..5c9f565 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -51,19 +51,22 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context); static void iser_cq_event_callback(struct ib_event *cause, void *context) { - iser_err("got cq event %d \n", cause->event); + iser_err("cq event %s (%d)\n", + ib_event_msg(cause->event), cause->event); } static void iser_qp_event_callback(struct ib_event *cause, void *context) { - iser_err("got qp event %d\n",cause->event); + iser_err("qp event %s (%d)\n", + ib_event_msg(cause->event), cause->event); } static void iser_event_handler(struct ib_event_handler *handler, struct ib_event *event) { - iser_err("async event %d on device %s port %d\n", event->event, - event->device->name, event->element.port_num); + iser_err("async event %s (%d) on device %s port %d\n", + ib_event_msg(event->event), event->event, + event->device->name, event->element.port_num); } /** @@ -123,14 +126,17 @@ static int iser_create_device_ib_res(struct iser_device *device) goto pd_err; for (i = 0; i < device->comps_used; i++) { + struct ib_cq_init_attr cq_attr = {}; struct iser_comp *comp = &device->comps[i]; comp->device = device; + cq_attr.cqe = max_cqe; + cq_attr.comp_vector = i; comp->cq = ib_create_cq(device->ib_device, iser_cq_callback, iser_cq_event_callback, (void *)comp, - max_cqe, i); + &cq_attr); if (IS_ERR(comp->cq)) { comp->cq = NULL; goto cq_err; @@ -873,8 +879,9 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve int ret = 0; iser_conn = (struct iser_conn *)cma_id->context; - iser_info("event %d status %d conn %p id %p\n", - event->event, event->status, cma_id->context, cma_id); + iser_info("%s (%d): status %d conn %p id %p\n", + rdma_event_msg(event->event), event->event, + event->status, cma_id->context, cma_id); mutex_lock(&iser_conn->state_mutex); switch (event->event) { @@ -913,7 +920,8 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve } break; default: - iser_err("Unexpected RDMA CM event (%d)\n", event->event); + iser_err("Unexpected RDMA CM event: %s (%d)\n", + rdma_event_msg(event->event), event->event); break; } mutex_unlock(&iser_conn->state_mutex); @@ -1173,10 +1181,13 @@ static void iser_handle_wc(struct ib_wc *wc) } } else { if (wc->status != IB_WC_WR_FLUSH_ERR) - iser_err("wr id %llx status %d vend_err %x\n", - wc->wr_id, wc->status, wc->vendor_err); + iser_err("%s (%d): wr id %llx vend_err %x\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id, wc->vendor_err); else - iser_dbg("flush error: wr id %llx\n", wc->wr_id); + iser_dbg("%s (%d): wr id %llx\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id); if (wc->wr_id == ISER_BEACON_WRID) /* all flush errors were consumed */ diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 327529e..f3b7a34 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -65,6 +65,8 @@ static int isert_rdma_accept(struct isert_conn *isert_conn); struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np); +static void isert_release_work(struct work_struct *work); + static inline bool isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) { @@ -78,7 +80,9 @@ isert_qp_event_callback(struct ib_event *e, void *context) { struct isert_conn *isert_conn = context; - isert_err("conn %p event: %d\n", isert_conn, e->event); + isert_err("%s (%d): conn %p\n", + ib_event_msg(e->event), e->event, isert_conn); + switch (e->event) { case IB_EVENT_COMM_EST: rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST); @@ -316,15 +320,18 @@ isert_alloc_comps(struct isert_device *device, max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe); for (i = 0; i < device->comps_used; i++) { + struct ib_cq_init_attr cq_attr = {}; struct isert_comp *comp = &device->comps[i]; comp->device = device; INIT_WORK(&comp->work, isert_cq_work); + cq_attr.cqe = max_cqe; + cq_attr.comp_vector = i; comp->cq = ib_create_cq(device->ib_device, isert_cq_callback, isert_cq_event_callback, (void *)comp, - max_cqe, i); + &cq_attr); if (IS_ERR(comp->cq)) { isert_err("Unable to allocate cq\n"); ret = PTR_ERR(comp->cq); @@ -547,11 +554,11 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc, return 0; err_prot_mr: - ib_dereg_mr(desc->pi_ctx->prot_mr); + ib_dereg_mr(pi_ctx->prot_mr); err_prot_frpl: - ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); + ib_free_fast_reg_page_list(pi_ctx->prot_frpl); err_pi_ctx: - kfree(desc->pi_ctx); + kfree(pi_ctx); return ret; } @@ -648,6 +655,7 @@ isert_init_conn(struct isert_conn *isert_conn) mutex_init(&isert_conn->mutex); spin_lock_init(&isert_conn->pool_lock); INIT_LIST_HEAD(&isert_conn->fr_pool); + INIT_WORK(&isert_conn->release_work, isert_release_work); } static void @@ -897,7 +905,8 @@ static int isert_np_cma_handler(struct isert_np *isert_np, enum rdma_cm_event_type event) { - isert_dbg("isert np %p, handling event %d\n", isert_np, event); + isert_dbg("%s (%d): isert np %p\n", + rdma_event_msg(event), event, isert_np); switch (event) { case RDMA_CM_EVENT_DEVICE_REMOVAL: @@ -925,6 +934,7 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id, { struct isert_np *isert_np = cma_id->context; struct isert_conn *isert_conn; + bool terminating = false; if (isert_np->np_cm_id == cma_id) return isert_np_cma_handler(cma_id->context, event); @@ -932,12 +942,25 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id, isert_conn = cma_id->qp->qp_context; mutex_lock(&isert_conn->mutex); + terminating = (isert_conn->state == ISER_CONN_TERMINATING); isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->mutex); isert_info("conn %p completing wait\n", isert_conn); complete(&isert_conn->wait); + if (terminating) + goto out; + + mutex_lock(&isert_np->np_accept_mutex); + if (!list_empty(&isert_conn->accept_node)) { + list_del_init(&isert_conn->accept_node); + isert_put_conn(isert_conn); + queue_work(isert_release_wq, &isert_conn->release_work); + } + mutex_unlock(&isert_np->np_accept_mutex); + +out: return 0; } @@ -957,7 +980,8 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret = 0; - isert_info("event %d status %d id %p np %p\n", event->event, + isert_info("%s (%d): status %d id %p np %p\n", + rdma_event_msg(event->event), event->event, event->status, cma_id, cma_id->context); switch (event->event) { @@ -2091,10 +2115,13 @@ isert_handle_wc(struct ib_wc *wc) } } else { if (wc->status != IB_WC_WR_FLUSH_ERR) - isert_err("wr id %llx status %d vend_err %x\n", - wc->wr_id, wc->status, wc->vendor_err); + isert_err("%s (%d): wr id %llx vend_err %x\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id, wc->vendor_err); else - isert_dbg("flush error: wr id %llx\n", wc->wr_id); + isert_dbg("%s (%d): wr id %llx\n", + ib_wc_status_msg(wc->status), wc->status, + wc->wr_id); if (wc->wr_id != ISER_FASTREG_LI_WRID) isert_cq_comp_err(isert_conn, wc); @@ -2380,7 +2407,6 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, page_off = offset % PAGE_SIZE; send_wr->sg_list = ib_sge; - send_wr->num_sge = sg_nents; send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; /* * Perform mapping of TCM scatterlist memory ib_sge dma_addr. @@ -2400,14 +2426,17 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, ib_sge->addr, ib_sge->length, ib_sge->lkey); page_off = 0; data_left -= ib_sge->length; + if (!data_left) + break; ib_sge++; isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge); } + send_wr->num_sge = ++i; isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n", send_wr->sg_list, send_wr->num_sge); - return sg_nents; + return send_wr->num_sge; } static int @@ -3366,7 +3395,6 @@ static void isert_wait_conn(struct iscsi_conn *conn) isert_wait4flush(isert_conn); isert_wait4logout(isert_conn); - INIT_WORK(&isert_conn->release_work, isert_release_work); queue_work(isert_release_wq, &isert_conn->release_work); } @@ -3374,6 +3402,7 @@ static void isert_free_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; + isert_wait4flush(isert_conn); isert_put_conn(isert_conn); } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 918814c..eada8f7 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -59,9 +59,10 @@ #define DRV_RELDATE "July 1, 2013" MODULE_AUTHOR("Roland Dreier"); -MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " - "v" DRV_VERSION " (" DRV_RELDATE ")"); +MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); +MODULE_INFO(release_date, DRV_RELDATE); static unsigned int srp_sg_tablesize; static unsigned int cmd_sg_entries; @@ -253,7 +254,8 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) static void srp_qp_event(struct ib_event *event, void *context) { - pr_debug("QP event %d\n", event->event); + pr_debug("QP event %s (%d)\n", + ib_event_msg(event->event), event->event); } static int srp_init_qp(struct srp_target_port *target, @@ -465,14 +467,13 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) */ static void srp_destroy_qp(struct srp_rdma_ch *ch) { - struct srp_target_port *target = ch->target; static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID }; struct ib_recv_wr *bad_wr; int ret; /* Destroying a QP and reusing ch->done is only safe if not connected */ - WARN_ON_ONCE(target->connected); + WARN_ON_ONCE(ch->connected); ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE); WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret); @@ -499,6 +500,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) struct ib_fmr_pool *fmr_pool = NULL; struct srp_fr_pool *fr_pool = NULL; const int m = 1 + dev->use_fast_reg; + struct ib_cq_init_attr cq_attr = {}; int ret; init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); @@ -506,15 +508,19 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) return -ENOMEM; /* + 1 for SRP_LAST_WR_ID */ + cq_attr.cqe = target->queue_size + 1; + cq_attr.comp_vector = ch->comp_vector; recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch, - target->queue_size + 1, ch->comp_vector); + &cq_attr); if (IS_ERR(recv_cq)) { ret = PTR_ERR(recv_cq); goto err; } + cq_attr.cqe = m * target->queue_size; + cq_attr.comp_vector = ch->comp_vector; send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch, - m * target->queue_size, ch->comp_vector); + &cq_attr); if (IS_ERR(send_cq)) { ret = PTR_ERR(send_cq); goto err_recv_cq; @@ -781,7 +787,7 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich) shost_printk(KERN_DEBUG, target->scsi_host, PFX "Topspin/Cisco initiator port ID workaround " "activated for target GUID %016llx\n", - (unsigned long long) be64_to_cpu(target->ioc_guid)); + be64_to_cpu(target->ioc_guid)); memset(req->priv.initiator_port_id, 0, 8); memcpy(req->priv.initiator_port_id + 8, &target->srp_host->srp_dev->dev->node_guid, 8); @@ -811,35 +817,19 @@ static bool srp_queue_remove_work(struct srp_target_port *target) return changed; } -static bool srp_change_conn_state(struct srp_target_port *target, - bool connected) -{ - bool changed = false; - - spin_lock_irq(&target->lock); - if (target->connected != connected) { - target->connected = connected; - changed = true; - } - spin_unlock_irq(&target->lock); - - return changed; -} - static void srp_disconnect_target(struct srp_target_port *target) { struct srp_rdma_ch *ch; int i; - if (srp_change_conn_state(target, false)) { - /* XXX should send SRP_I_LOGOUT request */ + /* XXX should send SRP_I_LOGOUT request */ - for (i = 0; i < target->ch_count; i++) { - ch = &target->ch[i]; - if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { - shost_printk(KERN_DEBUG, target->scsi_host, - PFX "Sending CM DREQ failed\n"); - } + for (i = 0; i < target->ch_count; i++) { + ch = &target->ch[i]; + ch->connected = false; + if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); } } } @@ -852,7 +842,7 @@ static void srp_free_req_data(struct srp_target_port *target, struct srp_request *req; int i; - if (!ch->target || !ch->req_ring) + if (!ch->req_ring) return; for (i = 0; i < target->req_ring_size; ++i) { @@ -986,14 +976,26 @@ static void srp_rport_delete(struct srp_rport *rport) srp_queue_remove_work(target); } +/** + * srp_connected_ch() - number of connected channels + * @target: SRP target port. + */ +static int srp_connected_ch(struct srp_target_port *target) +{ + int i, c = 0; + + for (i = 0; i < target->ch_count; i++) + c += target->ch[i].connected; + + return c; +} + static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) { struct srp_target_port *target = ch->target; int ret; - WARN_ON_ONCE(!multich && target->connected); - - target->qp_in_error = false; + WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); ret = srp_lookup_path(ch); if (ret) @@ -1016,7 +1018,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) */ switch (ch->status) { case 0: - srp_change_conn_state(target, true); + ch->connected = true; return 0; case SRP_PORT_REDIRECT: @@ -1214,14 +1216,10 @@ static int srp_rport_reconnect(struct srp_rport *rport) */ for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (!ch->target) - break; ret += srp_new_cm_id(ch); } for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (!ch->target) - break; for (j = 0; j < target->req_ring_size; ++j) { struct srp_request *req = &ch->req_ring[j]; @@ -1230,8 +1228,6 @@ static int srp_rport_reconnect(struct srp_rport *rport) } for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (!ch->target) - break; /* * Whether or not creating a new CM ID succeeded, create a new * QP. This guarantees that all completion callback function @@ -1243,13 +1239,13 @@ static int srp_rport_reconnect(struct srp_rport *rport) for (j = 0; j < target->queue_size; ++j) list_add(&ch->tx_ring[j]->list, &ch->free_tx); } + + target->qp_in_error = false; + for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - if (ret || !ch->target) { - if (i > 1) - ret = 0; + if (ret) break; - } ret = srp_connect_ch(ch, multich); multich = true; } @@ -1842,7 +1838,7 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch, s32 delta = be32_to_cpu(req->req_lim_delta); shost_printk(KERN_ERR, target->scsi_host, PFX - "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun)); + "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun)); if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) shost_printk(KERN_ERR, target->scsi_host, PFX @@ -1929,20 +1925,21 @@ static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, return; } - if (target->connected && !target->qp_in_error) { + if (ch->connected && !target->qp_in_error) { if (wr_id & LOCAL_INV_WR_ID_MASK) { shost_printk(KERN_ERR, target->scsi_host, PFX - "LOCAL_INV failed with status %d\n", - wc_status); + "LOCAL_INV failed with status %s (%d)\n", + ib_wc_status_msg(wc_status), wc_status); } else if (wr_id & FAST_REG_WR_ID_MASK) { shost_printk(KERN_ERR, target->scsi_host, PFX - "FAST_REG_MR failed status %d\n", - wc_status); + "FAST_REG_MR failed status %s (%d)\n", + ib_wc_status_msg(wc_status), wc_status); } else { shost_printk(KERN_ERR, target->scsi_host, - PFX "failed %s status %d for iu %p\n", + PFX "failed %s status %s (%d) for iu %p\n", send_err ? "send" : "receive", - wc_status, (void *)(uintptr_t)wr_id); + ib_wc_status_msg(wc_status), wc_status, + (void *)(uintptr_t)wr_id); } queue_work(system_long_wq, &target->tl_err_work); } @@ -2034,7 +2031,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) memset(cmd, 0, sizeof *cmd); cmd->opcode = SRP_CMD; - cmd->lun = cpu_to_be64((u64) scmnd->device->lun << 48); + int_to_scsilun(scmnd->device->lun, &cmd->lun); cmd->tag = tag; memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); @@ -2367,7 +2364,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) case IB_CM_DREQ_RECEIVED: shost_printk(KERN_WARNING, target->scsi_host, PFX "DREQ received - connection closed\n"); - srp_change_conn_state(target, false); + ch->connected = false; if (ib_send_cm_drep(cm_id, NULL, 0)) shost_printk(KERN_ERR, target->scsi_host, PFX "Sending CM DREP failed\n"); @@ -2414,8 +2411,8 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth) return scsi_change_queue_depth(sdev, qdepth); } -static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, - unsigned int lun, u8 func) +static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, + u8 func) { struct srp_target_port *target = ch->target; struct srp_rport *rport = target->rport; @@ -2423,7 +2420,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; - if (!target->connected || target->qp_in_error) + if (!ch->connected || target->qp_in_error) return -1; init_completion(&ch->tsk_mgmt_done); @@ -2449,7 +2446,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, memset(tsk_mgmt, 0, sizeof *tsk_mgmt); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = cpu_to_be64((u64) lun << 48); + int_to_scsilun(lun, &tsk_mgmt->lun); tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT; tsk_mgmt->tsk_mgmt_func = func; tsk_mgmt->task_tag = req_tag; @@ -2563,8 +2560,7 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", - (unsigned long long) be64_to_cpu(target->id_ext)); + return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); } static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, @@ -2572,8 +2568,7 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", - (unsigned long long) be64_to_cpu(target->ioc_guid)); + return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); } static ssize_t show_service_id(struct device *dev, @@ -2581,8 +2576,7 @@ static ssize_t show_service_id(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sprintf(buf, "0x%016llx\n", - (unsigned long long) be64_to_cpu(target->service_id)); + return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id)); } static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, @@ -2773,7 +2767,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) target->state = SRP_TARGET_SCANNING; sprintf(target->target_name, "SRP.T10:%016llX", - (unsigned long long) be64_to_cpu(target->id_ext)); + be64_to_cpu(target->id_ext)); if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device)) return -ENODEV; @@ -2797,7 +2791,8 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) scsi_scan_target(&target->scsi_host->shost_gendev, 0, target->scsi_id, SCAN_WILD_CARD, 0); - if (!target->connected || target->qp_in_error) { + if (srp_connected_ch(target) < target->ch_count || + target->qp_in_error) { shost_printk(KERN_INFO, target->scsi_host, PFX "SCSI scan failed - removing SCSI host\n"); srp_queue_remove_work(target); @@ -3146,7 +3141,7 @@ static ssize_t srp_create_target(struct device *dev, target_host->transportt = ib_srp_transport_template; target_host->max_channel = 0; target_host->max_id = 1; - target_host->max_lun = SRP_MAX_LUN; + target_host->max_lun = -1LL; target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; target = host_to_target(target_host); @@ -3172,11 +3167,11 @@ static ssize_t srp_create_target(struct device *dev, ret = srp_parse_options(buf, target); if (ret) - goto err; + goto out; ret = scsi_init_shared_tag_map(target_host, target_host->can_queue); if (ret) - goto err; + goto out; target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; @@ -3187,7 +3182,7 @@ static ssize_t srp_create_target(struct device *dev, be64_to_cpu(target->ioc_guid), be64_to_cpu(target->initiator_ext)); ret = -EEXIST; - goto err; + goto out; } if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && @@ -3208,7 +3203,7 @@ static ssize_t srp_create_target(struct device *dev, spin_lock_init(&target->lock); ret = ib_query_gid(ibdev, host->port, 0, &target->sgid); if (ret) - goto err; + goto out; ret = -ENOMEM; target->ch_count = max_t(unsigned, num_online_nodes(), @@ -3219,7 +3214,7 @@ static ssize_t srp_create_target(struct device *dev, target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) - goto err; + goto out; node_idx = 0; for_each_online_node(node) { @@ -3315,9 +3310,6 @@ err_disconnect: } kfree(target->ch); - -err: - scsi_host_put(target_host); goto out; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index a611556..17ee3f8 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -54,7 +54,6 @@ enum { SRP_DLID_REDIRECT = 2, SRP_STALE_CONN = 3, - SRP_MAX_LUN = 512, SRP_DEF_SG_TABLESIZE = 12, SRP_DEFAULT_QUEUE_SIZE = 1 << 6, @@ -170,6 +169,7 @@ struct srp_rdma_ch { struct completion tsk_mgmt_done; u8 tsk_mgmt_status; + bool connected; }; /** @@ -214,7 +214,6 @@ struct srp_target_port { __be16 pkey; u32 rq_tmo_jiffies; - bool connected; int zero_req_lim; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index af70d39..4556cd1 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -477,7 +477,8 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp, mad_wc->wc->pkey_index, 0, IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA, - GFP_KERNEL); + GFP_KERNEL, + IB_MGMT_BASE_VERSION); if (IS_ERR(rsp)) goto err_rsp; @@ -2081,6 +2082,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) struct srpt_port *sport = ch->sport; struct srpt_device *sdev = sport->sdev; u32 srp_sq_size = sport->port_attrib.srp_sq_size; + struct ib_cq_init_attr cq_attr = {}; int ret; WARN_ON(ch->rq_size < 1); @@ -2091,8 +2093,9 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) goto out; retry: + cq_attr.cqe = ch->rq_size + srp_sq_size; ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, - ch->rq_size + srp_sq_size, 0); + &cq_attr); if (IS_ERR(ch->cq)) { ret = PTR_ERR(ch->cq); pr_err("failed to create CQ cqe= %d ret= %d\n", |