diff options
Diffstat (limited to 'drivers/infiniband')
57 files changed, 2963 insertions, 886 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index fc0f2bd..4104ea24 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -889,6 +889,8 @@ retest: break; case IB_CM_ESTABLISHED: spin_unlock_irq(&cm_id_priv->lock); + if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) + break; ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: @@ -1008,7 +1010,6 @@ static void cm_format_req(struct cm_req_msg *req_msg, req_msg->service_id = param->service_id; req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); - cm_req_set_resp_res(req_msg, param->responder_resources); cm_req_set_init_depth(req_msg, param->initiator_depth); cm_req_set_remote_resp_timeout(req_msg, param->remote_cm_response_timeout); @@ -1017,12 +1018,16 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); cm_req_set_local_resp_timeout(req_msg, param->local_cm_response_timeout); - cm_req_set_retry_count(req_msg, param->retry_count); req_msg->pkey = param->primary_path->pkey; cm_req_set_path_mtu(req_msg, param->primary_path->mtu); - cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); - cm_req_set_srq(req_msg, param->srq); + + if (param->qp_type != IB_QPT_XRC_INI) { + cm_req_set_resp_res(req_msg, param->responder_resources); + cm_req_set_retry_count(req_msg, param->retry_count); + cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); + cm_req_set_srq(req_msg, param->srq); + } if (pri_path->hop_limit <= 1) { req_msg->primary_local_lid = pri_path->slid; @@ -1080,7 +1085,8 @@ static int cm_validate_req_param(struct ib_cm_req_param *param) if (!param->primary_path) return -EINVAL; - if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC) + if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC && + param->qp_type != IB_QPT_XRC_INI) return -EINVAL; if (param->private_data && @@ -1601,18 +1607,24 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); rep_msg->local_comm_id = cm_id_priv->id.local_id; rep_msg->remote_comm_id = cm_id_priv->id.remote_id; - cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); rep_msg->resp_resources = param->responder_resources; - rep_msg->initiator_depth = param->initiator_depth; cm_rep_set_target_ack_delay(rep_msg, cm_id_priv->av.port->cm_dev->ack_delay); cm_rep_set_failover(rep_msg, param->failover_accepted); - cm_rep_set_flow_ctrl(rep_msg, param->flow_control); cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); - cm_rep_set_srq(rep_msg, param->srq); rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; + if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { + rep_msg->initiator_depth = param->initiator_depth; + cm_rep_set_flow_ctrl(rep_msg, param->flow_control); + cm_rep_set_srq(rep_msg, param->srq); + cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); + } else { + cm_rep_set_srq(rep_msg, 1); + cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); + } + if (param->private_data && param->private_data_len) memcpy(rep_msg->private_data, param->private_data, param->private_data_len); @@ -1660,7 +1672,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); - cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; @@ -1731,7 +1743,7 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_rtu); -static void cm_format_rep_event(struct cm_work *work) +static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) { struct cm_rep_msg *rep_msg; struct ib_cm_rep_event_param *param; @@ -1740,7 +1752,7 @@ static void cm_format_rep_event(struct cm_work *work) param = &work->cm_event.param.rep_rcvd; param->remote_ca_guid = rep_msg->local_ca_guid; param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); - param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg)); + param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); param->responder_resources = rep_msg->initiator_depth; param->initiator_depth = rep_msg->resp_resources; @@ -1808,7 +1820,7 @@ static int cm_rep_handler(struct cm_work *work) return -EINVAL; } - cm_format_rep_event(work); + cm_format_rep_event(work, cm_id_priv->qp_type); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { @@ -1823,7 +1835,7 @@ static int cm_rep_handler(struct cm_work *work) cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; - cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); spin_lock(&cm.lock); /* Check for duplicate REP. */ @@ -1850,7 +1862,7 @@ static int cm_rep_handler(struct cm_work *work) cm_id_priv->id.state = IB_CM_REP_RCVD; cm_id_priv->id.remote_id = rep_msg->local_comm_id; - cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); + cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); cm_id_priv->initiator_depth = rep_msg->resp_resources; cm_id_priv->responder_resources = rep_msg->initiator_depth; cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); @@ -3492,7 +3504,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { + if (cm_id_priv->qp_type == IB_QPT_RC || + cm_id_priv->qp_type == IB_QPT_XRC_TGT) { *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; qp_attr->max_dest_rd_atomic = @@ -3537,15 +3550,21 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - if (cm_id_priv->qp_type == IB_QPT_RC) { - *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | + switch (cm_id_priv->qp_type) { + case IB_QPT_RC: + case IB_QPT_XRC_INI: + *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = cm_id_priv->av.timeout; qp_attr->retry_cnt = cm_id_priv->retry_count; qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; - qp_attr->max_rd_atomic = - cm_id_priv->initiator_depth; + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + /* fall through */ + case IB_QPT_XRC_TGT: + *qp_attr_mask |= IB_QP_TIMEOUT; + qp_attr->timeout = cm_id_priv->av.timeout; + break; + default: + break; } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 7e63c08..505db2a 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * @@ -86,7 +86,7 @@ struct cm_req_msg { __be16 pkey; /* path MTU:4, RDC exists:1, RNR retry count:3. */ u8 offset50; - /* max CM Retries:4, SRQ:1, rsvd:3 */ + /* max CM Retries:4, SRQ:1, extended transport type:3 */ u8 offset51; __be16 primary_local_lid; @@ -175,6 +175,11 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg) switch(transport_type) { case 0: return IB_QPT_RC; case 1: return IB_QPT_UC; + case 3: + switch (req_msg->offset51 & 0x7) { + case 1: return IB_QPT_XRC_TGT; + default: return 0; + } default: return 0; } } @@ -188,6 +193,12 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, req_msg->offset40) & 0xFFFFFFF9) | 0x2); break; + case IB_QPT_XRC_INI: + req_msg->offset40 = cpu_to_be32((be32_to_cpu( + req_msg->offset40) & + 0xFFFFFFF9) | 0x6); + req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1; + break; default: req_msg->offset40 = cpu_to_be32(be32_to_cpu( req_msg->offset40) & @@ -527,6 +538,23 @@ static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn) (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); } +static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg) +{ + return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8); +} + +static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn) +{ + rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) | + (be32_to_cpu(rep_msg->offset16) & 0x000000FF)); +} + +static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type) +{ + return (qp_type == IB_QPT_XRC_INI) ? + cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg); +} + static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) { return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ca4c5dc..872b184 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -81,6 +81,7 @@ static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); static DEFINE_IDR(ipoib_ps); +static DEFINE_IDR(ib_ps); struct cma_device { struct list_head list; @@ -1179,6 +1180,15 @@ static void cma_set_req_event_data(struct rdma_cm_event *event, event->param.conn.qp_num = req_data->remote_qpn; } +static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) +{ + return (((ib_event->event == IB_CM_REQ_RECEIVED) || + (ib_event->param.req_rcvd.qp_type == id->qp_type)) || + ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && + (id->qp_type == IB_QPT_UD)) || + (!id->qp_type)); +} + static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id; @@ -1186,13 +1196,16 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) int offset, ret; listen_id = cm_id->context; + if (!cma_check_req_qp_type(&listen_id->id, ib_event)) + return -EINVAL; + if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) return -ECONNABORTED; memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id->id.ps); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; - if (listen_id->id.qp_type == IB_QPT_UD) { + if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { conn_id = cma_new_udp_id(&listen_id->id, ib_event); event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = @@ -1328,6 +1341,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) switch (iw_event->status) { case 0: event.event = RDMA_CM_EVENT_ESTABLISHED; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; break; case -ECONNRESET: case -ECONNREFUSED: @@ -1343,6 +1358,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) break; case IW_CM_EVENT_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; break; default: BUG_ON(1); @@ -1433,8 +1450,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, event.event = RDMA_CM_EVENT_CONNECT_REQUEST; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; - event.param.conn.initiator_depth = attr.max_qp_init_rd_atom; - event.param.conn.responder_resources = attr.max_qp_rd_atom; + event.param.conn.initiator_depth = iw_event->ird; + event.param.conn.responder_resources = iw_event->ord; /* * Protect against the user destroying conn_id from another thread @@ -2234,6 +2251,9 @@ static int cma_get_port(struct rdma_id_private *id_priv) case RDMA_PS_IPOIB: ps = &ipoib_ps; break; + case RDMA_PS_IB: + ps = &ib_ps; + break; default: return -EPROTONOSUPPORT; } @@ -2569,7 +2589,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.service_id = cma_get_service_id(id_priv->id.ps, (struct sockaddr *) &route->addr.dst_addr); req.qp_num = id_priv->qp_num; - req.qp_type = IB_QPT_RC; + req.qp_type = id_priv->id.qp_type; req.starting_psn = id_priv->seq_num; req.responder_resources = conn_param->responder_resources; req.initiator_depth = conn_param->initiator_depth; @@ -2616,14 +2636,16 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, if (ret) goto out; - iw_param.ord = conn_param->initiator_depth; - iw_param.ird = conn_param->responder_resources; - iw_param.private_data = conn_param->private_data; - iw_param.private_data_len = conn_param->private_data_len; - if (id_priv->id.qp) + if (conn_param) { + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; + } else { + memset(&iw_param, 0, sizeof iw_param); iw_param.qpn = id_priv->qp_num; - else - iw_param.qpn = conn_param->qp_num; + } ret = iw_cm_connect(cm_id, &iw_param); out: if (ret) { @@ -2765,14 +2787,20 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - if (id->qp_type == IB_QPT_UD) - ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, - conn_param->private_data, - conn_param->private_data_len); - else if (conn_param) - ret = cma_accept_ib(id_priv, conn_param); - else - ret = cma_rep_recv(id_priv); + if (id->qp_type == IB_QPT_UD) { + if (conn_param) + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + conn_param->private_data, + conn_param->private_data_len); + else + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + NULL, 0); + } else { + if (conn_param) + ret = cma_accept_ib(id_priv, conn_param); + else + ret = cma_rep_recv(id_priv); + } break; case RDMA_TRANSPORT_IWARP: ret = cma_accept_iw(id_priv, conn_param); @@ -3460,6 +3488,7 @@ static void __exit cma_cleanup(void) idr_destroy(&tcp_ps); idr_destroy(&udp_ps); idr_destroy(&ipoib_ps); + idr_destroy(&ib_ps); } module_init(cma_init); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index b4d8672..0563892 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1596,6 +1596,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv, mad->mad_hdr.class_version].class; if (!class) goto out; + if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= + IB_MGMT_MAX_METHODS) + goto out; method = class->method_table[convert_mgmt_class( mad->mad_hdr.mgmt_class)]; if (method) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 9ab5df7..2b59b72 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -185,17 +185,35 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, if (ret) return ret; + rate = (25 * attr.active_speed) / 10; + switch (attr.active_speed) { - case 2: speed = " DDR"; break; - case 4: speed = " QDR"; break; + case 2: + speed = " DDR"; + break; + case 4: + speed = " QDR"; + break; + case 8: + speed = " FDR10"; + rate = 10; + break; + case 16: + speed = " FDR"; + rate = 14; + break; + case 32: + speed = " EDR"; + rate = 25; + break; } - rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed; + rate *= ib_width_enum_to_int(attr.active_width); if (rate < 0) return -EINVAL; return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", - rate / 10, rate % 10 ? ".5" : "", + rate, (attr.active_speed == 1) ? ".5" : "", ib_width_enum_to_int(attr.active_width), speed); } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 08f948d..b8a0b4a 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1122,7 +1122,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) + if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 71be5ee..b69307f 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -276,7 +276,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; uevent->resp.status = event->status; - if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB) + if (cm_id->qp_type == IB_QPT_UD) ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, @@ -377,6 +377,9 @@ static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_ case RDMA_PS_IPOIB: *qp_type = IB_QPT_UD; return 0; + case RDMA_PS_IB: + *qp_type = cmd->qp_type; + return 0; default: return -EINVAL; } @@ -1270,7 +1273,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) + if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 8d261b6..07db229 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -458,8 +458,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } - if (packet->mad.hdr.id < 0 || - packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { + if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } @@ -703,7 +702,7 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); - if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { + if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a078e56..5bcb2af 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -76,6 +76,8 @@ struct ib_uverbs_device { struct ib_device *ib_dev; int devnum; struct cdev cdev; + struct rb_root xrcd_tree; + struct mutex xrcd_tree_mutex; }; struct ib_uverbs_event_file { @@ -120,6 +122,16 @@ struct ib_uevent_object { u32 events_reported; }; +struct ib_uxrcd_object { + struct ib_uobject uobject; + atomic_t refcnt; +}; + +struct ib_usrq_object { + struct ib_uevent_object uevent; + struct ib_uxrcd_object *uxrcd; +}; + struct ib_uqp_object { struct ib_uevent_object uevent; struct list_head mcast_list; @@ -142,6 +154,7 @@ extern struct idr ib_uverbs_ah_idr; extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +extern struct idr ib_uverbs_xrcd_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -161,6 +174,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ @@ -181,6 +195,7 @@ IB_UVERBS_DECLARE_CMD(poll_cq); IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); +IB_UVERBS_DECLARE_CMD(open_qp); IB_UVERBS_DECLARE_CMD(query_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); @@ -195,5 +210,8 @@ IB_UVERBS_DECLARE_CMD(create_srq); IB_UVERBS_DECLARE_CMD(modify_srq); IB_UVERBS_DECLARE_CMD(query_srq); IB_UVERBS_DECLARE_CMD(destroy_srq); +IB_UVERBS_DECLARE_CMD(create_xsrq); +IB_UVERBS_DECLARE_CMD(open_xrcd); +IB_UVERBS_DECLARE_CMD(close_xrcd); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index c426992..254f164 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -47,6 +47,7 @@ static struct lock_class_key cq_lock_key; static struct lock_class_key qp_lock_key; static struct lock_class_key ah_lock_key; static struct lock_class_key srq_lock_key; +static struct lock_class_key xrcd_lock_key; #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ @@ -255,6 +256,18 @@ static void put_srq_read(struct ib_srq *srq) put_uobj_read(srq->uobject); } +static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, + struct ib_uobject **uobj) +{ + *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); + return *uobj ? (*uobj)->object : NULL; +} + +static void put_xrcd_read(struct ib_uobject *uobj) +{ + put_uobj_read(uobj); +} + ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -298,6 +311,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); + INIT_LIST_HEAD(&ucontext->xrcd_list); ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; @@ -579,6 +593,310 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, return in_len; } +struct xrcd_table_entry { + struct rb_node node; + struct ib_xrcd *xrcd; + struct inode *inode; +}; + +static int xrcd_table_insert(struct ib_uverbs_device *dev, + struct inode *inode, + struct ib_xrcd *xrcd) +{ + struct xrcd_table_entry *entry, *scan; + struct rb_node **p = &dev->xrcd_tree.rb_node; + struct rb_node *parent = NULL; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->xrcd = xrcd; + entry->inode = inode; + + while (*p) { + parent = *p; + scan = rb_entry(parent, struct xrcd_table_entry, node); + + if (inode < scan->inode) { + p = &(*p)->rb_left; + } else if (inode > scan->inode) { + p = &(*p)->rb_right; + } else { + kfree(entry); + return -EEXIST; + } + } + + rb_link_node(&entry->node, parent, p); + rb_insert_color(&entry->node, &dev->xrcd_tree); + igrab(inode); + return 0; +} + +static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev, + struct inode *inode) +{ + struct xrcd_table_entry *entry; + struct rb_node *p = dev->xrcd_tree.rb_node; + + while (p) { + entry = rb_entry(p, struct xrcd_table_entry, node); + + if (inode < entry->inode) + p = p->rb_left; + else if (inode > entry->inode) + p = p->rb_right; + else + return entry; + } + + return NULL; +} + +static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode) +{ + struct xrcd_table_entry *entry; + + entry = xrcd_table_search(dev, inode); + if (!entry) + return NULL; + + return entry->xrcd; +} + +static void xrcd_table_delete(struct ib_uverbs_device *dev, + struct inode *inode) +{ + struct xrcd_table_entry *entry; + + entry = xrcd_table_search(dev, inode); + if (entry) { + iput(inode); + rb_erase(&entry->node, &dev->xrcd_tree); + kfree(entry); + } +} + +ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_open_xrcd cmd; + struct ib_uverbs_open_xrcd_resp resp; + struct ib_udata udata; + struct ib_uxrcd_object *obj; + struct ib_xrcd *xrcd = NULL; + struct file *f = NULL; + struct inode *inode = NULL; + int ret = 0; + int new_xrcd = 0; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + mutex_lock(&file->device->xrcd_tree_mutex); + + if (cmd.fd != -1) { + /* search for file descriptor */ + f = fget(cmd.fd); + if (!f) { + ret = -EBADF; + goto err_tree_mutex_unlock; + } + + inode = f->f_dentry->d_inode; + if (!inode) { + ret = -EBADF; + goto err_tree_mutex_unlock; + } + + xrcd = find_xrcd(file->device, inode); + if (!xrcd && !(cmd.oflags & O_CREAT)) { + /* no file descriptor. Need CREATE flag */ + ret = -EAGAIN; + goto err_tree_mutex_unlock; + } + + if (xrcd && cmd.oflags & O_EXCL) { + ret = -EINVAL; + goto err_tree_mutex_unlock; + } + } + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) { + ret = -ENOMEM; + goto err_tree_mutex_unlock; + } + + init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key); + + down_write(&obj->uobject.mutex); + + if (!xrcd) { + xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev, + file->ucontext, &udata); + if (IS_ERR(xrcd)) { + ret = PTR_ERR(xrcd); + goto err; + } + + xrcd->inode = inode; + xrcd->device = file->device->ib_dev; + atomic_set(&xrcd->usecnt, 0); + mutex_init(&xrcd->tgt_qp_mutex); + INIT_LIST_HEAD(&xrcd->tgt_qp_list); + new_xrcd = 1; + } + + atomic_set(&obj->refcnt, 0); + obj->uobject.object = xrcd; + ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + if (ret) + goto err_idr; + + memset(&resp, 0, sizeof resp); + resp.xrcd_handle = obj->uobject.id; + + if (inode) { + if (new_xrcd) { + /* create new inode/xrcd table entry */ + ret = xrcd_table_insert(file->device, inode, xrcd); + if (ret) + goto err_insert_xrcd; + } + atomic_inc(&xrcd->usecnt); + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + if (f) + fput(f); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); + mutex_unlock(&file->mutex); + + obj->uobject.live = 1; + up_write(&obj->uobject.mutex); + + mutex_unlock(&file->device->xrcd_tree_mutex); + return in_len; + +err_copy: + if (inode) { + if (new_xrcd) + xrcd_table_delete(file->device, inode); + atomic_dec(&xrcd->usecnt); + } + +err_insert_xrcd: + idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + +err_idr: + ib_dealloc_xrcd(xrcd); + +err: + put_uobj_write(&obj->uobject); + +err_tree_mutex_unlock: + if (f) + fput(f); + + mutex_unlock(&file->device->xrcd_tree_mutex); + + return ret; +} + +ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_close_xrcd cmd; + struct ib_uobject *uobj; + struct ib_xrcd *xrcd = NULL; + struct inode *inode = NULL; + struct ib_uxrcd_object *obj; + int live; + int ret = 0; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + mutex_lock(&file->device->xrcd_tree_mutex); + uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); + if (!uobj) { + ret = -EINVAL; + goto out; + } + + xrcd = uobj->object; + inode = xrcd->inode; + obj = container_of(uobj, struct ib_uxrcd_object, uobject); + if (atomic_read(&obj->refcnt)) { + put_uobj_write(uobj); + ret = -EBUSY; + goto out; + } + + if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { + ret = ib_dealloc_xrcd(uobj->object); + if (!ret) + uobj->live = 0; + } + + live = uobj->live; + if (inode && ret) + atomic_inc(&xrcd->usecnt); + + put_uobj_write(uobj); + + if (ret) + goto out; + + if (inode && !live) + xrcd_table_delete(file->device, inode); + + idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + ret = in_len; + +out: + mutex_unlock(&file->device->xrcd_tree_mutex); + return ret; +} + +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, + struct ib_xrcd *xrcd) +{ + struct inode *inode; + + inode = xrcd->inode; + if (inode && !atomic_dec_and_test(&xrcd->usecnt)) + return; + + ib_dealloc_xrcd(xrcd); + + if (inode) + xrcd_table_delete(dev, inode); +} + ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1052,9 +1370,12 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, struct ib_uverbs_create_qp_resp resp; struct ib_udata udata; struct ib_uqp_object *obj; - struct ib_pd *pd; - struct ib_cq *scq, *rcq; - struct ib_srq *srq; + struct ib_device *device; + struct ib_pd *pd = NULL; + struct ib_xrcd *xrcd = NULL; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_cq *scq = NULL, *rcq = NULL; + struct ib_srq *srq = NULL; struct ib_qp *qp; struct ib_qp_init_attr attr; int ret; @@ -1076,15 +1397,39 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); down_write(&obj->uevent.uobject.mutex); - srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); - rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? - scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); + if (cmd.qp_type == IB_QPT_XRC_TGT) { + xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + device = xrcd->device; + } else { + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); + if (!pd || !scq) { + ret = -EINVAL; + goto err_put; + } - if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { - ret = -EINVAL; - goto err_put; + if (cmd.qp_type == IB_QPT_XRC_INI) { + cmd.max_recv_wr = cmd.max_recv_sge = 0; + } else { + if (cmd.is_srq) { + srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (!srq || srq->srq_type != IB_SRQT_BASIC) { + ret = -EINVAL; + goto err_put; + } + } + rcq = (cmd.recv_cq_handle == cmd.send_cq_handle) ? + scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } + } + device = pd->device; } attr.event_handler = ib_uverbs_qp_event_handler; @@ -1092,6 +1437,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; + attr.xrcd = xrcd; attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; attr.qp_type = cmd.qp_type; attr.create_flags = 0; @@ -1106,26 +1452,34 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); - qp = pd->device->create_qp(pd, &attr, &udata); + if (cmd.qp_type == IB_QPT_XRC_TGT) + qp = ib_create_qp(pd, &attr); + else + qp = device->create_qp(pd, &attr, &udata); + if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_put; } - qp->device = pd->device; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->uobject = &obj->uevent.uobject; - qp->event_handler = attr.event_handler; - qp->qp_context = attr.qp_context; - qp->qp_type = attr.qp_type; - atomic_inc(&pd->usecnt); - atomic_inc(&attr.send_cq->usecnt); - atomic_inc(&attr.recv_cq->usecnt); - if (attr.srq) - atomic_inc(&attr.srq->usecnt); + if (cmd.qp_type != IB_QPT_XRC_TGT) { + qp->real_qp = qp; + qp->device = device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_inc(&pd->usecnt); + atomic_inc(&attr.send_cq->usecnt); + if (attr.recv_cq) + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + } + qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); @@ -1147,9 +1501,13 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - put_cq_read(scq); - if (rcq != scq) + if (xrcd) + put_xrcd_read(xrcd_uobj); + if (pd) + put_pd_read(pd); + if (scq) + put_cq_read(scq); + if (rcq && rcq != scq) put_cq_read(rcq); if (srq) put_srq_read(srq); @@ -1171,6 +1529,8 @@ err_destroy: ib_destroy_qp(qp); err_put: + if (xrcd) + put_xrcd_read(xrcd_uobj); if (pd) put_pd_read(pd); if (scq) @@ -1184,6 +1544,98 @@ err_put: return ret; } +ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_open_qp cmd; + struct ib_uverbs_create_qp_resp resp; + struct ib_udata udata; + struct ib_uqp_object *obj; + struct ib_xrcd *xrcd; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_qp *qp; + struct ib_qp_open_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + down_write(&obj->uevent.uobject.mutex); + + xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put; + } + + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.qp_num = cmd.qpn; + attr.qp_type = cmd.qp_type; + + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); + + qp = ib_open_qp(xrcd, &attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_put; + } + + qp->uobject = &obj->uevent.uobject; + + obj->uevent.uobject.object = qp; + ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + if (ret) + goto err_destroy; + + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + resp.qp_handle = obj->uevent.uobject.id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_remove; + } + + put_xrcd_read(xrcd_uobj); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); + mutex_unlock(&file->mutex); + + obj->uevent.uobject.live = 1; + + up_write(&obj->uevent.uobject.mutex); + + return in_len; + +err_remove: + idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + +err_destroy: + ib_destroy_qp(qp); + +err_put: + put_xrcd_read(xrcd_uobj); + put_uobj_write(&obj->uevent.uobject); + return ret; +} + ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1284,6 +1736,20 @@ out: return ret ? ret : in_len; } +/* Remove ignored fields set in the attribute mask */ +static int modify_qp_mask(enum ib_qp_type qp_type, int mask) +{ + switch (qp_type) { + case IB_QPT_XRC_INI: + return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); + case IB_QPT_XRC_TGT: + return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY); + default: + return mask; + } +} + ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1356,7 +1822,12 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; - ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata); + if (qp->real_qp == qp) { + ret = qp->device->modify_qp(qp, attr, + modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); + } else { + ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); + } put_qp_read(qp); @@ -1553,7 +2024,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, } resp.bad_wr = 0; - ret = qp->device->post_send(qp, wr, &bad_wr); + ret = qp->device->post_send(qp->real_qp, wr, &bad_wr); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -1691,7 +2162,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, goto out; resp.bad_wr = 0; - ret = qp->device->post_recv(qp, wr, &bad_wr); + ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); put_qp_read(qp); @@ -1975,107 +2446,199 @@ out_put: return ret ? ret : in_len; } -ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +int __uverbs_create_xsrq(struct ib_uverbs_file *file, + struct ib_uverbs_create_xsrq *cmd, + struct ib_udata *udata) { - struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_srq_resp resp; - struct ib_udata udata; - struct ib_uevent_object *obj; + struct ib_usrq_object *obj; struct ib_pd *pd; struct ib_srq *srq; + struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_srq_init_attr attr; int ret; - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key); - down_write(&obj->uobject.mutex); + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_key); + down_write(&obj->uevent.uobject.mutex); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = idr_read_pd(cmd->pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; } + if (cmd->srq_type == IB_SRQT_XRC) { + attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + if (!attr.ext.xrc.cq) { + ret = -EINVAL; + goto err_put_pd; + } + + attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); + if (!attr.ext.xrc.xrcd) { + ret = -EINVAL; + goto err_put_cq; + } + + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); + } + attr.event_handler = ib_uverbs_srq_event_handler; attr.srq_context = file; - attr.attr.max_wr = cmd.max_wr; - attr.attr.max_sge = cmd.max_sge; - attr.attr.srq_limit = cmd.srq_limit; + attr.srq_type = cmd->srq_type; + attr.attr.max_wr = cmd->max_wr; + attr.attr.max_sge = cmd->max_sge; + attr.attr.srq_limit = cmd->srq_limit; - obj->events_reported = 0; - INIT_LIST_HEAD(&obj->event_list); + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); - srq = pd->device->create_srq(pd, &attr, &udata); + srq = pd->device->create_srq(pd, &attr, udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); goto err_put; } - srq->device = pd->device; - srq->pd = pd; - srq->uobject = &obj->uobject; + srq->device = pd->device; + srq->pd = pd; + srq->srq_type = cmd->srq_type; + srq->uobject = &obj->uevent.uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; + + if (cmd->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.cq = attr.ext.xrc.cq; + srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; + atomic_inc(&attr.ext.xrc.cq->usecnt); + atomic_inc(&attr.ext.xrc.xrcd->usecnt); + } + atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); - obj->uobject.object = srq; - ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); + obj->uevent.uobject.object = srq; + ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); if (ret) goto err_destroy; memset(&resp, 0, sizeof resp); - resp.srq_handle = obj->uobject.id; + resp.srq_handle = obj->uevent.uobject.id; resp.max_wr = attr.attr.max_wr; resp.max_sge = attr.attr.max_sge; + if (cmd->srq_type == IB_SRQT_XRC) + resp.srqn = srq->ext.xrc.srq_num; - if (copy_to_user((void __user *) (unsigned long) cmd.response, + if (copy_to_user((void __user *) (unsigned long) cmd->response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } + if (cmd->srq_type == IB_SRQT_XRC) { + put_uobj_read(xrcd_uobj); + put_cq_read(attr.ext.xrc.cq); + } put_pd_read(pd); mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); mutex_unlock(&file->mutex); - obj->uobject.live = 1; + obj->uevent.uobject.live = 1; - up_write(&obj->uobject.mutex); + up_write(&obj->uevent.uobject.mutex); - return in_len; + return 0; err_copy: - idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject); + idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); err_destroy: ib_destroy_srq(srq); err_put: + if (cmd->srq_type == IB_SRQT_XRC) { + atomic_dec(&obj->uxrcd->refcnt); + put_uobj_read(xrcd_uobj); + } + +err_put_cq: + if (cmd->srq_type == IB_SRQT_XRC) + put_cq_read(attr.ext.xrc.cq); + +err_put_pd: put_pd_read(pd); err: - put_uobj_write(&obj->uobject); + put_uobj_write(&obj->uevent.uobject); return ret; } +ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_srq cmd; + struct ib_uverbs_create_xsrq xcmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + xcmd.response = cmd.response; + xcmd.user_handle = cmd.user_handle; + xcmd.srq_type = IB_SRQT_BASIC; + xcmd.pd_handle = cmd.pd_handle; + xcmd.max_wr = cmd.max_wr; + xcmd.max_sge = cmd.max_sge; + xcmd.srq_limit = cmd.srq_limit; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + ret = __uverbs_create_xsrq(file, &xcmd, &udata); + if (ret) + return ret; + + return in_len; +} + +ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_create_xsrq cmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + ret = __uverbs_create_xsrq(file, &cmd, &udata); + if (ret) + return ret; + + return in_len; +} + ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 56898b6..8796367 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -72,6 +72,7 @@ DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); +DEFINE_IDR(ib_uverbs_xrcd_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -107,6 +108,10 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, + [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, + [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, + [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp }; static void ib_uverbs_add_one(struct ib_device *device); @@ -202,8 +207,12 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); + if (qp != qp->real_qp) { + ib_close_qp(qp); + } else { + ib_uverbs_detach_umcast(qp, uqp); + ib_destroy_qp(qp); + } ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } @@ -241,6 +250,18 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uobj); } + mutex_lock(&file->device->xrcd_tree_mutex); + list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { + struct ib_xrcd *xrcd = uobj->object; + struct ib_uxrcd_object *uxrcd = + container_of(uobj, struct ib_uxrcd_object, uobject); + + idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + ib_uverbs_dealloc_xrcd(file->device, xrcd); + kfree(uxrcd); + } + mutex_unlock(&file->device->xrcd_tree_mutex); + list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { struct ib_pd *pd = uobj->object; @@ -557,8 +578,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (hdr.in_words * 4 != count) return -EINVAL; - if (hdr.command < 0 || - hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || + if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[hdr.command]) return -EINVAL; @@ -741,6 +761,8 @@ static void ib_uverbs_add_one(struct ib_device *device) kref_init(&uverbs_dev->ref); init_completion(&uverbs_dev->comp); + uverbs_dev->xrcd_tree = RB_ROOT; + mutex_init(&uverbs_dev->xrcd_tree_mutex); spin_lock(&map_lock); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index af7a8b0..4251750 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -39,6 +39,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/string.h> +#include <linux/slab.h> #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> @@ -77,6 +78,31 @@ enum ib_rate mult_to_ib_rate(int mult) } EXPORT_SYMBOL(mult_to_ib_rate); +int ib_rate_to_mbps(enum ib_rate rate) +{ + switch (rate) { + case IB_RATE_2_5_GBPS: return 2500; + case IB_RATE_5_GBPS: return 5000; + case IB_RATE_10_GBPS: return 10000; + case IB_RATE_20_GBPS: return 20000; + case IB_RATE_30_GBPS: return 30000; + case IB_RATE_40_GBPS: return 40000; + case IB_RATE_60_GBPS: return 60000; + case IB_RATE_80_GBPS: return 80000; + case IB_RATE_120_GBPS: return 120000; + case IB_RATE_14_GBPS: return 14062; + case IB_RATE_56_GBPS: return 56250; + case IB_RATE_112_GBPS: return 112500; + case IB_RATE_168_GBPS: return 168750; + case IB_RATE_25_GBPS: return 25781; + case IB_RATE_100_GBPS: return 103125; + case IB_RATE_200_GBPS: return 206250; + case IB_RATE_300_GBPS: return 309375; + default: return -1; + } +} +EXPORT_SYMBOL(ib_rate_to_mbps); + enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type) { @@ -250,6 +276,13 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, srq->uobject = NULL; srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; + srq->srq_type = srq_init_attr->srq_type; + if (srq->srq_type == IB_SRQT_XRC) { + srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; + srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq; + atomic_inc(&srq->ext.xrc.xrcd->usecnt); + atomic_inc(&srq->ext.xrc.cq->usecnt); + } atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); } @@ -279,16 +312,29 @@ EXPORT_SYMBOL(ib_query_srq); int ib_destroy_srq(struct ib_srq *srq) { struct ib_pd *pd; + enum ib_srq_type srq_type; + struct ib_xrcd *uninitialized_var(xrcd); + struct ib_cq *uninitialized_var(cq); int ret; if (atomic_read(&srq->usecnt)) return -EBUSY; pd = srq->pd; + srq_type = srq->srq_type; + if (srq_type == IB_SRQT_XRC) { + xrcd = srq->ext.xrc.xrcd; + cq = srq->ext.xrc.cq; + } ret = srq->device->destroy_srq(srq); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + if (srq_type == IB_SRQT_XRC) { + atomic_dec(&xrcd->usecnt); + atomic_dec(&cq->usecnt); + } + } return ret; } @@ -296,28 +342,123 @@ EXPORT_SYMBOL(ib_destroy_srq); /* Queue pairs */ +static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) +{ + struct ib_qp *qp = context; + + list_for_each_entry(event->element.qp, &qp->open_list, open_list) + event->element.qp->event_handler(event, event->element.qp->qp_context); +} + +static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) +{ + mutex_lock(&xrcd->tgt_qp_mutex); + list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); + mutex_unlock(&xrcd->tgt_qp_mutex); +} + +static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, + void (*event_handler)(struct ib_event *, void *), + void *qp_context) +{ + struct ib_qp *qp; + unsigned long flags; + + qp = kzalloc(sizeof *qp, GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->real_qp = real_qp; + atomic_inc(&real_qp->usecnt); + qp->device = real_qp->device; + qp->event_handler = event_handler; + qp->qp_context = qp_context; + qp->qp_num = real_qp->qp_num; + qp->qp_type = real_qp->qp_type; + + spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + list_add(&qp->open_list, &real_qp->open_list); + spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + + return qp; +} + +struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, + struct ib_qp_open_attr *qp_open_attr) +{ + struct ib_qp *qp, *real_qp; + + if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) + return ERR_PTR(-EINVAL); + + qp = ERR_PTR(-EINVAL); + mutex_lock(&xrcd->tgt_qp_mutex); + list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { + if (real_qp->qp_num == qp_open_attr->qp_num) { + qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, + qp_open_attr->qp_context); + break; + } + } + mutex_unlock(&xrcd->tgt_qp_mutex); + return qp; +} +EXPORT_SYMBOL(ib_open_qp); + struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { - struct ib_qp *qp; + struct ib_qp *qp, *real_qp; + struct ib_device *device; - qp = pd->device->create_qp(pd, qp_init_attr, NULL); + device = pd ? pd->device : qp_init_attr->xrcd->device; + qp = device->create_qp(pd, qp_init_attr, NULL); if (!IS_ERR(qp)) { - qp->device = pd->device; - qp->pd = pd; - qp->send_cq = qp_init_attr->send_cq; - qp->recv_cq = qp_init_attr->recv_cq; - qp->srq = qp_init_attr->srq; - qp->uobject = NULL; - qp->event_handler = qp_init_attr->event_handler; - qp->qp_context = qp_init_attr->qp_context; - qp->qp_type = qp_init_attr->qp_type; - atomic_inc(&pd->usecnt); - atomic_inc(&qp_init_attr->send_cq->usecnt); - atomic_inc(&qp_init_attr->recv_cq->usecnt); - if (qp_init_attr->srq) - atomic_inc(&qp_init_attr->srq->usecnt); + qp->device = device; + qp->real_qp = qp; + qp->uobject = NULL; + qp->qp_type = qp_init_attr->qp_type; + + if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { + qp->event_handler = __ib_shared_qp_event_handler; + qp->qp_context = qp; + qp->pd = NULL; + qp->send_cq = qp->recv_cq = NULL; + qp->srq = NULL; + qp->xrcd = qp_init_attr->xrcd; + atomic_inc(&qp_init_attr->xrcd->usecnt); + INIT_LIST_HEAD(&qp->open_list); + atomic_set(&qp->usecnt, 0); + + real_qp = qp; + qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, + qp_init_attr->qp_context); + if (!IS_ERR(qp)) + __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); + else + real_qp->device->destroy_qp(real_qp); + } else { + qp->event_handler = qp_init_attr->event_handler; + qp->qp_context = qp_init_attr->qp_context; + if (qp_init_attr->qp_type == IB_QPT_XRC_INI) { + qp->recv_cq = NULL; + qp->srq = NULL; + } else { + qp->recv_cq = qp_init_attr->recv_cq; + atomic_inc(&qp_init_attr->recv_cq->usecnt); + qp->srq = qp_init_attr->srq; + if (qp->srq) + atomic_inc(&qp_init_attr->srq->usecnt); + } + + qp->pd = pd; + qp->send_cq = qp_init_attr->send_cq; + qp->xrcd = NULL; + + atomic_inc(&pd->usecnt); + atomic_inc(&qp_init_attr->send_cq->usecnt); + } } return qp; @@ -326,8 +467,8 @@ EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; - enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETHERTYPE + 1]; - enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETHERTYPE + 1]; + enum ib_qp_attr_mask req_param[IB_QPT_MAX]; + enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -343,6 +484,12 @@ static const struct { [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -365,6 +512,12 @@ static const struct { [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -384,6 +537,16 @@ static const struct { IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), + [IB_QPT_XRC_INI] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN), + [IB_QPT_XRC_TGT] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER), }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | @@ -394,6 +557,12 @@ static const struct { [IB_QPT_RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), + [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -414,6 +583,13 @@ static const struct { IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT | + IB_QP_SQ_PSN), [IB_QPT_SMI] = IB_QP_SQ_PSN, [IB_QPT_GSI] = IB_QP_SQ_PSN, }, @@ -429,6 +605,15 @@ static const struct { IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -453,6 +638,15 @@ static const struct { IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE | + IB_QP_MIN_RNR_TIMER), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -465,6 +659,8 @@ static const struct { [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */ [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY } @@ -487,6 +683,15 @@ static const struct { IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -515,6 +720,25 @@ static const struct { IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_INI] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC_TGT] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -579,7 +803,7 @@ int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL); + return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); @@ -589,11 +813,59 @@ int ib_query_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr) { return qp->device->query_qp ? - qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) : + qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : -ENOSYS; } EXPORT_SYMBOL(ib_query_qp); +int ib_close_qp(struct ib_qp *qp) +{ + struct ib_qp *real_qp; + unsigned long flags; + + real_qp = qp->real_qp; + if (real_qp == qp) + return -EINVAL; + + spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + list_del(&qp->open_list); + spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + + atomic_dec(&real_qp->usecnt); + kfree(qp); + + return 0; +} +EXPORT_SYMBOL(ib_close_qp); + +static int __ib_destroy_shared_qp(struct ib_qp *qp) +{ + struct ib_xrcd *xrcd; + struct ib_qp *real_qp; + int ret; + + real_qp = qp->real_qp; + xrcd = real_qp->xrcd; + + mutex_lock(&xrcd->tgt_qp_mutex); + ib_close_qp(qp); + if (atomic_read(&real_qp->usecnt) == 0) + list_del(&real_qp->xrcd_list); + else + real_qp = NULL; + mutex_unlock(&xrcd->tgt_qp_mutex); + + if (real_qp) { + ret = ib_destroy_qp(real_qp); + if (!ret) + atomic_dec(&xrcd->usecnt); + else + __ib_insert_xrcd_qp(xrcd, real_qp); + } + + return 0; +} + int ib_destroy_qp(struct ib_qp *qp) { struct ib_pd *pd; @@ -601,16 +873,25 @@ int ib_destroy_qp(struct ib_qp *qp) struct ib_srq *srq; int ret; - pd = qp->pd; - scq = qp->send_cq; - rcq = qp->recv_cq; - srq = qp->srq; + if (atomic_read(&qp->usecnt)) + return -EBUSY; + + if (qp->real_qp != qp) + return __ib_destroy_shared_qp(qp); + + pd = qp->pd; + scq = qp->send_cq; + rcq = qp->recv_cq; + srq = qp->srq; ret = qp->device->destroy_qp(qp); if (!ret) { - atomic_dec(&pd->usecnt); - atomic_dec(&scq->usecnt); - atomic_dec(&rcq->usecnt); + if (pd) + atomic_dec(&pd->usecnt); + if (scq) + atomic_dec(&scq->usecnt); + if (rcq) + atomic_dec(&rcq->usecnt); if (srq) atomic_dec(&srq->usecnt); } @@ -920,3 +1201,42 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) return qp->device->detach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_detach_mcast); + +struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device) +{ + struct ib_xrcd *xrcd; + + if (!device->alloc_xrcd) + return ERR_PTR(-ENOSYS); + + xrcd = device->alloc_xrcd(device, NULL, NULL); + if (!IS_ERR(xrcd)) { + xrcd->device = device; + xrcd->inode = NULL; + atomic_set(&xrcd->usecnt, 0); + mutex_init(&xrcd->tgt_qp_mutex); + INIT_LIST_HEAD(&xrcd->tgt_qp_list); + } + + return xrcd; +} +EXPORT_SYMBOL(ib_alloc_xrcd); + +int ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + struct ib_qp *qp; + int ret; + + if (atomic_read(&xrcd->usecnt)) + return -EBUSY; + + while (!list_empty(&xrcd->tgt_qp_list)) { + qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list); + ret = ib_destroy_qp(qp); + if (ret) + return ret; + } + + return xrcd->device->dealloc_xrcd(xrcd); +} +EXPORT_SYMBOL(ib_dealloc_xrcd); diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c index 24f9e3a..32d34e8 100644 --- a/drivers/infiniband/hw/amso1100/c2_ae.c +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -288,6 +288,11 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) cm_event.private_data_len = be32_to_cpu(req->private_data_length); cm_event.private_data = req->private_data; + /* + * Until ird/ord negotiation via MPAv2 support is added, send + * max supported values + */ + cm_event.ird = cm_event.ord = 128; if (cm_id->event_handler) cm_id->event_handler(cm_id, &cm_event); diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c index 0ebe4e8..8951db4 100644 --- a/drivers/infiniband/hw/amso1100/c2_intr.c +++ b/drivers/infiniband/hw/amso1100/c2_intr.c @@ -183,6 +183,11 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index) case IW_CM_EVENT_ESTABLISHED: c2_set_qp_state(req->qp, C2_QP_STATE_RTS); + /* + * Until ird/ord negotiation via MPAv2 support is added, send + * max supported values + */ + cm_event.ird = cm_event.ord = 128; case IW_CM_EVENT_CLOSE: /* diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index f101bb7..12f923d 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -753,10 +753,7 @@ static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev) memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6); /* Print out the MAC address */ - pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n", - netdev->name, - netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2], - netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]); + pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr); #if 0 /* Disable network packets */ diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 6cd642a..de6d077 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -753,6 +753,11 @@ static void connect_request_upcall(struct iwch_ep *ep) event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); event.provider_data = ep; + /* + * Until ird/ord negotiation via MPAv2 support is added, send max + * supported values + */ + event.ird = event.ord = 8; if (state_read(&ep->parent_ep->com) != DEAD) { get_ep(&ep->com); ep->parent_ep->com.cm_id->event_handler( @@ -770,6 +775,11 @@ static void established_upcall(struct iwch_ep *ep) PDBG("%s ep %p\n", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; + /* + * Until ird/ord negotiation via MPAv2 support is added, send max + * supported values + */ + event.ird = event.ord = 8; if (ep->com.cm_id) { PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c index 71e0d84..abcc9e7 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -46,6 +46,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, struct ib_event event; struct iwch_qp_attributes attrs; struct iwch_qp *qhp; + unsigned long flag; spin_lock(&rnicp->lock); qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); @@ -94,7 +95,9 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); + spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); if (atomic_dec_and_test(&qhp->refcnt)) wake_up(&qhp->wait); @@ -107,6 +110,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) struct iwch_cq *chp; struct iwch_qp *qhp; u32 cqid = RSPQ_CQID(rsp_msg); + unsigned long flag; rnicp = (struct iwch_dev *) rdev_p->ulp; spin_lock(&rnicp->lock); @@ -170,7 +174,9 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) */ if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) dst_confirm(qhp->ep->dst); + spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); break; case TPT_ERR_STAG: diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index c7d9411..37c224f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -190,6 +190,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve chp->rhp = rhp; chp->ibcq.cqe = 1 << chp->cq.size_log2; spin_lock_init(&chp->lock); + spin_lock_init(&chp->comp_handler_lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) { diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 9a342c9..87c14b0 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -103,6 +103,7 @@ struct iwch_cq { struct iwch_dev *rhp; struct t3_cq cq; spinlock_t lock; + spinlock_t comp_handler_lock; atomic_t refcnt; wait_queue_head_t wait; u32 __user *user_rptr_addr; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index ecd313f..bea5839 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -822,8 +822,11 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, *flag); - if (flushed) + if (flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, *flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, *flag); + } /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, *flag); @@ -833,8 +836,11 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, *flag); - if (flushed) + if (flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, *flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, *flag); + } /* deref */ if (atomic_dec_and_test(&qhp->refcnt)) @@ -853,11 +859,15 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) if (qhp->ibqp.uobject) { cxio_set_wq_in_error(&qhp->wq); cxio_set_cq_in_error(&rchp->cq); + spin_lock_irqsave(&rchp->comp_handler_lock, *flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, *flag); if (schp != rchp) { cxio_set_cq_in_error(&schp->cq); + spin_lock_irqsave(&schp->comp_handler_lock, *flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, *flag); } return; } diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 77f769d..b36cdac 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -103,7 +103,8 @@ MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " static int mpa_rev = 1; module_param(mpa_rev, int, 0644); MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " - "1 is spec compliant. (default=1)"); + "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft" + " compliant (default=1)"); static int markers_enabled; module_param(markers_enabled, int, 0644); @@ -497,17 +498,21 @@ static int send_connect(struct c4iw_ep *ep) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb) +static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, + u8 mpa_rev_to_use) { int mpalen, wrlen; struct fw_ofld_tx_data_wr *req; struct mpa_message *mpa; + struct mpa_v2_conn_params mpa_v2_params; PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); BUG_ON(skb_cloned(skb)); mpalen = sizeof(*mpa) + ep->plen; + if (mpa_rev_to_use == 2) + mpalen += sizeof(struct mpa_v2_conn_params); wrlen = roundup(mpalen + sizeof *req, 16); skb = get_skb(skb, wrlen, GFP_KERNEL); if (!skb) { @@ -533,12 +538,39 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb) mpa = (struct mpa_message *)(req + 1); memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); mpa->flags = (crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); + (markers_enabled ? MPA_MARKERS : 0) | + (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); mpa->private_data_size = htons(ep->plen); - mpa->revision = mpa_rev; + mpa->revision = mpa_rev_to_use; + if (mpa_rev_to_use == 1) + ep->tried_with_mpa_v1 = 1; + + if (mpa_rev_to_use == 2) { + mpa->private_data_size += + htons(sizeof(struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons((u16)ep->ird); + mpa_v2_params.ord = htons((u16)ep->ord); + + if (peer2peer) { + mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); + if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_WRITE_RTR); + else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_READ_RTR); + } + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); - if (ep->plen) - memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), + ep->mpa_pkt + sizeof(*mpa), ep->plen); + } else + if (ep->plen) + memcpy(mpa->private_data, + ep->mpa_pkt + sizeof(*mpa), ep->plen); /* * Reference the mpa skb. This ensures the data area @@ -562,10 +594,13 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) struct fw_ofld_tx_data_wr *req; struct mpa_message *mpa; struct sk_buff *skb; + struct mpa_v2_conn_params mpa_v2_params; PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); mpalen = sizeof(*mpa) + plen; + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) + mpalen += sizeof(struct mpa_v2_conn_params); wrlen = roundup(mpalen + sizeof *req, 16); skb = get_skb(NULL, wrlen, GFP_KERNEL); @@ -595,8 +630,29 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) mpa->flags = MPA_REJECT; mpa->revision = mpa_rev; mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size += + htons(sizeof(struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons(((u16)ep->ird) | + (peer2peer ? MPA_V2_PEER2PEER_MODEL : + 0)); + mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? + (p2p_type == + FW_RI_INIT_P2PTYPE_RDMA_WRITE ? + MPA_V2_RDMA_WRITE_RTR : p2p_type == + FW_RI_INIT_P2PTYPE_READ_REQ ? + MPA_V2_RDMA_READ_RTR : 0) : 0)); + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), pdata, plen); + } else + if (plen) + memcpy(mpa->private_data, pdata, plen); /* * Reference the mpa skb again. This ensures the data area @@ -617,10 +673,13 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) struct fw_ofld_tx_data_wr *req; struct mpa_message *mpa; struct sk_buff *skb; + struct mpa_v2_conn_params mpa_v2_params; PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); mpalen = sizeof(*mpa) + plen; + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) + mpalen += sizeof(struct mpa_v2_conn_params); wrlen = roundup(mpalen + sizeof *req, 16); skb = get_skb(NULL, wrlen, GFP_KERNEL); @@ -649,10 +708,36 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | (markers_enabled ? MPA_MARKERS : 0); - mpa->revision = mpa_rev; + mpa->revision = ep->mpa_attr.version; mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size += + htons(sizeof(struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons((u16)ep->ird); + mpa_v2_params.ord = htons((u16)ep->ord); + if (peer2peer && (ep->mpa_attr.p2p_type != + FW_RI_INIT_P2PTYPE_DISABLED)) { + mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); + + if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_WRITE_RTR); + else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_READ_RTR); + } + + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), pdata, plen); + } else + if (plen) + memcpy(mpa->private_data, pdata, plen); /* * Reference the mpa skb. This ensures the data area @@ -695,7 +780,10 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) /* start MPA negotiation */ send_flowc(ep, NULL); - send_mpa_req(ep, skb); + if (ep->retry_with_mpa_v1) + send_mpa_req(ep, skb, 1); + else + send_mpa_req(ep, skb, mpa_rev); return 0; } @@ -769,8 +857,19 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) event.remote_addr = ep->com.remote_addr; if ((status == 0) || (status == -ECONNREFUSED)) { - event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + if (!ep->tried_with_mpa_v1) { + /* this means MPA_v2 is used */ + event.private_data_len = ep->plen - + sizeof(struct mpa_v2_conn_params); + event.private_data = ep->mpa_pkt + + sizeof(struct mpa_message) + + sizeof(struct mpa_v2_conn_params); + } else { + /* this means MPA_v1 is used */ + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + + sizeof(struct mpa_message); + } } PDBG("%s ep %p tid %u status %d\n", __func__, ep, @@ -793,9 +892,22 @@ static void connect_request_upcall(struct c4iw_ep *ep) event.event = IW_CM_EVENT_CONNECT_REQUEST; event.local_addr = ep->com.local_addr; event.remote_addr = ep->com.remote_addr; - event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); event.provider_data = ep; + if (!ep->tried_with_mpa_v1) { + /* this means MPA_v2 is used */ + event.ord = ep->ord; + event.ird = ep->ird; + event.private_data_len = ep->plen - + sizeof(struct mpa_v2_conn_params); + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + + sizeof(struct mpa_v2_conn_params); + } else { + /* this means MPA_v1 is used. Send max supported */ + event.ord = c4iw_max_read_depth; + event.ird = c4iw_max_read_depth; + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + } if (state_read(&ep->parent_ep->com) != DEAD) { c4iw_get_ep(&ep->com); ep->parent_ep->com.cm_id->event_handler( @@ -813,6 +925,8 @@ static void established_upcall(struct c4iw_ep *ep) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; + event.ird = ep->ird; + event.ord = ep->ord; if (ep->com.cm_id) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); @@ -848,7 +962,10 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) { struct mpa_message *mpa; + struct mpa_v2_conn_params *mpa_v2_params; u16 plen; + u16 resp_ird, resp_ord; + u8 rtr_mismatch = 0, insuff_ird = 0; struct c4iw_qp_attributes attrs; enum c4iw_qp_attr_mask mask; int err; @@ -888,7 +1005,9 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) mpa = (struct mpa_message *) ep->mpa_pkt; /* Validate MPA header. */ - if (mpa->revision != mpa_rev) { + if (mpa->revision > mpa_rev) { + printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," + " Received = %d\n", __func__, mpa_rev, mpa->revision); err = -EPROTO; goto err; } @@ -938,13 +1057,66 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; - ep->mpa_attr.version = mpa_rev; - ep->mpa_attr.p2p_type = peer2peer ? p2p_type : - FW_RI_INIT_P2PTYPE_DISABLED; + ep->mpa_attr.version = mpa->revision; + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + + if (mpa->revision == 2) { + ep->mpa_attr.enhanced_rdma_conn = + mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; + if (ep->mpa_attr.enhanced_rdma_conn) { + mpa_v2_params = (struct mpa_v2_conn_params *) + (ep->mpa_pkt + sizeof(*mpa)); + resp_ird = ntohs(mpa_v2_params->ird) & + MPA_V2_IRD_ORD_MASK; + resp_ord = ntohs(mpa_v2_params->ord) & + MPA_V2_IRD_ORD_MASK; + + /* + * This is a double-check. Ideally, below checks are + * not required since ird/ord stuff has been taken + * care of in c4iw_accept_cr + */ + if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) { + err = -ENOMEM; + ep->ird = resp_ord; + ep->ord = resp_ird; + insuff_ird = 1; + } + + if (ntohs(mpa_v2_params->ird) & + MPA_V2_PEER2PEER_MODEL) { + if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_WRITE_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_RDMA_WRITE; + else if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_READ_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_READ_REQ; + } + } + } else if (mpa->revision == 1) + if (peer2peer) + ep->mpa_attr.p2p_type = p2p_type; + PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " - "xmit_marker_enabled=%d, version=%d\n", __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); + "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = " + "%d\n", __func__, ep->mpa_attr.crc_enabled, + ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, + ep->mpa_attr.p2p_type, p2p_type); + + /* + * If responder's RTR does not match with that of initiator, assign + * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not + * generated when moving QP to RTS state. + * A TERM message will be sent after QP has moved to RTS state + */ + if ((ep->mpa_attr.version == 2) && + (ep->mpa_attr.p2p_type != p2p_type)) { + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + rtr_mismatch = 1; + } attrs.mpa_attr = ep->mpa_attr; attrs.max_ird = ep->ird; @@ -961,6 +1133,39 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) ep->com.qp, mask, &attrs, 1); if (err) goto err; + + /* + * If responder's RTR requirement did not match with what initiator + * supports, generate TERM message + */ + if (rtr_mismatch) { + printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__); + attrs.layer_etype = LAYER_MPA | DDP_LLP; + attrs.ecode = MPA_NOMATCH_RTR; + attrs.next_state = C4IW_QP_STATE_TERMINATE; + err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + err = -ENOMEM; + goto out; + } + + /* + * Generate TERM if initiator IRD is not sufficient for responder + * provided ORD. Currently, we do the same behaviour even when + * responder provided IRD is also not sufficient as regards to + * initiator ORD. + */ + if (insuff_ird) { + printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n", + __func__); + attrs.layer_etype = LAYER_MPA | DDP_LLP; + attrs.ecode = MPA_INSUFF_IRD; + attrs.next_state = C4IW_QP_STATE_TERMINATE; + err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + err = -ENOMEM; + goto out; + } goto out; err: state_set(&ep->com, ABORTING); @@ -973,6 +1178,7 @@ out: static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) { struct mpa_message *mpa; + struct mpa_v2_conn_params *mpa_v2_params; u16 plen; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); @@ -1013,7 +1219,9 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) /* * Validate MPA Header. */ - if (mpa->revision != mpa_rev) { + if (mpa->revision > mpa_rev) { + printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," + " Received = %d\n", __func__, mpa_rev, mpa->revision); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1056,9 +1264,37 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; - ep->mpa_attr.version = mpa_rev; - ep->mpa_attr.p2p_type = peer2peer ? p2p_type : - FW_RI_INIT_P2PTYPE_DISABLED; + ep->mpa_attr.version = mpa->revision; + if (mpa->revision == 1) + ep->tried_with_mpa_v1 = 1; + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + + if (mpa->revision == 2) { + ep->mpa_attr.enhanced_rdma_conn = + mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; + if (ep->mpa_attr.enhanced_rdma_conn) { + mpa_v2_params = (struct mpa_v2_conn_params *) + (ep->mpa_pkt + sizeof(*mpa)); + ep->ird = ntohs(mpa_v2_params->ird) & + MPA_V2_IRD_ORD_MASK; + ep->ord = ntohs(mpa_v2_params->ord) & + MPA_V2_IRD_ORD_MASK; + if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) + if (peer2peer) { + if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_WRITE_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_RDMA_WRITE; + else if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_READ_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_READ_REQ; + } + } + } else if (mpa->revision == 1) + if (peer2peer) + ep->mpa_attr.p2p_type = p2p_type; + PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__, ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, @@ -1550,6 +1786,112 @@ static int is_neg_adv_abort(unsigned int status) status == CPL_ERR_PERSIST_NEG_ADVICE; } +static int c4iw_reconnect(struct c4iw_ep *ep) +{ + int err = 0; + struct rtable *rt; + struct net_device *pdev; + struct neighbour *neigh; + int step; + + PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); + init_timer(&ep->timer); + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + + /* find a route */ + rt = find_route(ep->com.dev, + ep->com.cm_id->local_addr.sin_addr.s_addr, + ep->com.cm_id->remote_addr.sin_addr.s_addr, + ep->com.cm_id->local_addr.sin_port, + ep->com.cm_id->remote_addr.sin_port, 0); + if (!rt) { + printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + ep->dst = &rt->dst; + + neigh = dst_get_neighbour(ep->dst); + + /* get a l2t entry */ + if (neigh->dev->flags & IFF_LOOPBACK) { + PDBG("%s LOOPBACK\n", __func__); + pdev = ip_dev_find(&init_net, + ep->com.cm_id->remote_addr.sin_addr.s_addr); + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, pdev, 0); + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + step = ep->com.dev->rdev.lldi.ntxq / + ep->com.dev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(pdev) * step; + step = ep->com.dev->rdev.lldi.nrxq / + ep->com.dev->rdev.lldi.nchan; + ep->ctrlq_idx = cxgb4_port_idx(pdev); + ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(pdev) * step]; + dev_put(pdev); + } else { + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, neigh->dev, 0); + ep->mtu = dst_mtu(ep->dst); + ep->tx_chan = cxgb4_port_chan(neigh->dev); + ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1; + step = ep->com.dev->rdev.lldi.ntxq / + ep->com.dev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(neigh->dev) * step; + ep->ctrlq_idx = cxgb4_port_idx(neigh->dev); + step = ep->com.dev->rdev.lldi.nrxq / + ep->com.dev->rdev.lldi.nchan; + ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(neigh->dev) * step]; + } + if (!ep->l2t) { + printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); + err = -ENOMEM; + goto fail4; + } + + PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = 0; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + /* + * remember to send notification to upper layer. + * We are in here so the upper layer is not aware that this is + * re-connect attempt and so, upper layer is still waiting for + * response of 1st connect request. + */ + connect_reply_upcall(ep, -ECONNRESET); + c4iw_put_ep(&ep->com); +out: + return err; +} + static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); @@ -1573,8 +1915,11 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) /* * Wake up any threads in rdma_init() or rdma_fini(). + * However, this is not needed if com state is just + * MPA_REQ_SENT */ - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + if (ep->com.state != MPA_REQ_SENT) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); mutex_lock(&ep->com.mutex); switch (ep->com.state) { @@ -1585,7 +1930,21 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) break; case MPA_REQ_SENT: stop_ep_timer(ep); - connect_reply_upcall(ep, -ECONNRESET); + if (mpa_rev == 2 && ep->tried_with_mpa_v1) + connect_reply_upcall(ep, -ECONNRESET); + else { + /* + * we just don't send notification upwards because we + * want to retry with mpa_v1 without upper layers even + * knowing it. + * + * do some housekeeping so as to re-initiate the + * connection + */ + PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__, + mpa_rev); + ep->retry_with_mpa_v1 = 1; + } break; case MPA_REP_SENT: break; @@ -1621,7 +1980,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) dst_confirm(ep->dst); if (ep->com.state != ABORTING) { __state_set(&ep->com, DEAD); - release = 1; + /* we don't release if we want to retry with mpa_v1 */ + if (!ep->retry_with_mpa_v1) + release = 1; } mutex_unlock(&ep->com.mutex); @@ -1641,6 +2002,15 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) out: if (release) release_ep_resources(ep); + + /* retry with mpa-v1 */ + if (ep && ep->retry_with_mpa_v1) { + cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + } + return 0; } @@ -1792,18 +2162,40 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto err; } - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = qp; + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + if (conn_param->ord > ep->ird) { + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + send_mpa_reject(ep, conn_param->private_data, + conn_param->private_data_len); + abort_connection(ep, NULL, GFP_KERNEL); + err = -ENOMEM; + goto err; + } + if (conn_param->ird > ep->ord) { + if (!ep->ord) + conn_param->ird = 1; + else { + abort_connection(ep, NULL, GFP_KERNEL); + err = -ENOMEM; + goto err; + } + } + } ep->ird = conn_param->ird; ep->ord = conn_param->ord; - if (peer2peer && ep->ird == 0) - ep->ird = 1; + if (ep->mpa_attr.version != 2) + if (peer2peer && ep->ird == 0) + ep->ird = 1; PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.qp = qp; + /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; attrs.max_ird = ep->ird; @@ -1944,6 +2336,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->com.dev->rdev.lldi.nchan; ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ cxgb4_port_idx(neigh->dev) * step]; + ep->retry_with_mpa_v1 = 0; + ep->tried_with_mpa_v1 = 0; } if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); @@ -2323,8 +2717,11 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) /* * Wake up any threads in rdma_init() or rdma_fini(). + * However, this is not needed if com state is just + * MPA_REQ_SENT */ - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + if (ep->com.state != MPA_REQ_SENT) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); sched(dev, skb); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 1720dc7..f35a935 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -185,7 +185,7 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) V_CQE_OPCODE(FW_RI_SEND) | V_CQE_TYPE(0) | V_CQE_SWCQE(1) | - V_CQE_QPID(wq->rq.qid)); + V_CQE_QPID(wq->sq.qid)); cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen)); cq->sw_queue[cq->sw_pidx] = cqe; t4_swcq_produce(cq); @@ -818,6 +818,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, chp->cq.size--; /* status page */ chp->ibcq.cqe = entries - 2; spin_lock_init(&chp->lock); + spin_lock_init(&chp->comp_handler_lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 40a13cc..6d0df6e 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -376,10 +376,8 @@ struct uld_ctx { struct c4iw_dev *dev; }; -static void c4iw_remove(struct uld_ctx *ctx) +static void c4iw_dealloc(struct uld_ctx *ctx) { - PDBG("%s c4iw_dev %p\n", __func__, ctx->dev); - c4iw_unregister_device(ctx->dev); c4iw_rdev_close(&ctx->dev->rdev); idr_destroy(&ctx->dev->cqidr); idr_destroy(&ctx->dev->qpidr); @@ -389,11 +387,30 @@ static void c4iw_remove(struct uld_ctx *ctx) ctx->dev = NULL; } +static void c4iw_remove(struct uld_ctx *ctx) +{ + PDBG("%s c4iw_dev %p\n", __func__, ctx->dev); + c4iw_unregister_device(ctx->dev); + c4iw_dealloc(ctx); +} + +static int rdma_supported(const struct cxgb4_lld_info *infop) +{ + return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 && + infop->vr->rq.size > 0 && infop->vr->qp.size > 0 && + infop->vr->cq.size > 0 && infop->vr->ocq.size > 0; +} + static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) { struct c4iw_dev *devp; int ret; + if (!rdma_supported(infop)) { + printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n", + pci_name(infop->pdev)); + return ERR_PTR(-ENOSYS); + } devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); if (!devp) { printk(KERN_ERR MOD "Cannot allocate ib device\n"); @@ -414,7 +431,6 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) ret = c4iw_rdev_open(&devp->rdev); if (ret) { - mutex_unlock(&dev_mutex); printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret); ib_dealloc_device(&devp->ibdev); return ERR_PTR(ret); @@ -519,15 +535,24 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state) case CXGB4_STATE_UP: printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev)); if (!ctx->dev) { - int ret = 0; + int ret; ctx->dev = c4iw_alloc(&ctx->lldi); - if (!IS_ERR(ctx->dev)) - ret = c4iw_register_device(ctx->dev); - if (IS_ERR(ctx->dev) || ret) + if (IS_ERR(ctx->dev)) { + printk(KERN_ERR MOD + "%s: initialization failed: %ld\n", + pci_name(ctx->lldi.pdev), + PTR_ERR(ctx->dev)); + ctx->dev = NULL; + break; + } + ret = c4iw_register_device(ctx->dev); + if (ret) { printk(KERN_ERR MOD "%s: RDMA registration failed: %d\n", pci_name(ctx->lldi.pdev), ret); + c4iw_dealloc(ctx); + } } break; case CXGB4_STATE_DOWN: diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index c13041a..397cb36 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -42,6 +42,7 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, { struct ib_event event; struct c4iw_qp_attributes attrs; + unsigned long flag; if ((qhp->attr.state == C4IW_QP_STATE_ERROR) || (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) { @@ -72,7 +73,9 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); + spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); } void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) @@ -183,11 +186,14 @@ out: int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) { struct c4iw_cq *chp; + unsigned long flag; chp = get_chp(dev, qid); - if (chp) + if (chp) { + spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - else + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + } else PDBG("%s unknown cqid 0x%x\n", __func__, qid); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4f04537..1357c5b 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -309,6 +309,7 @@ struct c4iw_cq { struct c4iw_dev *rhp; struct t4_cq cq; spinlock_t lock; + spinlock_t comp_handler_lock; atomic_t refcnt; wait_queue_head_t wait; }; @@ -323,6 +324,7 @@ struct c4iw_mpa_attributes { u8 recv_marker_enabled; u8 xmit_marker_enabled; u8 crc_enabled; + u8 enhanced_rdma_conn; u8 version; u8 p2p_type; }; @@ -349,6 +351,8 @@ struct c4iw_qp_attributes { u8 is_terminate_local; struct c4iw_mpa_attributes mpa_attr; struct c4iw_ep *llp_stream_handle; + u8 layer_etype; + u8 ecode; }; struct c4iw_qp { @@ -501,11 +505,18 @@ enum c4iw_mmid_state { #define MPA_KEY_REP "MPA ID Rep Frame" #define MPA_MAX_PRIVATE_DATA 256 +#define MPA_ENHANCED_RDMA_CONN 0x10 #define MPA_REJECT 0x20 #define MPA_CRC 0x40 #define MPA_MARKERS 0x80 #define MPA_FLAGS_MASK 0xE0 +#define MPA_V2_PEER2PEER_MODEL 0x8000 +#define MPA_V2_ZERO_LEN_FPDU_RTR 0x4000 +#define MPA_V2_RDMA_WRITE_RTR 0x8000 +#define MPA_V2_RDMA_READ_RTR 0x4000 +#define MPA_V2_IRD_ORD_MASK 0x3FFF + #define c4iw_put_ep(ep) { \ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ ep, atomic_read(&((ep)->kref.refcount))); \ @@ -528,6 +539,11 @@ struct mpa_message { u8 private_data[0]; }; +struct mpa_v2_conn_params { + __be16 ird; + __be16 ord; +}; + struct terminate_message { u8 layer_etype; u8 ecode; @@ -580,7 +596,10 @@ enum c4iw_ddp_ecodes { enum c4iw_mpa_ecodes { MPA_CRC_ERR = 0x02, - MPA_MARKER_ERR = 0x03 + MPA_MARKER_ERR = 0x03, + MPA_LOCAL_CATA = 0x05, + MPA_INSUFF_IRD = 0x06, + MPA_NOMATCH_RTR = 0x07, }; enum c4iw_ep_state { @@ -651,6 +670,8 @@ struct c4iw_ep { u16 txq_idx; u16 ctrlq_idx; u8 tos; + u8 retry_with_mpa_v1; + u8 tried_with_mpa_v1; }; static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index a41578e..d6ccc7e 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -917,7 +917,11 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, wqe->u.terminate.type = FW_RI_TYPE_TERMINATE; wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term); term = (struct terminate_message *)wqe->u.terminate.termmsg; - build_term_codes(err_cqe, &term->layer_etype, &term->ecode); + if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) { + term->layer_etype = qhp->attr.layer_etype; + term->ecode = qhp->attr.ecode; + } else + build_term_codes(err_cqe, &term->layer_etype, &term->ecode); c4iw_ofld_send(&qhp->rhp->rdev, skb); } @@ -941,8 +945,11 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, flag); - if (flushed) + if (flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, flag); @@ -952,13 +959,17 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); - if (flushed) + if (flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } } static void flush_qp(struct c4iw_qp *qhp) { struct c4iw_cq *rchp, *schp; + unsigned long flag; rchp = get_chp(qhp->rhp, qhp->attr.rcq); schp = get_chp(qhp->rhp, qhp->attr.scq); @@ -966,8 +977,16 @@ static void flush_qp(struct c4iw_qp *qhp) if (qhp->ibqp.uobject) { t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); - if (schp != rchp) + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + if (schp != rchp) { t4_set_cq_in_error(&schp->cq); + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } return; } __flush_qp(qhp, rchp, schp); @@ -1012,6 +1031,7 @@ out: static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) { + PDBG("%s p2p_type = %d\n", __func__, p2p_type); memset(&init->u, 0, sizeof init->u); switch (p2p_type) { case FW_RI_INIT_P2PTYPE_RDMA_WRITE: @@ -1206,12 +1226,16 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, disconnect = 1; c4iw_get_ep(&qhp->ep->com); } + if (qhp->ibqp.uobject) + t4_set_wq_in_error(&qhp->wq); ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; break; case C4IW_QP_STATE_TERMINATE: set_state(qhp, C4IW_QP_STATE_TERMINATE); + qhp->attr.layer_etype = attrs->layer_etype; + qhp->attr.ecode = attrs->ecode; if (qhp->ibqp.uobject) t4_set_wq_in_error(&qhp->wq); ep = qhp->ep; @@ -1222,6 +1246,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, break; case C4IW_QP_STATE_ERROR: set_state(qhp, C4IW_QP_STATE_ERROR); + if (qhp->ibqp.uobject) + t4_set_wq_in_error(&qhp->wq); if (!internal) { abort = 1; disconnect = 1; @@ -1334,7 +1360,10 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) rhp = qhp->rhp; attrs.next_state = C4IW_QP_STATE_ERROR; - c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + if (qhp->attr.state == C4IW_QP_STATE_TERMINATE) + c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + else + c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index d9b1bb4..818d721 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -125,7 +125,7 @@ int ehca_create_eq(struct ehca_shca *shca, tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, - IRQF_DISABLED, "ehca_eq", + 0, "ehca_eq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); @@ -133,7 +133,7 @@ int ehca_create_eq(struct ehca_shca *shca, tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, - IRQF_DISABLED, "ehca_neq", + 0, "ehca_neq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 32fb342..964f855 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -977,6 +977,9 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, struct hcp_modify_qp_control_block *mqpcb; u64 hret, update_mask; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + /* For common attributes, internal_create_qp() takes its info * out of qp_init_attr, so copy all common attrs there. */ diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 7c1eebe..824a4d5 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -34,6 +34,7 @@ #include <linux/pci.h> #include <linux/netdevice.h> #include <linux/slab.h> +#include <linux/stat.h> #include <linux/vmalloc.h> #include "ipath_kernel.h" diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 386e2c7..2627198 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -107,6 +107,11 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, u32 sz; struct ib_srq *ret; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) { + ret = ERR_PTR(-ENOSYS); + goto done; + } + if (srq_init_attr->attr.max_wr == 0) { ret = ERR_PTR(-EINVAL); goto done; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index fa643f4..77f3dbc 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -128,6 +128,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) + props->device_cap_flags |= IB_DEVICE_XRC; props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; @@ -181,8 +183,12 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) static int ib_link_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, + struct ib_smp *in_mad, struct ib_smp *out_mad) { + int ext_active_speed; + int err; + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); @@ -203,6 +209,39 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port, props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + /* Check if extended speeds (EDR/FDR/...) are supported */ + if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { + ext_active_speed = out_mad->data[62] >> 4; + + switch (ext_active_speed) { + case 1: + props->active_speed = 16; /* FDR */ + break; + case 2: + props->active_speed = 32; /* EDR */ + break; + } + } + + /* If reported active speed is QDR, check if is FDR-10 */ + if (props->active_speed == 4) { + if (to_mdev(ibdev)->dev->caps.ext_port_cap[port] & + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { + init_query_mad(in_mad); + in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, + NULL, NULL, in_mad, out_mad); + if (err) + return err; + + /* Checking LinkSpeedActive for FDR-10 */ + if (out_mad->data[15] & 0x1) + props->active_speed = 8; + } + } + return 0; } @@ -227,7 +266,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->pkey_tbl_len = 1; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); - props->max_mtu = IB_MTU_2048; + props->max_mtu = IB_MTU_4096; props->subnet_timeout = 0; props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = 0; @@ -274,7 +313,7 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, goto out; err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? - ib_link_query_port(ibdev, port, props, out_mad) : + ib_link_query_port(ibdev, port, props, in_mad, out_mad) : eth_link_query_port(ibdev, port, props, out_mad); out: @@ -566,6 +605,57 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd) return 0; } +static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx4_ib_xrcd *xrcd; + int err; + + if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + + xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); + if (!xrcd) + return ERR_PTR(-ENOMEM); + + err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); + if (err) + goto err1; + + xrcd->pd = ib_alloc_pd(ibdev); + if (IS_ERR(xrcd->pd)) { + err = PTR_ERR(xrcd->pd); + goto err2; + } + + xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0); + if (IS_ERR(xrcd->cq)) { + err = PTR_ERR(xrcd->cq); + goto err3; + } + + return &xrcd->ibxrcd; + +err3: + ib_dealloc_pd(xrcd->pd); +err2: + mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); +err1: + kfree(xrcd); + return ERR_PTR(err); +} + +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + ib_destroy_cq(to_mxrcd(xrcd)->cq); + ib_dealloc_pd(to_mxrcd(xrcd)->pd); + mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); + kfree(xrcd); + + return 0; +} + static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) { struct mlx4_ib_qp *mqp = to_mqp(ibqp); @@ -1044,7 +1134,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | + (1ull << IB_USER_VERBS_CMD_OPEN_QP); ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; @@ -1093,6 +1185,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { + ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; + ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; + ibdev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | + (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + } + spin_lock_init(&iboe->lock); if (init_node_data(ibdev)) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e4bf2cf..ed80345 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -56,6 +56,13 @@ struct mlx4_ib_pd { u32 pdn; }; +struct mlx4_ib_xrcd { + struct ib_xrcd ibxrcd; + u32 xrcdn; + struct ib_pd *pd; + struct ib_cq *cq; +}; + struct mlx4_ib_cq_buf { struct mlx4_buf buf; struct mlx4_mtt mtt; @@ -138,6 +145,7 @@ struct mlx4_ib_qp { struct mlx4_mtt mtt; int buf_size; struct mutex mutex; + u16 xrcdn; u32 flags; u8 port; u8 alt_port; @@ -211,6 +219,11 @@ static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd) return container_of(ibpd, struct mlx4_ib_pd, ibpd); } +static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd); +} + static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx4_ib_cq, ibcq); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 3a91d9d..a16f0c8 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -302,15 +302,14 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_srq, struct mlx4_ib_qp *qp) + int is_user, int has_rq, struct mlx4_ib_qp *qp) { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes || cap->max_recv_sge > dev->dev->caps.max_rq_sg) return -EINVAL; - if (has_srq) { - /* QPs attached to an SRQ should have no RQ */ + if (!has_rq) { if (cap->max_recv_wr) return -EINVAL; @@ -463,6 +462,14 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev, return 0; } +static int qp_has_rq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) + return 0; + + return !attr->srq; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) @@ -479,7 +486,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); if (err) goto err; @@ -513,7 +520,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; - if (!init_attr->srq) { + if (qp_has_rq(init_attr)) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &qp->db); if (err) @@ -532,7 +539,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err; - if (!init_attr->srq) { + if (qp_has_rq(init_attr)) { err = mlx4_db_alloc(dev->dev, &qp->db, 0); if (err) goto err; @@ -575,6 +582,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_qpn; + if (init_attr->qp_type == IB_QPT_XRC_TGT) + qp->mqp.qpn |= (1 << 23); + /* * Hardware wants QPN written in big-endian order (after * shifting) for send doorbell. Precompute this value to save @@ -592,9 +602,8 @@ err_qpn: err_wrid: if (pd->uobject) { - if (!init_attr->srq) - mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), - &qp->db); + if (qp_has_rq(init_attr)) + mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { kfree(qp->sq.wrid); kfree(qp->rq.wrid); @@ -610,7 +619,7 @@ err_buf: mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: - if (!pd->uobject && !init_attr->srq) + if (!pd->uobject && qp_has_rq(init_attr)) mlx4_db_free(dev->dev, &qp->db); err: @@ -671,6 +680,33 @@ static void del_gid_entries(struct mlx4_ib_qp *qp) } } +static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp) +{ + if (qp->ibqp.qp_type == IB_QPT_XRC_TGT) + return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd); + else + return to_mpd(qp->ibqp.pd); +} + +static void get_cqs(struct mlx4_ib_qp *qp, + struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq) +{ + switch (qp->ibqp.qp_type) { + case IB_QPT_XRC_TGT: + *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq); + *recv_cq = *send_cq; + break; + case IB_QPT_XRC_INI: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = *send_cq; + break; + default: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = to_mcq(qp->ibqp.recv_cq); + break; + } +} + static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, int is_user) { @@ -682,8 +718,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n", qp->mqp.qpn); - send_cq = to_mcq(qp->ibqp.send_cq); - recv_cq = to_mcq(qp->ibqp.recv_cq); + get_cqs(qp, &send_cq, &recv_cq); mlx4_ib_lock_cqs(send_cq, recv_cq); @@ -706,7 +741,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { - if (!qp->ibqp.srq) + if (qp->rq.wqe_cnt) mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), &qp->db); ib_umem_release(qp->umem); @@ -714,7 +749,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, kfree(qp->sq.wrid); kfree(qp->rq.wrid); mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); - if (!qp->ibqp.srq) + if (qp->rq.wqe_cnt) mlx4_db_free(dev->dev, &qp->db); } @@ -725,10 +760,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_sqp *sqp; struct mlx4_ib_qp *qp; int err; + u16 xrcdn = 0; /* * We only support LSO and multicast loopback blocking, and @@ -739,10 +774,20 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); if (init_attr->create_flags && - (pd->uobject || init_attr->qp_type != IB_QPT_UD)) + (udata || init_attr->qp_type != IB_QPT_UD)) return ERR_PTR(-EINVAL); switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: + pd = to_mxrcd(init_attr->xrcd)->pd; + xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; + init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq; + /* fall through */ + case IB_QPT_XRC_INI: + if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + init_attr->recv_cq = init_attr->send_cq; + /* fall through */ case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: @@ -751,13 +796,14 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); - err = create_qp_common(dev, pd, init_attr, udata, 0, qp); + err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp); if (err) { kfree(qp); return ERR_PTR(err); } qp->ibqp.qp_num = qp->mqp.qpn; + qp->xrcdn = xrcdn; break; } @@ -765,7 +811,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_GSI: { /* Userspace is not allowed to create special QPs: */ - if (pd->uobject) + if (udata) return ERR_PTR(-EINVAL); sqp = kzalloc(sizeof *sqp, GFP_KERNEL); @@ -774,8 +820,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, qp = &sqp->qp; - err = create_qp_common(dev, pd, init_attr, udata, - dev->dev->caps.sqp_start + + err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, + to_mdev(pd->device)->dev->caps.sqp_start + (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + init_attr->port_num - 1, qp); @@ -801,11 +847,13 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); + struct mlx4_ib_pd *pd; if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); - destroy_qp_common(dev, mqp, !!qp->pd->uobject); + pd = get_pd(mqp); + destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -821,6 +869,8 @@ static int to_mlx4_st(enum ib_qp_type type) case IB_QPT_RC: return MLX4_QP_ST_RC; case IB_QPT_UC: return MLX4_QP_ST_UC; case IB_QPT_UD: return MLX4_QP_ST_UD; + case IB_QPT_XRC_INI: + case IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; case IB_QPT_SMI: case IB_QPT_GSI: return MLX4_QP_ST_MLX; default: return -1; @@ -959,6 +1009,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_ib_pd *pd; + struct mlx4_ib_cq *send_cq, *recv_cq; struct mlx4_qp_context *context; enum mlx4_qp_optpar optpar = 0; int sqd_event; @@ -1014,8 +1066,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; context->sq_size_stride |= qp->sq.wqe_shift - 4; - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; + context->xrcd = cpu_to_be32((u32) qp->xrcdn); + } if (qp->ibqp.uobject) context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); @@ -1079,8 +1133,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } - context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); - context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + pd = get_pd(qp); + get_cqs(qp, &send_cq, &recv_cq); + context->pd = cpu_to_be32(pd->pdn); + context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); + context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); + context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); /* Set "fast registration enabled" for all kernel QPs */ if (!qp->ibqp.uobject) @@ -1106,8 +1164,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_SQ_PSN) context->next_send_psn = cpu_to_be32(attr->sq_psn); - context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn); - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (attr->max_dest_rd_atomic) context->params2 |= @@ -1130,8 +1186,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_RQ_PSN) context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); - context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn); - if (attr_mask & IB_QP_QKEY) { context->qkey = cpu_to_be32(attr->qkey); optpar |= MLX4_QP_OPTPAR_Q_KEY; @@ -1140,7 +1194,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (ibqp->srq) context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); - if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_INIT && @@ -1225,17 +1279,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET && !ibqp->uobject) { - mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn, + mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq): NULL); - if (ibqp->send_cq != ibqp->recv_cq) - mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL); + if (send_cq != recv_cq) + mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); qp->rq.head = 0; qp->rq.tail = 0; qp->sq.head = 0; qp->sq.tail = 0; qp->sq_next_wqe = 0; - if (!ibqp->srq) + if (qp->rq.wqe_cnt) *qp->db.db = 0; } @@ -1547,14 +1601,13 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr, __be16 *vlan) + struct ib_send_wr *wr) { memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); - *vlan = dseg->vlan; } static void set_mlx_icrc_seg(void *dseg) @@ -1657,7 +1710,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 uninitialized_var(lso_hdr_sz); __be32 blh; int i; - __be16 vlan = cpu_to_be16(0xffff); spin_lock_irqsave(&qp->sq.lock, flags); @@ -1761,7 +1813,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_QPT_UD: - set_datagram_seg(wqe, wr, &vlan); + set_datagram_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; @@ -1824,11 +1876,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? MLX4_WQE_CTRL_FENCE : 0) | size; - if (be16_to_cpu(vlan) < 0x1000) { - ctrl->ins_vlan = 1 << 6; - ctrl->vlan_tag = vlan; - } - /* * Make sure descriptor is fully written before * setting ownership bit (because HW can start diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 818b7ec..39542f3 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -76,6 +76,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct mlx4_ib_srq *srq; struct mlx4_wqe_srq_next_seg *next; struct mlx4_wqe_data_seg *scatter; + u32 cqn; + u16 xrcdn; int desc_size; int buf_size; int err; @@ -174,12 +176,18 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, } } - err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt, + cqn = (init_attr->srq_type == IB_SRQT_XRC) ? + to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0; + xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ? + to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn : + (u16) dev->dev->caps.reserved_xrcds; + err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt, srq->db.dma, &srq->msrq); if (err) goto err_wrid; srq->msrq.event = mlx4_ib_srq_event; + srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; if (pd->uobject) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 365fe0e..cb9a0b9 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -438,6 +438,9 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, struct mthca_srq *srq; int err; + if (init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + srq = kmalloc(sizeof *srq, GFP_KERNEL); if (!srq) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 401b7bb..dfce9ea 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -77,26 +77,19 @@ atomic_t cm_nodes_destroyed; atomic_t cm_accel_dropped_pkts; atomic_t cm_resets_recvd; -static inline int mini_cm_accelerated(struct nes_cm_core *, - struct nes_cm_node *); -static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, - struct nes_vnic *, struct nes_cm_info *); +static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *); +static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, struct nes_vnic *, struct nes_cm_info *); static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *); -static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, - struct nes_vnic *, u16, void *, struct nes_cm_info *); +static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, struct nes_vnic *, u16, void *, struct nes_cm_info *); static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); -static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); -static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); -static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, - struct sk_buff *); +static int mini_cm_accept(struct nes_cm_core *, struct nes_cm_node *); +static int mini_cm_reject(struct nes_cm_core *, struct nes_cm_node *); +static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); static int mini_cm_dealloc_core(struct nes_cm_core *); static int mini_cm_get(struct nes_cm_core *); static int mini_cm_set(struct nes_cm_core *, u32, u32); -static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, - void *, u32, void *, u32, u8); +static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8); static int add_ref_cm_node(struct nes_cm_node *); static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); @@ -111,16 +104,14 @@ static int send_syn(struct nes_cm_node *, u32, struct sk_buff *); static int send_reset(struct nes_cm_node *, struct sk_buff *); static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb); static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb); -static void process_packet(struct nes_cm_node *, struct sk_buff *, - struct nes_cm_core *); +static void process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *); static void active_open_err(struct nes_cm_node *, struct sk_buff *, int); static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int); static void cleanup_retrans_entry(struct nes_cm_node *); static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *); static void free_retrans_entry(struct nes_cm_node *cm_node); -static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, - struct sk_buff *skb, int optionsize, int passive); +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive); /* CM event handler functions */ static void cm_event_connected(struct nes_cm_event *); @@ -130,6 +121,12 @@ static void cm_event_mpa_req(struct nes_cm_event *); static void cm_event_mpa_reject(struct nes_cm_event *); static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node); +/* MPA build functions */ +static int cm_build_mpa_frame(struct nes_cm_node *, u8 **, u16 *, u8 *, u8); +static void build_mpa_v2(struct nes_cm_node *, void *, u8); +static void build_mpa_v1(struct nes_cm_node *, void *, u8); +static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **); + static void print_core(struct nes_cm_core *core); /* External CM API Interface */ @@ -172,8 +169,8 @@ int nes_rem_ref_cm_node(struct nes_cm_node *cm_node) /** * create_event */ -static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, - enum nes_cm_event_type type) +static struct nes_cm_event *create_event(struct nes_cm_node * cm_node, + enum nes_cm_event_type type) { struct nes_cm_event *event; @@ -195,10 +192,10 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, event->cm_info.cm_id = cm_node->cm_id; nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, " - "dst_addr=%08x[%x], src_addr=%08x[%x]\n", - cm_node, event, type, event->cm_info.loc_addr, - event->cm_info.loc_port, event->cm_info.rem_addr, - event->cm_info.rem_port); + "dst_addr=%08x[%x], src_addr=%08x[%x]\n", + cm_node, event, type, event->cm_info.loc_addr, + event->cm_info.loc_port, event->cm_info.rem_addr, + event->cm_info.rem_port); nes_cm_post_event(event); return event; @@ -210,14 +207,19 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, */ static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) { + u8 start_addr = 0; + u8 *start_ptr = &start_addr; + u8 **start_buff = &start_ptr; + u16 buff_len = 0; + if (!skb) { nes_debug(NES_DBG_CM, "skb set to NULL\n"); return -1; } /* send an MPA Request frame */ - form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame, - cm_node->mpa_frame_size, SET_ACK); + cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REQUEST); + form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK); return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); } @@ -226,7 +228,11 @@ static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) static int send_mpa_reject(struct nes_cm_node *cm_node) { - struct sk_buff *skb = NULL; + struct sk_buff *skb = NULL; + u8 start_addr = 0; + u8 *start_ptr = &start_addr; + u8 **start_buff = &start_ptr; + u16 buff_len = 0; skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { @@ -235,8 +241,8 @@ static int send_mpa_reject(struct nes_cm_node *cm_node) } /* send an MPA reject frame */ - form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame, - cm_node->mpa_frame_size, SET_ACK | SET_FIN); + cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY); + form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN); cm_node->state = NES_CM_STATE_FIN_WAIT1; return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); @@ -248,24 +254,31 @@ static int send_mpa_reject(struct nes_cm_node *cm_node) * IETF MPA frame */ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, - u32 len) + u32 len) { - struct ietf_mpa_frame *mpa_frame; + struct ietf_mpa_v1 *mpa_frame; + struct ietf_mpa_v2 *mpa_v2_frame; + struct ietf_rtr_msg *rtr_msg; + int mpa_hdr_len; + int priv_data_len; *type = NES_MPA_REQUEST_ACCEPT; /* assume req frame is in tcp data payload */ - if (len < sizeof(struct ietf_mpa_frame)) { + if (len < sizeof(struct ietf_mpa_v1)) { nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len); return -EINVAL; } - mpa_frame = (struct ietf_mpa_frame *)buffer; - cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len); + /* points to the beginning of the frame, which could be MPA V1 or V2 */ + mpa_frame = (struct ietf_mpa_v1 *)buffer; + mpa_hdr_len = sizeof(struct ietf_mpa_v1); + priv_data_len = ntohs(mpa_frame->priv_data_len); + /* make sure mpa private data len is less than 512 bytes */ - if (cm_node->mpa_frame_size > IETF_MAX_PRIV_DATA_LEN) { + if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) { nes_debug(NES_DBG_CM, "The received Length of Private" - " Data field exceeds 512 octets\n"); + " Data field exceeds 512 octets\n"); return -EINVAL; } /* @@ -273,11 +286,22 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, * received MPA version and MPA key information * */ - if (mpa_frame->rev != mpa_version) { + if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) { + nes_debug(NES_DBG_CM, "The received mpa version" + " is not supported\n"); + return -EINVAL; + } + /* + * backwards compatibility only + */ + if (mpa_frame->rev > cm_node->mpa_frame_rev) { nes_debug(NES_DBG_CM, "The received mpa version" - " can not be interoperated\n"); + " can not be interoperated\n"); return -EINVAL; + } else { + cm_node->mpa_frame_rev = mpa_frame->rev; } + if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) { nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n"); @@ -290,25 +314,75 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, } } - if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) { + + if (priv_data_len + mpa_hdr_len != len) { nes_debug(NES_DBG_CM, "The received ietf buffer was not right" - " complete (%x + %x != %x)\n", - cm_node->mpa_frame_size, - (u32)sizeof(struct ietf_mpa_frame), len); + " complete (%x + %x != %x)\n", + priv_data_len, mpa_hdr_len, len); return -EINVAL; } /* make sure it does not exceed the max size */ if (len > MAX_CM_BUFFER) { nes_debug(NES_DBG_CM, "The received ietf buffer was too large" - " (%x + %x != %x)\n", - cm_node->mpa_frame_size, - (u32)sizeof(struct ietf_mpa_frame), len); + " (%x + %x != %x)\n", + priv_data_len, mpa_hdr_len, len); return -EINVAL; } + cm_node->mpa_frame_size = priv_data_len; + + switch (mpa_frame->rev) { + case IETF_MPA_V2: { + u16 ird_size; + u16 ord_size; + mpa_v2_frame = (struct ietf_mpa_v2 *)buffer; + mpa_hdr_len += IETF_RTR_MSG_SIZE; + cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE; + rtr_msg = &mpa_v2_frame->rtr_msg; + + /* parse rtr message */ + rtr_msg->ctrl_ird = ntohs(rtr_msg->ctrl_ird); + rtr_msg->ctrl_ord = ntohs(rtr_msg->ctrl_ord); + ird_size = rtr_msg->ctrl_ird & IETF_NO_IRD_ORD; + ord_size = rtr_msg->ctrl_ord & IETF_NO_IRD_ORD; + + if (!(rtr_msg->ctrl_ird & IETF_PEER_TO_PEER)) { + /* send reset */ + return -EINVAL; + } + + if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + /* responder */ + if (cm_node->ord_size > ird_size) + cm_node->ord_size = ird_size; + } else { + /* initiator */ + if (cm_node->ord_size > ird_size) + cm_node->ord_size = ird_size; + + if (cm_node->ird_size < ord_size) { + /* no resources available */ + /* send terminate message */ + return -EINVAL; + } + } + + if (rtr_msg->ctrl_ord & IETF_RDMA0_READ) { + cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; + } else if (rtr_msg->ctrl_ord & IETF_RDMA0_WRITE) { + cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO; + } else { /* Not supported RDMA0 operation */ + return -EINVAL; + } + break; + } + case IETF_MPA_V1: + default: + break; + } + /* copy entire MPA frame to our cm_node's frame */ - memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame), - cm_node->mpa_frame_size); + memcpy(cm_node->mpa_frame_buf, buffer + mpa_hdr_len, cm_node->mpa_frame_size); if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT) *type = NES_MPA_REQUEST_REJECT; @@ -321,8 +395,8 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, * node info to build. */ static void form_cm_frame(struct sk_buff *skb, - struct nes_cm_node *cm_node, void *options, u32 optionsize, - void *data, u32 datasize, u8 flags) + struct nes_cm_node *cm_node, void *options, u32 optionsize, + void *data, u32 datasize, u8 flags) { struct tcphdr *tcph; struct iphdr *iph; @@ -331,14 +405,14 @@ static void form_cm_frame(struct sk_buff *skb, u16 packetsize = sizeof(*iph); packetsize += sizeof(*tcph); - packetsize += optionsize + datasize; + packetsize += optionsize + datasize; + skb_trim(skb, 0); memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph)); - skb->len = 0; buf = skb_put(skb, packetsize + ETH_HLEN); - ethh = (struct ethhdr *) buf; + ethh = (struct ethhdr *)buf; buf += ETH_HLEN; iph = (struct iphdr *)buf; @@ -346,7 +420,7 @@ static void form_cm_frame(struct sk_buff *skb, tcph = (struct tcphdr *)buf; skb_reset_mac_header(skb); skb_set_network_header(skb, ETH_HLEN); - skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph)); + skb_set_transport_header(skb, ETH_HLEN + sizeof(*iph)); buf += sizeof(*tcph); skb->ip_summed = CHECKSUM_PARTIAL; @@ -359,14 +433,14 @@ static void form_cm_frame(struct sk_buff *skb, ethh->h_proto = htons(0x0800); iph->version = IPVERSION; - iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ + iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ iph->tos = 0; iph->tot_len = htons(packetsize); iph->id = htons(++cm_node->tcp_cntxt.loc_id); iph->frag_off = htons(0x4000); iph->ttl = 0x40; - iph->protocol = 0x06; /* IPPROTO_TCP */ + iph->protocol = 0x06; /* IPPROTO_TCP */ iph->saddr = htonl(cm_node->loc_addr); iph->daddr = htonl(cm_node->rem_addr); @@ -379,14 +453,16 @@ static void form_cm_frame(struct sk_buff *skb, cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num); tcph->ack = 1; - } else + } else { tcph->ack_seq = 0; + } if (flags & SET_SYN) { cm_node->tcp_cntxt.loc_seq_num++; tcph->syn = 1; - } else + } else { cm_node->tcp_cntxt.loc_seq_num += datasize; + } if (flags & SET_FIN) { cm_node->tcp_cntxt.loc_seq_num++; @@ -407,10 +483,8 @@ static void form_cm_frame(struct sk_buff *skb, skb_shinfo(skb)->nr_frags = 0; cm_packets_created++; - } - /** * print_core - dump a cm core */ @@ -422,7 +496,7 @@ static void print_core(struct nes_cm_core *core) return; nes_debug(NES_DBG_CM, "---------------------------------------------\n"); - nes_debug(NES_DBG_CM, "State : %u \n", core->state); + nes_debug(NES_DBG_CM, "State : %u \n", core->state); nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt)); nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt)); @@ -432,6 +506,147 @@ static void print_core(struct nes_cm_core *core) nes_debug(NES_DBG_CM, "-------------- end core ---------------\n"); } +/** + * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame + */ +static int cm_build_mpa_frame(struct nes_cm_node *cm_node, u8 **start_buff, + u16 *buff_len, u8 *pci_mem, u8 mpa_key) +{ + int ret = 0; + + *start_buff = (pci_mem) ? pci_mem : &cm_node->mpa_frame_buf[0]; + + switch (cm_node->mpa_frame_rev) { + case IETF_MPA_V1: + *start_buff = (u8 *)*start_buff + sizeof(struct ietf_rtr_msg); + *buff_len = sizeof(struct ietf_mpa_v1) + cm_node->mpa_frame_size; + build_mpa_v1(cm_node, *start_buff, mpa_key); + break; + case IETF_MPA_V2: + *buff_len = sizeof(struct ietf_mpa_v2) + cm_node->mpa_frame_size; + build_mpa_v2(cm_node, *start_buff, mpa_key); + break; + default: + ret = -EINVAL; + } + return ret; +} + +/** + * build_mpa_v2 - build a MPA V2 frame + */ +static void build_mpa_v2(struct nes_cm_node *cm_node, + void *start_addr, u8 mpa_key) +{ + struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr; + struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg; + + /* initialize the upper 5 bytes of the frame */ + build_mpa_v1(cm_node, start_addr, mpa_key); + mpa_frame->flags |= IETF_MPA_V2_FLAG; /* set a bit to indicate MPA V2 */ + mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE); + + /* initialize RTR msg */ + rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? + IETF_NO_IRD_ORD : cm_node->ird_size; + rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? + IETF_NO_IRD_ORD : cm_node->ord_size; + + rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER; + rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN; + + switch (mpa_key) { + case MPA_KEY_REQUEST: + rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE; + rtr_msg->ctrl_ord |= IETF_RDMA0_READ; + break; + case MPA_KEY_REPLY: + switch (cm_node->send_rdma0_op) { + case SEND_RDMA_WRITE_ZERO: + rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE; + break; + case SEND_RDMA_READ_ZERO: + rtr_msg->ctrl_ord |= IETF_RDMA0_READ; + break; + } + } + rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird); + rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord); +} + +/** + * build_mpa_v1 - build a MPA V1 frame + */ +static void build_mpa_v1(struct nes_cm_node *cm_node, void *start_addr, u8 mpa_key) +{ + struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr; + + switch (mpa_key) { + case MPA_KEY_REQUEST: + memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE); + break; + case MPA_KEY_REPLY: + memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); + break; + } + mpa_frame->flags = IETF_MPA_FLAGS_CRC; + mpa_frame->rev = cm_node->mpa_frame_rev; + mpa_frame->priv_data_len = htons(cm_node->mpa_frame_size); +} + +static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_addr) +{ + u64 u64temp; + struct nes_qp *nesqp = *nesqp_addr; + struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0]; + + u64temp = (unsigned long)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN >> 1; + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); + + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; + + switch (cm_node->send_rdma0_op) { + case SEND_RDMA_WRITE_ZERO: + nes_debug(NES_DBG_CM, "Sending first write.\n"); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + break; + + case SEND_RDMA_READ_ZERO: + default: + if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) { + printk(KERN_ERR "%s[%u]: Unsupported RDMA0 len operation=%u\n", + __func__, __LINE__, cm_node->send_rdma0_op); + WARN_ON(1); + } + nes_debug(NES_DBG_CM, "Sending first rdma operation.\n"); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_OP_RDMAR); + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX] = 1; + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_STAG_IDX] = 1; + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 1; + break; + } + + if (nesqp->sq_kmapped) { + nesqp->sq_kmapped = 0; + kunmap(nesqp->page); + } + + /*use the reserved spot on the WQ for the extra first WQE*/ + nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | + NES_QPCONTEXT_ORDIRD_ALSMM)); + nesqp->skip_lsmm = 1; + nesqp->hwqp.sq_tail = 0; +} /** * schedule_nes_timer @@ -439,10 +654,10 @@ static void print_core(struct nes_cm_core *core) * rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node); */ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, - enum nes_timer_type type, int send_retrans, - int close_when_complete) + enum nes_timer_type type, int send_retrans, + int close_when_complete) { - unsigned long flags; + unsigned long flags; struct nes_cm_core *cm_core = cm_node->cm_core; struct nes_timer_entry *new_send; int ret = 0; @@ -463,7 +678,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, new_send->close_when_complete = close_when_complete; if (type == NES_TIMER_TYPE_CLOSE) { - new_send->timetosend += (HZ/10); + new_send->timetosend += (HZ / 10); if (cm_node->recv_entry) { kfree(new_send); WARN_ON(1); @@ -484,7 +699,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); if (ret != NETDEV_TX_OK) { nes_debug(NES_DBG_CM, "Error sending packet %p " - "(jiffies = %lu)\n", new_send, jiffies); + "(jiffies = %lu)\n", new_send, jiffies); new_send->timetosend = jiffies; ret = NETDEV_TX_OK; } else { @@ -513,6 +728,7 @@ static void nes_retrans_expired(struct nes_cm_node *cm_node) struct iw_cm_id *cm_id = cm_node->cm_id; enum nes_cm_node_state state = cm_node->state; cm_node->state = NES_CM_STATE_CLOSED; + switch (state) { case NES_CM_STATE_SYN_RCVD: case NES_CM_STATE_CLOSING: @@ -545,10 +761,10 @@ static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node) spin_lock_irqsave(&nesqp->lock, qplockflags); if (nesqp->cm_id) { nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " - "refcount = %d: HIT A " - "NES_TIMER_TYPE_CLOSE with something " - "to do!!!\n", nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with something " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; nesqp->ibqp_state = IB_QPS_ERR; @@ -557,10 +773,10 @@ static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node) } else { spin_unlock_irqrestore(&nesqp->lock, qplockflags); nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " - "refcount = %d: HIT A " - "NES_TIMER_TYPE_CLOSE with nothing " - "to do!!!\n", nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with nothing " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); } } else if (rem_node) { /* TIME_WAIT state */ @@ -589,11 +805,12 @@ static void nes_cm_timer_tick(unsigned long pass) int ret = NETDEV_TX_OK; struct list_head timer_list; + INIT_LIST_HEAD(&timer_list); spin_lock_irqsave(&cm_core->ht_lock, flags); list_for_each_safe(list_node, list_core_temp, - &cm_core->connected_nodes) { + &cm_core->connected_nodes) { cm_node = container_of(list_node, struct nes_cm_node, list); if ((cm_node->recv_entry) || (cm_node->send_entry)) { add_ref_cm_node(cm_node); @@ -604,18 +821,19 @@ static void nes_cm_timer_tick(unsigned long pass) list_for_each_safe(list_node, list_core_temp, &timer_list) { cm_node = container_of(list_node, struct nes_cm_node, - timer_entry); + timer_entry); recv_entry = cm_node->recv_entry; if (recv_entry) { if (time_after(recv_entry->timetosend, jiffies)) { if (nexttimeout > recv_entry->timetosend || - !settimer) { + !settimer) { nexttimeout = recv_entry->timetosend; settimer = 1; } - } else + } else { handle_recv_entry(cm_node, 1); + } } spin_lock_irqsave(&cm_node->retrans_list_lock, flags); @@ -626,8 +844,8 @@ static void nes_cm_timer_tick(unsigned long pass) if (time_after(send_entry->timetosend, jiffies)) { if (cm_node->state != NES_CM_STATE_TSA) { if ((nexttimeout > - send_entry->timetosend) || - !settimer) { + send_entry->timetosend) || + !settimer) { nexttimeout = send_entry->timetosend; settimer = 1; @@ -639,13 +857,13 @@ static void nes_cm_timer_tick(unsigned long pass) } if ((cm_node->state == NES_CM_STATE_TSA) || - (cm_node->state == NES_CM_STATE_CLOSED)) { + (cm_node->state == NES_CM_STATE_CLOSED)) { free_retrans_entry(cm_node); break; } if (!send_entry->retranscount || - !send_entry->retrycount) { + !send_entry->retrycount) { cm_packets_dropped++; free_retrans_entry(cm_node); @@ -654,28 +872,28 @@ static void nes_cm_timer_tick(unsigned long pass) nes_retrans_expired(cm_node); cm_node->state = NES_CM_STATE_CLOSED; spin_lock_irqsave(&cm_node->retrans_list_lock, - flags); + flags); break; } atomic_inc(&send_entry->skb->users); cm_packets_retrans++; nes_debug(NES_DBG_CM, "Retransmitting send_entry %p " - "for node %p, jiffies = %lu, time to send = " - "%lu, retranscount = %u, send_entry->seq_num = " - "0x%08X, cm_node->tcp_cntxt.rem_ack_num = " - "0x%08X\n", send_entry, cm_node, jiffies, - send_entry->timetosend, - send_entry->retranscount, - send_entry->seq_num, - cm_node->tcp_cntxt.rem_ack_num); + "for node %p, jiffies = %lu, time to send = " + "%lu, retranscount = %u, send_entry->seq_num = " + "0x%08X, cm_node->tcp_cntxt.rem_ack_num = " + "0x%08X\n", send_entry, cm_node, jiffies, + send_entry->timetosend, + send_entry->retranscount, + send_entry->seq_num, + cm_node->tcp_cntxt.rem_ack_num); spin_unlock_irqrestore(&cm_node->retrans_list_lock, - flags); + flags); ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (ret != NETDEV_TX_OK) { nes_debug(NES_DBG_CM, "rexmit failed for " - "node=%p\n", cm_node); + "node=%p\n", cm_node); cm_packets_bounced++; send_entry->retrycount--; nexttimeout = jiffies + NES_SHORT_TIME; @@ -685,18 +903,18 @@ static void nes_cm_timer_tick(unsigned long pass) cm_packets_sent++; } nes_debug(NES_DBG_CM, "Packet Sent: retrans count = " - "%u, retry count = %u.\n", - send_entry->retranscount, - send_entry->retrycount); + "%u, retry count = %u.\n", + send_entry->retranscount, + send_entry->retrycount); if (send_entry->send_retrans) { send_entry->retranscount--; timetosend = (NES_RETRY_TIMEOUT << - (NES_DEFAULT_RETRANS - send_entry->retranscount)); + (NES_DEFAULT_RETRANS - send_entry->retranscount)); send_entry->timetosend = jiffies + - min(timetosend, NES_MAX_TIMEOUT); + min(timetosend, NES_MAX_TIMEOUT); if (nexttimeout > send_entry->timetosend || - !settimer) { + !settimer) { nexttimeout = send_entry->timetosend; settimer = 1; } @@ -705,11 +923,11 @@ static void nes_cm_timer_tick(unsigned long pass) close_when_complete = send_entry->close_when_complete; nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n", - cm_node, cm_node->state); + cm_node, cm_node->state); free_retrans_entry(cm_node); if (close_when_complete) rem_ref_cm_node(cm_node->cm_core, - cm_node); + cm_node); } } while (0); @@ -719,7 +937,7 @@ static void nes_cm_timer_tick(unsigned long pass) if (settimer) { if (!timer_pending(&cm_core->tcp_timer)) { - cm_core->tcp_timer.expires = nexttimeout; + cm_core->tcp_timer.expires = nexttimeout; add_timer(&cm_core->tcp_timer); } } @@ -730,13 +948,13 @@ static void nes_cm_timer_tick(unsigned long pass) * send_syn */ static int send_syn(struct nes_cm_node *cm_node, u32 sendack, - struct sk_buff *skb) + struct sk_buff *skb) { int ret; int flags = SET_SYN; char optionsbuffer[sizeof(struct option_mss) + - sizeof(struct option_windowscale) + sizeof(struct option_base) + - TCP_OPTIONS_PADDING]; + sizeof(struct option_windowscale) + sizeof(struct option_base) + + TCP_OPTIONS_PADDING]; int optionssize = 0; /* Sending MSS option */ @@ -863,7 +1081,7 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb) * find_node - find a cm node that matches the reference cm node */ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, - u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr) + u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr) { unsigned long flags; struct list_head *hte; @@ -877,12 +1095,12 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, list_for_each_entry(cm_node, hte, list) { /* compare quad, return node handle if a match */ nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n", - cm_node->loc_addr, cm_node->loc_port, - loc_addr, loc_port, - cm_node->rem_addr, cm_node->rem_port, - rem_addr, rem_port); + cm_node->loc_addr, cm_node->loc_port, + loc_addr, loc_port, + cm_node->rem_addr, cm_node->rem_port, + rem_addr, rem_port); if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) && - (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) { + (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) { add_ref_cm_node(cm_node); spin_unlock_irqrestore(&cm_core->ht_lock, flags); return cm_node; @@ -899,7 +1117,7 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, * find_listener - find a cm node listening on this addr-port pair */ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, - nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state) + nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state) { unsigned long flags; struct nes_cm_listener *listen_node; @@ -909,9 +1127,9 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, list_for_each_entry(listen_node, &cm_core->listen_list.list, list) { /* compare node pair, return node handle if a match */ if (((listen_node->loc_addr == dst_addr) || - listen_node->loc_addr == 0x00000000) && - (listen_node->loc_port == dst_port) && - (listener_state & listen_node->listener_state)) { + listen_node->loc_addr == 0x00000000) && + (listen_node->loc_port == dst_port) && + (listener_state & listen_node->listener_state)) { atomic_inc(&listen_node->ref_count); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); return listen_node; @@ -936,7 +1154,7 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node return -EINVAL; nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n", - cm_node); + cm_node); spin_lock_irqsave(&cm_core->ht_lock, flags); @@ -955,7 +1173,7 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node * mini_cm_dec_refcnt_listen */ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, - struct nes_cm_listener *listener, int free_hanging_nodes) + struct nes_cm_listener *listener, int free_hanging_nodes) { int ret = -EINVAL; int err = 0; @@ -966,8 +1184,8 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, struct list_head reset_list; nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, " - "refcnt=%d\n", listener, free_hanging_nodes, - atomic_read(&listener->ref_count)); + "refcnt=%d\n", listener, free_hanging_nodes, + atomic_read(&listener->ref_count)); /* free non-accelerated child nodes for this listener */ INIT_LIST_HEAD(&reset_list); if (free_hanging_nodes) { @@ -975,7 +1193,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, list_for_each_safe(list_pos, list_temp, &g_cm_core->connected_nodes) { cm_node = container_of(list_pos, struct nes_cm_node, - list); + list); if ((cm_node->listener == listener) && (!cm_node->accelerated)) { add_ref_cm_node(cm_node); @@ -987,7 +1205,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, list_for_each_safe(list_pos, list_temp, &reset_list) { cm_node = container_of(list_pos, struct nes_cm_node, - reset_entry); + reset_entry); { struct nes_cm_node *loopback = cm_node->loopbackpartner; enum nes_cm_node_state old_state; @@ -999,7 +1217,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, err = send_reset(cm_node, NULL); if (err) { cm_node->state = - NES_CM_STATE_CLOSED; + NES_CM_STATE_CLOSED; WARN_ON(1); } else { old_state = cm_node->state; @@ -1044,10 +1262,9 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - if (listener->nesvnic) { + if (listener->nesvnic) nes_manage_apbvt(listener->nesvnic, listener->loc_port, - PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); - } + PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); @@ -1061,8 +1278,8 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, if (listener) { if (atomic_read(&listener->pend_accepts_cnt) > 0) nes_debug(NES_DBG_CM, "destroying listener (%p)" - " with non-zero pending accepts=%u\n", - listener, atomic_read(&listener->pend_accepts_cnt)); + " with non-zero pending accepts=%u\n", + listener, atomic_read(&listener->pend_accepts_cnt)); } return ret; @@ -1073,7 +1290,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, * mini_cm_del_listen */ static int mini_cm_del_listen(struct nes_cm_core *cm_core, - struct nes_cm_listener *listener) + struct nes_cm_listener *listener) { listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE; listener->cm_id = NULL; /* going to be destroyed pretty soon */ @@ -1085,9 +1302,10 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core, * mini_cm_accelerated */ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, - struct nes_cm_node *cm_node) + struct nes_cm_node *cm_node) { u32 was_timer_set; + cm_node->accelerated = 1; if (cm_node->accept_pend) { @@ -1121,7 +1339,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0); if (IS_ERR(rt)) { printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n", - __func__, dst_ip); + __func__, dst_ip); return rc; } @@ -1139,7 +1357,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi if (arpindex >= 0) { if (!memcmp(nesadapter->arp_table[arpindex].mac_addr, - neigh->ha, ETH_ALEN)){ + neigh->ha, ETH_ALEN)) { /* Mac address same as in nes_arp_table */ neigh_release(neigh); ip_rt_put(rt); @@ -1147,8 +1365,8 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi } nes_manage_arp_cache(nesvnic->netdev, - nesadapter->arp_table[arpindex].mac_addr, - dst_ip, NES_ARP_DELETE); + nesadapter->arp_table[arpindex].mac_addr, + dst_ip, NES_ARP_DELETE); } nes_manage_arp_cache(nesvnic->netdev, neigh->ha, @@ -1170,8 +1388,8 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi * make_cm_node - create a new instance of a cm node */ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct nes_cm_info *cm_info, - struct nes_cm_listener *listener) + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info, + struct nes_cm_listener *listener) { struct nes_cm_node *cm_node; struct timespec ts; @@ -1190,7 +1408,12 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->rem_addr = cm_info->rem_addr; cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; - cm_node->send_write0 = send_first; + + cm_node->mpa_frame_rev = mpa_version; + cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; + cm_node->ird_size = IETF_NO_IRD_ORD; + cm_node->ord_size = IETF_NO_IRD_ORD; + nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n", &cm_node->loc_addr, cm_node->loc_port, &cm_node->rem_addr, cm_node->rem_port); @@ -1200,7 +1423,7 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener, - cm_node->cm_id); + cm_node->cm_id); spin_lock_init(&cm_node->retrans_list_lock); @@ -1211,11 +1434,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID; cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> - NES_CM_DEFAULT_RCV_WND_SCALE; + NES_CM_DEFAULT_RCV_WND_SCALE; ts = current_kernel_time(); cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - - sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; + sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; cm_node->tcp_cntxt.rcv_nxt = 0; /* get a unique session ID , add thread_id to an upcounter to handle race */ atomic_inc(&cm_core->node_cnt); @@ -1231,12 +1454,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loopbackpartner = NULL; /* get the mac addr for the remote node */ - if (ipv4_is_loopback(htonl(cm_node->rem_addr))) + if (ipv4_is_loopback(htonl(cm_node->rem_addr))) { arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE); - else { + } else { oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex); - } if (arpindex < 0) { kfree(cm_node); @@ -1269,7 +1491,7 @@ static int add_ref_cm_node(struct nes_cm_node *cm_node) * rem_ref_cm_node - destroy an instance of a cm node */ static int rem_ref_cm_node(struct nes_cm_core *cm_core, - struct nes_cm_node *cm_node) + struct nes_cm_node *cm_node) { unsigned long flags; struct nes_qp *nesqp; @@ -1300,9 +1522,9 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, } else { if (cm_node->apbvt_set && cm_node->nesvnic) { nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, - PCI_FUNC( - cm_node->nesvnic->nesdev->pcidev->devfn), - NES_MANAGE_APBVT_DEL); + PCI_FUNC( + cm_node->nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); } } @@ -1323,7 +1545,7 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, * process_options */ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, - u32 optionsize, u32 syn_packet) + u32 optionsize, u32 syn_packet) { u32 tmp; u32 offset = 0; @@ -1341,15 +1563,15 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, continue; case OPTION_NUMBER_MSS: nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d " - "Size: %d\n", __func__, - all_options->as_mss.length, offset, optionsize); + "Size: %d\n", __func__, + all_options->as_mss.length, offset, optionsize); got_mss_option = 1; if (all_options->as_mss.length != 4) { return 1; } else { tmp = ntohs(all_options->as_mss.mss); if (tmp > 0 && tmp < - cm_node->tcp_cntxt.mss) + cm_node->tcp_cntxt.mss) cm_node->tcp_cntxt.mss = tmp; } break; @@ -1357,12 +1579,9 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, cm_node->tcp_cntxt.snd_wscale = all_options->as_windowscale.shiftcount; break; - case OPTION_NUMBER_WRITE0: - cm_node->send_write0 = 1; - break; default: nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", - all_options->as_base.optionnum); + all_options->as_base.optionnum); break; } offset += all_options->as_base.length; @@ -1381,8 +1600,8 @@ static void drop_packet(struct sk_buff *skb) static void handle_fin_pkt(struct nes_cm_node *cm_node) { nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. " - "refcnt=%d\n", cm_node, cm_node->state, - atomic_read(&cm_node->ref_count)); + "refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); switch (cm_node->state) { case NES_CM_STATE_SYN_RCVD: case NES_CM_STATE_SYN_SENT: @@ -1448,7 +1667,20 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " "listener=%p state=%d\n", __func__, __LINE__, cm_node, cm_node->listener, cm_node->state); - active_open_err(cm_node, skb, reset); + switch (cm_node->mpa_frame_rev) { + case IETF_MPA_V2: + cm_node->mpa_frame_rev = IETF_MPA_V1; + /* send a syn and goto syn sent state */ + cm_node->state = NES_CM_STATE_SYN_SENT; + if (send_syn(cm_node, 0, NULL)) { + active_open_err(cm_node, skb, reset); + } + break; + case IETF_MPA_V1: + default: + active_open_err(cm_node, skb, reset); + break; + } break; case NES_CM_STATE_MPAREQ_RCVD: atomic_inc(&cm_node->passive_state); @@ -1484,21 +1716,21 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb) { - - int ret = 0; + int ret = 0; int datasize = skb->len; u8 *dataloc = skb->data; enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN; - u32 res_type; + u32 res_type; + ret = parse_mpa(cm_node, dataloc, &res_type, datasize); if (ret) { nes_debug(NES_DBG_CM, "didn't like MPA Request\n"); if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) { nes_debug(NES_DBG_CM, "%s[%u] create abort for " - "cm_node=%p listener=%p state=%d\n", __func__, - __LINE__, cm_node, cm_node->listener, - cm_node->state); + "cm_node=%p listener=%p state=%d\n", __func__, + __LINE__, cm_node, cm_node->listener, + cm_node->state); active_open_err(cm_node, skb, 1); } else { passive_open_err(cm_node, skb, 1); @@ -1508,16 +1740,15 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb) switch (cm_node->state) { case NES_CM_STATE_ESTABLISHED: - if (res_type == NES_MPA_REQUEST_REJECT) { + if (res_type == NES_MPA_REQUEST_REJECT) /*BIG problem as we are receiving the MPA.. So should - * not be REJECT.. This is Passive Open.. We can - * only receive it Reject for Active Open...*/ + * not be REJECT.. This is Passive Open.. We can + * only receive it Reject for Active Open...*/ WARN_ON(1); - } cm_node->state = NES_CM_STATE_MPAREQ_RCVD; type = NES_CM_EVENT_MPA_REQ; atomic_set(&cm_node->passive_state, - NES_PASSIVE_STATE_INDICATED); + NES_PASSIVE_STATE_INDICATED); break; case NES_CM_STATE_MPAREQ_SENT: cleanup_retrans_entry(cm_node); @@ -1544,8 +1775,8 @@ static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb) case NES_CM_STATE_SYN_SENT: case NES_CM_STATE_MPAREQ_SENT: nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " - "listener=%p state=%d\n", __func__, __LINE__, cm_node, - cm_node->listener, cm_node->state); + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); active_open_err(cm_node, skb, 1); break; case NES_CM_STATE_ESTABLISHED: @@ -1559,11 +1790,11 @@ static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb) } static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph, - struct sk_buff *skb) + struct sk_buff *skb) { int err; - err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num))? 0 : 1; + err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)) ? 0 : 1; if (err) active_open_err(cm_node, skb, 1); @@ -1571,7 +1802,7 @@ static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph, } static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph, - struct sk_buff *skb) + struct sk_buff *skb) { int err = 0; u32 seq; @@ -1579,21 +1810,22 @@ static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph, u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num; u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; u32 rcv_wnd; + seq = ntohl(tcph->seq); ack_seq = ntohl(tcph->ack_seq); rcv_wnd = cm_node->tcp_cntxt.rcv_wnd; if (ack_seq != loc_seq_num) err = 1; - else if (!between(seq, rcv_nxt, (rcv_nxt+rcv_wnd))) + else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd))) err = 1; if (err) { nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " - "listener=%p state=%d\n", __func__, __LINE__, cm_node, - cm_node->listener, cm_node->state); + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); indicate_pkt_err(cm_node, skb); nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X " - "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt, - rcv_wnd); + "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt, + rcv_wnd); } return err; } @@ -1603,9 +1835,8 @@ static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph, * is created with a listener or it may comein as rexmitted packet which in * that case will be just dropped. */ - static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct tcphdr *tcph) + struct tcphdr *tcph) { int ret; u32 inc_sequence; @@ -1624,15 +1855,15 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, case NES_CM_STATE_LISTENING: /* Passive OPEN */ if (atomic_read(&cm_node->listener->pend_accepts_cnt) > - cm_node->listener->backlog) { + cm_node->listener->backlog) { nes_debug(NES_DBG_CM, "drop syn due to backlog " - "pressure \n"); + "pressure \n"); cm_backlog_drops++; passive_open_err(cm_node, skb, 0); break; } ret = handle_tcp_options(cm_node, tcph, skb, optionsize, - 1); + 1); if (ret) { passive_open_err(cm_node, skb, 0); /* drop pkt */ @@ -1666,9 +1897,8 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, } static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct tcphdr *tcph) + struct tcphdr *tcph) { - int ret; u32 inc_sequence; int optionsize; @@ -1687,7 +1917,7 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0); if (ret) { nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n", - cm_node); + cm_node); break; } cleanup_retrans_entry(cm_node); @@ -1726,12 +1956,13 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, } static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct tcphdr *tcph) + struct tcphdr *tcph) { int datasize = 0; u32 inc_sequence; int ret = 0; int optionsize; + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); if (check_seq(cm_node, tcph, skb)) @@ -1752,8 +1983,9 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; handle_rcv_mpa(cm_node, skb); - } else /* rcvd ACK only */ + } else { /* rcvd ACK only */ dev_kfree_skb_any(skb); + } break; case NES_CM_STATE_ESTABLISHED: /* Passive OPEN */ @@ -1761,16 +1993,18 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; handle_rcv_mpa(cm_node, skb); - } else + } else { drop_packet(skb); + } break; case NES_CM_STATE_MPAREQ_SENT: cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; handle_rcv_mpa(cm_node, skb); - } else /* Could be just an ack pkt.. */ + } else { /* Could be just an ack pkt.. */ dev_kfree_skb_any(skb); + } break; case NES_CM_STATE_LISTENING: cleanup_retrans_entry(cm_node); @@ -1811,14 +2045,15 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, - struct sk_buff *skb, int optionsize, int passive) + struct sk_buff *skb, int optionsize, int passive) { u8 *optionsloc = (u8 *)&tcph[1]; + if (optionsize) { if (process_options(cm_node, optionsloc, optionsize, - (u32)tcph->syn)) { + (u32)tcph->syn)) { nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", - __func__, cm_node); + __func__, cm_node); if (passive) passive_open_err(cm_node, skb, 1); else @@ -1828,7 +2063,7 @@ static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, } cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) << - cm_node->tcp_cntxt.snd_wscale; + cm_node->tcp_cntxt.snd_wscale; if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; @@ -1839,18 +2074,18 @@ static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, * active_open_err() will send reset() if flag set.. * It will also send ABORT event. */ - static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, - int reset) + int reset) { cleanup_retrans_entry(cm_node); if (reset) { nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, " - "state=%d\n", cm_node, cm_node->state); + "state=%d\n", cm_node, cm_node->state); add_ref_cm_node(cm_node); send_reset(cm_node, skb); - } else + } else { dev_kfree_skb_any(skb); + } cm_node->state = NES_CM_STATE_CLOSED; create_event(cm_node, NES_CM_EVENT_ABORTED); @@ -1860,15 +2095,14 @@ static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, * passive_open_err() will either do a reset() or will free up the skb and * remove the cm_node. */ - static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, - int reset) + int reset) { cleanup_retrans_entry(cm_node); cm_node->state = NES_CM_STATE_CLOSED; if (reset) { nes_debug(NES_DBG_CM, "passive_open_err sending RST for " - "cm_node=%p state =%d\n", cm_node, cm_node->state); + "cm_node=%p state =%d\n", cm_node, cm_node->state); send_reset(cm_node, skb); } else { dev_kfree_skb_any(skb); @@ -1883,6 +2117,7 @@ static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, static void free_retrans_entry(struct nes_cm_node *cm_node) { struct nes_timer_entry *send_entry; + send_entry = cm_node->send_entry; if (send_entry) { cm_node->send_entry = NULL; @@ -1906,26 +2141,28 @@ static void cleanup_retrans_entry(struct nes_cm_node *cm_node) * Returns skb if to be freed, else it will return NULL if already used.. */ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct nes_cm_core *cm_core) + struct nes_cm_core *cm_core) { - enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN; + enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN; struct tcphdr *tcph = tcp_hdr(skb); - u32 fin_set = 0; + u32 fin_set = 0; int ret = 0; + skb_pull(skb, ip_hdr(skb)->ihl << 2); nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d " - "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn, - tcph->ack, tcph->rst, tcph->fin); + "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn, + tcph->ack, tcph->rst, tcph->fin); - if (tcph->rst) + if (tcph->rst) { pkt_type = NES_PKT_TYPE_RST; - else if (tcph->syn) { + } else if (tcph->syn) { pkt_type = NES_PKT_TYPE_SYN; if (tcph->ack) pkt_type = NES_PKT_TYPE_SYNACK; - } else if (tcph->ack) + } else if (tcph->ack) { pkt_type = NES_PKT_TYPE_ACK; + } if (tcph->fin) fin_set = 1; @@ -1956,17 +2193,17 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, * mini_cm_listen - create a listen node with params */ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) { struct nes_cm_listener *listener; unsigned long flags; nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", - cm_info->loc_addr, cm_info->loc_port); + cm_info->loc_addr, cm_info->loc_port); /* cannot have multiple matching listeners */ listener = find_listener(cm_core, htonl(cm_info->loc_addr), - htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE); + htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE); if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { /* find automatically incs ref count ??? */ atomic_dec(&listener->ref_count); @@ -2012,9 +2249,9 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, } nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x," - " listener = %p, backlog = %d, cm_id = %p.\n", - cm_info->loc_addr, cm_info->loc_port, - listener, listener->backlog, listener->cm_id); + " listener = %p, backlog = %d, cm_id = %p.\n", + cm_info->loc_addr, cm_info->loc_port, + listener, listener->backlog, listener->cm_id); return listener; } @@ -2024,26 +2261,20 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, * mini_cm_connect - make a connection node with params */ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, u16 private_data_len, - void *private_data, struct nes_cm_info *cm_info) + struct nes_vnic *nesvnic, u16 private_data_len, + void *private_data, struct nes_cm_info *cm_info) { int ret = 0; struct nes_cm_node *cm_node; struct nes_cm_listener *loopbackremotelistener; struct nes_cm_node *loopbackremotenode; struct nes_cm_info loopback_cm_info; - u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + private_data_len; - struct ietf_mpa_frame *mpa_frame = NULL; + u8 *start_buff; /* create a CM connection node */ cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL); if (!cm_node) return NULL; - mpa_frame = &cm_node->mpa_frame; - memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE); - mpa_frame->flags = IETF_MPA_FLAGS_CRC; - mpa_frame->rev = IETF_MPA_VERSION; - mpa_frame->priv_data_len = htons(private_data_len); /* set our node side to client (active) side */ cm_node->tcp_cntxt.client = 1; @@ -2051,8 +2282,8 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, if (cm_info->loc_addr == cm_info->rem_addr) { loopbackremotelistener = find_listener(cm_core, - ntohl(nesvnic->local_ipaddr), cm_node->rem_port, - NES_CM_LISTENER_ACTIVE_STATE); + ntohl(nesvnic->local_ipaddr), cm_node->rem_port, + NES_CM_LISTENER_ACTIVE_STATE); if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { @@ -2061,7 +2292,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, loopback_cm_info.rem_port = cm_info->loc_port; loopback_cm_info.cm_id = loopbackremotelistener->cm_id; loopbackremotenode = make_cm_node(cm_core, nesvnic, - &loopback_cm_info, loopbackremotelistener); + &loopback_cm_info, loopbackremotelistener); if (!loopbackremotenode) { rem_ref_cm_node(cm_node->cm_core, cm_node); return NULL; @@ -2072,7 +2303,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->loopbackpartner = loopbackremotenode; memcpy(loopbackremotenode->mpa_frame_buf, private_data, - private_data_len); + private_data_len); loopbackremotenode->mpa_frame_size = private_data_len; /* we are done handling this state. */ @@ -2100,12 +2331,10 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, return cm_node; } - /* set our node side to client (active) side */ - cm_node->tcp_cntxt.client = 1; - /* init our MPA frame ptr */ - memcpy(mpa_frame->priv_data, private_data, private_data_len); + start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2); + cm_node->mpa_frame_size = private_data_len; - cm_node->mpa_frame_size = mpa_frame_size; + memcpy(start_buff, private_data, private_data_len); /* send a syn and goto syn sent state */ cm_node->state = NES_CM_STATE_SYN_SENT; @@ -2114,18 +2343,19 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, if (ret) { /* error in sending the syn free up the cm_node struct */ nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest " - "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n", - cm_node->rem_addr, cm_node->rem_port, cm_node, - cm_node->cm_id); + "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); rem_ref_cm_node(cm_node->cm_core, cm_node); cm_node = NULL; } - if (cm_node) + if (cm_node) { nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X," - "port=0x%04x, cm_node=%p, cm_id = %p.\n", - cm_node->rem_addr, cm_node->rem_port, cm_node, - cm_node->cm_id); + "port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); + } return cm_node; } @@ -2135,8 +2365,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, * mini_cm_accept - accept a connection * This function is never called */ -static int mini_cm_accept(struct nes_cm_core *cm_core, - struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node) +static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) { return 0; } @@ -2145,8 +2374,7 @@ static int mini_cm_accept(struct nes_cm_core *cm_core, /** * mini_cm_reject - reject and teardown a connection */ -static int mini_cm_reject(struct nes_cm_core *cm_core, - struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node) +static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) { int ret = 0; int err = 0; @@ -2156,7 +2384,7 @@ static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *loopback = cm_node->loopbackpartner; nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n", - __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); + __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); if (cm_node->tcp_cntxt.client) return ret; @@ -2177,8 +2405,9 @@ static int mini_cm_reject(struct nes_cm_core *cm_core, err = send_reset(cm_node, NULL); if (err) WARN_ON(1); - } else + } else { cm_id->add_ref(cm_id); + } } } } else { @@ -2253,7 +2482,7 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod case NES_CM_STATE_TSA: if (cm_node->send_entry) printk(KERN_ERR "ERROR Close got called from STATE_TSA " - "send_entry=%p\n", cm_node->send_entry); + "send_entry=%p\n", cm_node->send_entry); ret = rem_ref_cm_node(cm_core, cm_node); break; } @@ -2266,7 +2495,7 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod * node state machine */ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct sk_buff *skb) + struct nes_vnic *nesvnic, struct sk_buff *skb) { struct nes_cm_node *cm_node = NULL; struct nes_cm_listener *listener = NULL; @@ -2278,9 +2507,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, if (!skb) return 0; - if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) { + if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) return 0; - } iph = (struct iphdr *)skb->data; tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); @@ -2298,8 +2526,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, do { cm_node = find_node(cm_core, - nfo.rem_port, nfo.rem_addr, - nfo.loc_port, nfo.loc_addr); + nfo.rem_port, nfo.rem_addr, + nfo.loc_port, nfo.loc_addr); if (!cm_node) { /* Only type of packet accepted are for */ @@ -2309,8 +2537,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, break; } listener = find_listener(cm_core, nfo.loc_addr, - nfo.loc_port, - NES_CM_LISTENER_ACTIVE_STATE); + nfo.loc_port, + NES_CM_LISTENER_ACTIVE_STATE); if (!listener) { nfo.cm_id = NULL; nfo.conn_type = 0; @@ -2321,10 +2549,10 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, nfo.cm_id = listener->cm_id; nfo.conn_type = listener->conn_type; cm_node = make_cm_node(cm_core, nesvnic, &nfo, - listener); + listener); if (!cm_node) { nes_debug(NES_DBG_CM, "Unable to allocate " - "node\n"); + "node\n"); cm_packets_dropped++; atomic_dec(&listener->ref_count); dev_kfree_skb_any(skb); @@ -2376,7 +2604,7 @@ static struct nes_cm_core *nes_cm_alloc_core(void) init_timer(&cm_core->tcp_timer); cm_core->tcp_timer.function = nes_cm_timer_tick; - cm_core->mtu = NES_CM_DEFAULT_MTU; + cm_core->mtu = NES_CM_DEFAULT_MTU; cm_core->state = NES_CM_STATE_INITED; cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS; @@ -2414,9 +2642,8 @@ static int mini_cm_dealloc_core(struct nes_cm_core *cm_core) barrier(); - if (timer_pending(&cm_core->tcp_timer)) { + if (timer_pending(&cm_core->tcp_timer)) del_timer(&cm_core->tcp_timer); - } destroy_workqueue(cm_core->event_wq); destroy_workqueue(cm_core->disconn_wq); @@ -2471,8 +2698,8 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod return -EINVAL; nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 | - NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG | - NES_QPCONTEXT_MISC_DROS); + NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG | + NES_QPCONTEXT_MISC_DROS); if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale) nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE); @@ -2482,15 +2709,15 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16); nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32( - (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT); + (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT); nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( - (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) & - NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK); + (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) & + NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK); nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( - (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) & - NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK); + (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) & + NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK); nesqp->nesqp_context->keepalive = cpu_to_le32(0x80); nesqp->nesqp_context->ts_recent = 0; @@ -2499,24 +2726,24 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd); nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd << - cm_node->tcp_cntxt.rcv_wscale); + cm_node->tcp_cntxt.rcv_wscale); nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); nesqp->nesqp_context->srtt = 0; nesqp->nesqp_context->rttvar = cpu_to_le32(0x6); nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000); - nesqp->nesqp_context->cwnd = cpu_to_le32(2*cm_node->tcp_cntxt.mss); + nesqp->nesqp_context->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss); nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd); nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X," - " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n", - nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt), - le32_to_cpu(nesqp->nesqp_context->snd_nxt), - cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale), - le32_to_cpu(nesqp->nesqp_context->rcv_wnd), - le32_to_cpu(nesqp->nesqp_context->misc)); + " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n", + nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt), + le32_to_cpu(nesqp->nesqp_context->snd_nxt), + cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale), + le32_to_cpu(nesqp->nesqp_context->rcv_wnd), + le32_to_cpu(nesqp->nesqp_context->misc)); nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd)); nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd)); nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd)); @@ -2537,7 +2764,7 @@ int nes_cm_disconn(struct nes_qp *nesqp) work = kzalloc(sizeof *work, GFP_ATOMIC); if (!work) - return -ENOMEM; /* Timer will clean up */ + return -ENOMEM; /* Timer will clean up */ nes_add_ref(&nesqp->ibqp); work->nesqp = nesqp; @@ -2557,7 +2784,7 @@ static void nes_disconnect_worker(struct work_struct *work) kfree(dwork); nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n", - nesqp->last_aeq, nesqp->hwqp.qp_id); + nesqp->last_aeq, nesqp->hwqp.qp_id); nes_cm_disconn_true(nesqp); nes_rem_ref(&nesqp->ibqp); } @@ -2593,7 +2820,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) /* make sure we havent already closed this connection */ if (!cm_id) { nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n", - nesqp->hwqp.qp_id); + nesqp->hwqp.qp_id); spin_unlock_irqrestore(&nesqp->lock, flags); return -1; } @@ -2602,7 +2829,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id); original_hw_tcp_state = nesqp->hw_tcp_state; - original_ibqp_state = nesqp->ibqp_state; + original_ibqp_state = nesqp->ibqp_state; last_ae = nesqp->last_aeq; if (nesqp->term_flags) { @@ -2660,16 +2887,16 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) cm_event.private_data_len = 0; nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event" - " for QP%u, SQ Head = %u, SQ Tail = %u. " - "cm_id = %p, refcount = %u.\n", - nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, - nesqp->hwqp.sq_tail, cm_id, - atomic_read(&nesqp->refcount)); + " for QP%u, SQ Head = %u, SQ Tail = %u. " + "cm_id = %p, refcount = %u.\n", + nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, + nesqp->hwqp.sq_tail, cm_id, + atomic_read(&nesqp->refcount)); ret = cm_id->event_handler(cm_id, &cm_event); if (ret) nes_debug(NES_DBG_CM, "OFA CM event_handler " - "returned, ret=%d\n", ret); + "returned, ret=%d\n", ret); } if (issue_close) { @@ -2687,9 +2914,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) cm_event.private_data_len = 0; ret = cm_id->event_handler(cm_id, &cm_event); - if (ret) { + if (ret) nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); - } cm_id->rem_ref(cm_id); } @@ -2729,8 +2955,8 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) if (nesqp->lsmm_mr) nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr); pci_free_consistent(nesdev->pcidev, - nesqp->private_data_len+sizeof(struct ietf_mpa_frame), - nesqp->ietf_frame, nesqp->ietf_frame_pbase); + nesqp->private_data_len + nesqp->ietf_frame_size, + nesqp->ietf_frame, nesqp->ietf_frame_pbase); } } @@ -2769,6 +2995,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct ib_phys_buf ibphysbuf; struct nes_pd *nespd; u64 tagged_offset; + u8 mpa_frame_offset = 0; + struct ietf_mpa_v2 *mpa_v2_frame; + u8 start_addr = 0; + u8 *start_ptr = &start_addr; + u8 **start_buff = &start_ptr; + u16 buff_len = 0; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) @@ -2809,53 +3041,49 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", netdev_refcnt_read(nesvnic->netdev)); + nesqp->ietf_frame_size = sizeof(struct ietf_mpa_v2); /* allocate the ietf frame and space for private data */ nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, - sizeof(struct ietf_mpa_frame) + conn_param->private_data_len, - &nesqp->ietf_frame_pbase); + nesqp->ietf_frame_size + conn_param->private_data_len, + &nesqp->ietf_frame_pbase); if (!nesqp->ietf_frame) { - nes_debug(NES_DBG_CM, "Unable to allocate memory for private " - "data\n"); + nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n"); return -ENOMEM; } + mpa_v2_frame = (struct ietf_mpa_v2 *)nesqp->ietf_frame; + if (cm_node->mpa_frame_rev == IETF_MPA_V1) + mpa_frame_offset = 4; - /* setup the MPA frame */ - nesqp->private_data_len = conn_param->private_data_len; - memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); - - memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, - conn_param->private_data_len); + memcpy(mpa_v2_frame->priv_data, conn_param->private_data, + conn_param->private_data_len); - nesqp->ietf_frame->priv_data_len = - cpu_to_be16(conn_param->private_data_len); - nesqp->ietf_frame->rev = mpa_version; - nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; + cm_build_mpa_frame(cm_node, start_buff, &buff_len, nesqp->ietf_frame, MPA_KEY_REPLY); + nesqp->private_data_len = conn_param->private_data_len; /* setup our first outgoing iWarp send WQE (the IETF frame response) */ wqe = &nesqp->hwqp.sq_vbase[0]; if (cm_id->remote_addr.sin_addr.s_addr != - cm_id->local_addr.sin_addr.s_addr) { + cm_id->local_addr.sin_addr.s_addr) { u64temp = (unsigned long)nesqp; nesibdev = nesvnic->nesibdev; nespd = nesqp->nespd; - ibphysbuf.addr = nesqp->ietf_frame_pbase; - ibphysbuf.size = conn_param->private_data_len + - sizeof(struct ietf_mpa_frame); - tagged_offset = (u64)(unsigned long)nesqp->ietf_frame; + ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset; + ibphysbuf.size = buff_len; + tagged_offset = (u64)(unsigned long)*start_buff; ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd, - &ibphysbuf, 1, - IB_ACCESS_LOCAL_WRITE, - &tagged_offset); + &ibphysbuf, 1, + IB_ACCESS_LOCAL_WRITE, + &tagged_offset); if (!ibmr) { nes_debug(NES_DBG_CM, "Unable to register memory region" - "for lSMM for cm_node = %p \n", - cm_node); + "for lSMM for cm_node = %p \n", + cm_node); pci_free_consistent(nesdev->pcidev, - nesqp->private_data_len+sizeof(struct ietf_mpa_frame), - nesqp->ietf_frame, nesqp->ietf_frame_pbase); + nesqp->private_data_len + nesqp->ietf_frame_size, + nesqp->ietf_frame, nesqp->ietf_frame_pbase); return -ENOMEM; } @@ -2863,22 +3091,20 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ibmr->device = nespd->ibpd.device; nesqp->lsmm_mr = ibmr; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; + u64temp |= NES_SW_CONTEXT_ALIGN >> 1; set_wqe_64bit_value(wqe->wqe_words, - NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, - u64temp); + NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, + u64temp); wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | - NES_IWARP_SQ_WQE_WRPDU); + NES_IWARP_SQ_WQE_WRPDU); wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = - cpu_to_le32(conn_param->private_data_len + - sizeof(struct ietf_mpa_frame)); + cpu_to_le32(buff_len); set_wqe_64bit_value(wqe->wqe_words, - NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, - (u64)(unsigned long)nesqp->ietf_frame); + NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, + (u64)(unsigned long)(*start_buff)); wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = - cpu_to_le32(conn_param->private_data_len + - sizeof(struct ietf_mpa_frame)); + cpu_to_le32(buff_len); wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey; if (nesqp->sq_kmapped) { nesqp->sq_kmapped = 0; @@ -2887,7 +3113,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU); + NES_QPCONTEXT_ORDIRD_WRPDU); } else { nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU); @@ -2901,11 +3127,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) /* nesqp->cm_node = (void *)cm_id->provider_data; */ cm_id->provider_data = nesqp; - nesqp->active_conn = 0; + nesqp->active_conn = 0; if (cm_node->state == NES_CM_STATE_TSA) nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n", - cm_node); + cm_node); nes_cm_init_tsa_conn(nesqp, cm_node); @@ -2922,13 +3148,13 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << - NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32(nes_arp_table(nesdev, - le32_to_cpu(nesqp->nesqp_context->ip0), NULL, - NES_ARP_RESOLVE) << 16); + le32_to_cpu(nesqp->nesqp_context->ip0), NULL, + NES_ARP_RESOLVE) << 16); nesqp->nesqp_context->ts_val_delta = cpu_to_le32( jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); @@ -2954,7 +3180,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) crc_value = get_crc_value(&nes_quad); nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); + nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); nesqp->hte_index &= adapter->hte_index_mask; nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); @@ -2962,17 +3188,15 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = " - "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + " - "private data length=%zu.\n", nesqp->hwqp.qp_id, - ntohl(cm_id->remote_addr.sin_addr.s_addr), - ntohs(cm_id->remote_addr.sin_port), - ntohl(cm_id->local_addr.sin_addr.s_addr), - ntohs(cm_id->local_addr.sin_port), - le32_to_cpu(nesqp->nesqp_context->rcv_nxt), - le32_to_cpu(nesqp->nesqp_context->snd_nxt), - conn_param->private_data_len + - sizeof(struct ietf_mpa_frame)); - + "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + " + "private data length=%u.\n", nesqp->hwqp.qp_id, + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohl(cm_id->local_addr.sin_addr.s_addr), + ntohs(cm_id->local_addr.sin_port), + le32_to_cpu(nesqp->nesqp_context->rcv_nxt), + le32_to_cpu(nesqp->nesqp_context->snd_nxt), + buff_len); /* notify OF layer that accept event was successful */ cm_id->add_ref(cm_id); @@ -2993,12 +3217,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesqp->private_data_len; /* copy entire MPA frame to our cm_node's frame */ memcpy(cm_node->loopbackpartner->mpa_frame_buf, - nesqp->ietf_frame->priv_data, nesqp->private_data_len); + conn_param->private_data, conn_param->private_data_len); create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED); } if (ret) printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " - "ret=%d\n", __func__, __LINE__, ret); + "ret=%d\n", __func__, __LINE__, ret); return 0; } @@ -3011,34 +3235,28 @@ int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { struct nes_cm_node *cm_node; struct nes_cm_node *loopback; - struct nes_cm_core *cm_core; + u8 *start_buff; atomic_inc(&cm_rejects); - cm_node = (struct nes_cm_node *) cm_id->provider_data; + cm_node = (struct nes_cm_node *)cm_id->provider_data; loopback = cm_node->loopbackpartner; cm_core = cm_node->cm_core; cm_node->cm_id = cm_id; - cm_node->mpa_frame_size = sizeof(struct ietf_mpa_frame) + pdata_len; - if (cm_node->mpa_frame_size > MAX_CM_BUFFER) + if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER) return -EINVAL; - memcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); if (loopback) { memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len); loopback->mpa_frame.priv_data_len = pdata_len; - loopback->mpa_frame_size = sizeof(struct ietf_mpa_frame) + - pdata_len; + loopback->mpa_frame_size = pdata_len; } else { - memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len); - cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len); + start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2); + cm_node->mpa_frame_size = pdata_len; + memcpy(start_buff, pdata, pdata_len); } - - cm_node->mpa_frame.rev = mpa_version; - cm_node->mpa_frame.flags = IETF_MPA_FLAGS_CRC | IETF_MPA_FLAGS_REJECT; - - return cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node); + return cm_core->api->reject(cm_core, cm_node); } @@ -3065,7 +3283,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesvnic = to_nesvnic(nesqp->ibqp.device); if (!nesvnic) return -EINVAL; - nesdev = nesvnic->nesdev; + nesdev = nesvnic->nesdev; if (!nesdev) return -EINVAL; @@ -3073,12 +3291,12 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -EINVAL; nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = " - "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id, - ntohl(nesvnic->local_ipaddr), - ntohl(cm_id->remote_addr.sin_addr.s_addr), - ntohs(cm_id->remote_addr.sin_port), - ntohl(cm_id->local_addr.sin_addr.s_addr), - ntohs(cm_id->local_addr.sin_port)); + "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id, + ntohl(nesvnic->local_ipaddr), + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohl(cm_id->local_addr.sin_addr.s_addr), + ntohs(cm_id->local_addr.sin_port)); atomic_inc(&cm_connects); nesqp->active_conn = 1; @@ -3092,12 +3310,12 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); nes_debug(NES_DBG_CM, "mpa private data len =%u\n", - conn_param->private_data_len); + conn_param->private_data_len); if (cm_id->local_addr.sin_addr.s_addr != - cm_id->remote_addr.sin_addr.s_addr) { + cm_id->remote_addr.sin_addr.s_addr) { nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); apbvt_set = 1; } @@ -3113,13 +3331,13 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) /* create a connect CM node connection */ cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, - conn_param->private_data_len, (void *)conn_param->private_data, - &cm_info); + conn_param->private_data_len, (void *)conn_param->private_data, + &cm_info); if (!cm_node) { if (apbvt_set) nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), - NES_MANAGE_APBVT_DEL); + PCI_FUNC(nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); cm_id->rem_ref(cm_id); return -ENOMEM; @@ -3169,7 +3387,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); if (!cm_node) { printk(KERN_ERR "%s[%u] Error returned from listen API call\n", - __func__, __LINE__); + __func__, __LINE__); return -ENOMEM; } @@ -3177,12 +3395,12 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) if (!cm_node->reused_node) { err = nes_manage_apbvt(nesvnic, - ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesvnic->nesdev->pcidev->devfn), - NES_MANAGE_APBVT_ADD); + ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_ADD); if (err) { printk(KERN_ERR "nes_manage_apbvt call returned %d.\n", - err); + err); g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); return err; } @@ -3219,13 +3437,13 @@ int nes_destroy_listen(struct iw_cm_id *cm_id) int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice) { int rc = 0; + cm_packets_received++; - if ((g_cm_core) && (g_cm_core->api)) { + if ((g_cm_core) && (g_cm_core->api)) rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb); - } else { + else nes_debug(NES_DBG_CM, "Unable to process packet for CM," - " cm is not setup properly.\n"); - } + " cm is not setup properly.\n"); return rc; } @@ -3240,11 +3458,10 @@ int nes_cm_start(void) nes_debug(NES_DBG_CM, "\n"); /* create the primary CM core, pass this handle to subsequent core inits */ g_cm_core = nes_cm_alloc_core(); - if (g_cm_core) { + if (g_cm_core) return 0; - } else { + else return -ENOMEM; - } } @@ -3265,7 +3482,6 @@ int nes_cm_stop(void) */ static void cm_event_connected(struct nes_cm_event *event) { - u64 u64temp; struct nes_qp *nesqp; struct nes_vnic *nesvnic; struct nes_device *nesdev; @@ -3274,7 +3490,6 @@ static void cm_event_connected(struct nes_cm_event *event) struct ib_qp_attr attr; struct iw_cm_id *cm_id; struct iw_cm_event cm_event; - struct nes_hw_qp_wqe *wqe; struct nes_v4_quad nes_quad; u32 crc_value; int ret; @@ -3288,17 +3503,16 @@ static void cm_event_connected(struct nes_cm_event *event) nesdev = nesvnic->nesdev; nesadapter = nesdev->nesadapter; - if (nesqp->destroyed) { + if (nesqp->destroyed) return; - } atomic_inc(&cm_connecteds); nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on" - " local port 0x%04X. jiffies = %lu.\n", - nesqp->hwqp.qp_id, - ntohl(cm_id->remote_addr.sin_addr.s_addr), - ntohs(cm_id->remote_addr.sin_port), - ntohs(cm_id->local_addr.sin_port), - jiffies); + " local port 0x%04X. jiffies = %lu.\n", + nesqp->hwqp.qp_id, + ntohl(cm_id->remote_addr.sin_addr.s_addr), + ntohs(cm_id->remote_addr.sin_port), + ntohs(cm_id->local_addr.sin_port), + jiffies); nes_cm_init_tsa_conn(nesqp, cm_node); @@ -3329,40 +3543,12 @@ static void cm_event_connected(struct nes_cm_event *event) NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); /* Adjust tail for not having a LSMM */ - nesqp->hwqp.sq_tail = 1; - -#if defined(NES_SEND_FIRST_WRITE) - if (cm_node->send_write0) { - nes_debug(NES_DBG_CM, "Sending first write.\n"); - wqe = &nesqp->hwqp.sq_vbase[0]; - u64temp = (unsigned long)nesqp; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; - set_wqe_64bit_value(wqe->wqe_words, - NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = - cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + /*nesqp->hwqp.sq_tail = 1;*/ - if (nesqp->sq_kmapped) { - nesqp->sq_kmapped = 0; - kunmap(nesqp->page); - } + build_rdma0_msg(cm_node, &nesqp); - /* use the reserved spot on the WQ for the extra first WQE */ - nesqp->nesqp_context->ird_ord_sizes &= - cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | - NES_QPCONTEXT_ORDIRD_ALSMM)); - nesqp->skip_lsmm = 1; - nesqp->hwqp.sq_tail = 0; - nes_write32(nesdev->regs + NES_WQE_ALLOC, - (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); - } -#endif + nes_write32(nesdev->regs + NES_WQE_ALLOC, + (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); memset(&nes_quad, 0, sizeof(nes_quad)); @@ -3379,13 +3565,13 @@ static void cm_event_connected(struct nes_cm_event *event) crc_value = get_crc_value(&nes_quad); nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); + nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); nesqp->hte_index &= nesadapter->hte_index_mask; nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); nesqp->ietf_frame = &cm_node->mpa_frame; - nesqp->private_data_len = (u8) cm_node->mpa_frame_size; + nesqp->private_data_len = (u8)cm_node->mpa_frame_size; cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); /* notify OF layer we successfully created the requested connection */ @@ -3397,7 +3583,9 @@ static void cm_event_connected(struct nes_cm_event *event) cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size; + cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size; + cm_event.ird = cm_node->ird_size; + cm_event.ord = cm_node->ord_size; cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr; ret = cm_id->event_handler(cm_id, &cm_event); @@ -3405,12 +3593,12 @@ static void cm_event_connected(struct nes_cm_event *event) if (ret) printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " - "ret=%d\n", __func__, __LINE__, ret); + "ret=%d\n", __func__, __LINE__, ret); attr.qp_state = IB_QPS_RTS; nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = " - "%lu\n", nesqp->hwqp.qp_id, jiffies); + "%lu\n", nesqp->hwqp.qp_id, jiffies); return; } @@ -3431,16 +3619,14 @@ static void cm_event_connect_error(struct nes_cm_event *event) return; cm_id = event->cm_node->cm_id; - if (!cm_id) { + if (!cm_id) return; - } nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id); nesqp = cm_id->provider_data; - if (!nesqp) { + if (!nesqp) return; - } /* notify OF layer about this connection error event */ /* cm_id->rem_ref(cm_id); */ @@ -3455,14 +3641,14 @@ static void cm_event_connect_error(struct nes_cm_event *event) cm_event.private_data_len = 0; nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, " - "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr, - cm_event.remote_addr.sin_addr.s_addr); + "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr, + cm_event.remote_addr.sin_addr.s_addr); ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); if (ret) printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " - "ret=%d\n", __func__, __LINE__, ret); + "ret=%d\n", __func__, __LINE__, ret); cm_id->rem_ref(cm_id); rem_ref_cm_node(event->cm_node->cm_core, event->cm_node); @@ -3532,7 +3718,7 @@ static void cm_event_reset(struct nes_cm_event *event) */ static void cm_event_mpa_req(struct nes_cm_event *event) { - struct iw_cm_id *cm_id; + struct iw_cm_id *cm_id; struct iw_cm_event cm_event; int ret; struct nes_cm_node *cm_node; @@ -3544,7 +3730,7 @@ static void cm_event_mpa_req(struct nes_cm_event *event) atomic_inc(&cm_connect_reqs); nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", - cm_node, cm_id, jiffies); + cm_node, cm_id, jiffies); cm_event.event = IW_CM_EVENT_CONNECT_REQUEST; cm_event.status = 0; @@ -3558,19 +3744,21 @@ static void cm_event_mpa_req(struct nes_cm_event *event) cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port); cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr); cm_event.private_data = cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8) cm_node->mpa_frame_size; + cm_event.private_data_len = (u8)cm_node->mpa_frame_size; + cm_event.ird = cm_node->ird_size; + cm_event.ord = cm_node->ord_size; ret = cm_id->event_handler(cm_id, &cm_event); if (ret) printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); + __func__, __LINE__, ret); return; } static void cm_event_mpa_reject(struct nes_cm_event *event) { - struct iw_cm_id *cm_id; + struct iw_cm_id *cm_id; struct iw_cm_event cm_event; struct nes_cm_node *cm_node; int ret; @@ -3582,7 +3770,7 @@ static void cm_event_mpa_reject(struct nes_cm_event *event) atomic_inc(&cm_connect_reqs); nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", - cm_node, cm_id, jiffies); + cm_node, cm_id, jiffies); cm_event.event = IW_CM_EVENT_CONNECT_REPLY; cm_event.status = -ECONNREFUSED; @@ -3597,17 +3785,17 @@ static void cm_event_mpa_reject(struct nes_cm_event *event) cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr); cm_event.private_data = cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8) cm_node->mpa_frame_size; + cm_event.private_data_len = (u8)cm_node->mpa_frame_size; nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, " - "remove_addr=%08x\n", - cm_event.local_addr.sin_addr.s_addr, - cm_event.remote_addr.sin_addr.s_addr); + "remove_addr=%08x\n", + cm_event.local_addr.sin_addr.s_addr, + cm_event.remote_addr.sin_addr.s_addr); ret = cm_id->event_handler(cm_id, &cm_event); if (ret) printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); + __func__, __LINE__, ret); return; } @@ -3626,7 +3814,7 @@ static int nes_cm_post_event(struct nes_cm_event *event) event->cm_info.cm_id->add_ref(event->cm_info.cm_id); INIT_WORK(&event->event_work, nes_cm_event_handler); nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n", - event->cm_node, event); + event->cm_node, event); queue_work(event->cm_node->cm_core->event_wq, &event->event_work); @@ -3643,7 +3831,7 @@ static int nes_cm_post_event(struct nes_cm_event *event) static void nes_cm_event_handler(struct work_struct *work) { struct nes_cm_event *event = container_of(work, struct nes_cm_event, - event_work); + event_work); struct nes_cm_core *cm_core; if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) @@ -3651,29 +3839,29 @@ static void nes_cm_event_handler(struct work_struct *work) cm_core = event->cm_node->cm_core; nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n", - event, event->type, atomic_read(&cm_core->events_posted)); + event, event->type, atomic_read(&cm_core->events_posted)); switch (event->type) { case NES_CM_EVENT_MPA_REQ: cm_event_mpa_req(event); nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n", - event->cm_node); + event->cm_node); break; case NES_CM_EVENT_RESET: nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n", - event->cm_node); + event->cm_node); cm_event_reset(event); break; case NES_CM_EVENT_CONNECTED: if ((!event->cm_node->cm_id) || - (event->cm_node->state != NES_CM_STATE_TSA)) + (event->cm_node->state != NES_CM_STATE_TSA)) break; cm_event_connected(event); nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); break; case NES_CM_EVENT_MPA_REJECT: if ((!event->cm_node->cm_id) || - (event->cm_node->state == NES_CM_STATE_TSA)) + (event->cm_node->state == NES_CM_STATE_TSA)) break; cm_event_mpa_reject(event); nes_debug(NES_DBG_CM, "CM Event: REJECT\n"); @@ -3681,7 +3869,7 @@ static void nes_cm_event_handler(struct work_struct *work) case NES_CM_EVENT_ABORTED: if ((!event->cm_node->cm_id) || - (event->cm_node->state == NES_CM_STATE_TSA)) + (event->cm_node->state == NES_CM_STATE_TSA)) break; cm_event_connect_error(event); nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 130c185..bdfa1fb 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -48,7 +48,16 @@ #define IETF_MPA_KEY_SIZE 16 #define IETF_MPA_VERSION 1 #define IETF_MAX_PRIV_DATA_LEN 512 -#define IETF_MPA_FRAME_SIZE 20 +#define IETF_MPA_FRAME_SIZE 20 +#define IETF_RTR_MSG_SIZE 4 +#define IETF_MPA_V2_FLAG 0x10 + +/* IETF RTR MSG Fields */ +#define IETF_PEER_TO_PEER 0x8000 +#define IETF_FLPDU_ZERO_LEN 0x4000 +#define IETF_RDMA0_WRITE 0x8000 +#define IETF_RDMA0_READ 0x4000 +#define IETF_NO_IRD_ORD 0x3FFF enum ietf_mpa_flags { IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ @@ -56,7 +65,7 @@ enum ietf_mpa_flags { IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */ }; -struct ietf_mpa_frame { +struct ietf_mpa_v1 { u8 key[IETF_MPA_KEY_SIZE]; u8 flags; u8 rev; @@ -66,6 +75,20 @@ struct ietf_mpa_frame { #define ietf_mpa_req_resp_frame ietf_mpa_frame +struct ietf_rtr_msg { + __be16 ctrl_ird; + __be16 ctrl_ord; +}; + +struct ietf_mpa_v2 { + u8 key[IETF_MPA_KEY_SIZE]; + u8 flags; + u8 rev; + __be16 priv_data_len; + struct ietf_rtr_msg rtr_msg; + u8 priv_data[0]; +}; + struct nes_v4_quad { u32 rsvd0; __le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */ @@ -171,8 +194,7 @@ struct nes_timer_entry { #define NES_CM_DEF_SEQ2 0x18ed5740 #define NES_CM_DEF_LOCAL_ID2 0xb807 -#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_MAX_PRIV_DATA_LEN) - +#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_RTR_MSG_SIZE + IETF_MAX_PRIV_DATA_LEN) typedef u32 nes_addr_t; @@ -204,6 +226,21 @@ enum nes_cm_node_state { NES_CM_STATE_CLOSED }; +enum mpa_frame_version { + IETF_MPA_V1 = 1, + IETF_MPA_V2 = 2 +}; + +enum mpa_frame_key { + MPA_KEY_REQUEST, + MPA_KEY_REPLY +}; + +enum send_rdma0 { + SEND_RDMA_READ_ZERO = 1, + SEND_RDMA_WRITE_ZERO = 2 +}; + enum nes_tcpip_pkt_type { NES_PKT_TYPE_UNKNOWN, NES_PKT_TYPE_SYN, @@ -245,9 +282,9 @@ struct nes_cm_tcp_context { enum nes_cm_listener_state { - NES_CM_LISTENER_PASSIVE_STATE=1, - NES_CM_LISTENER_ACTIVE_STATE=2, - NES_CM_LISTENER_EITHER_STATE=3 + NES_CM_LISTENER_PASSIVE_STATE = 1, + NES_CM_LISTENER_ACTIVE_STATE = 2, + NES_CM_LISTENER_EITHER_STATE = 3 }; struct nes_cm_listener { @@ -283,16 +320,20 @@ struct nes_cm_node { struct nes_cm_node *loopbackpartner; - struct nes_timer_entry *send_entry; - + struct nes_timer_entry *send_entry; + struct nes_timer_entry *recv_entry; spinlock_t retrans_list_lock; - struct nes_timer_entry *recv_entry; + enum send_rdma0 send_rdma0_op; - int send_write0; union { - struct ietf_mpa_frame mpa_frame; - u8 mpa_frame_buf[MAX_CM_BUFFER]; + struct ietf_mpa_v1 mpa_frame; + struct ietf_mpa_v2 mpa_v2_frame; + u8 mpa_frame_buf[MAX_CM_BUFFER]; }; + enum mpa_frame_version mpa_frame_rev; + u16 ird_size; + u16 ord_size; + u16 mpa_frame_size; struct iw_cm_id *cm_id; struct list_head list; @@ -399,10 +440,8 @@ struct nes_cm_ops { struct nes_vnic *, u16, void *, struct nes_cm_info *); int (*close)(struct nes_cm_core *, struct nes_cm_node *); - int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); - int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); + int (*accept)(struct nes_cm_core *, struct nes_cm_node *); + int (*reject)(struct nes_cm_core *, struct nes_cm_node *); int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); int (*destroy_cm_core)(struct nes_cm_core *); diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index 2800573..fe6b6e9 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -139,7 +139,8 @@ struct nes_qp { struct nes_cq *nesrcq; struct nes_pd *nespd; void *cm_node; /* handle of the node this QP is associated with */ - struct ietf_mpa_frame *ietf_frame; + void *ietf_frame; + u8 ietf_frame_size; dma_addr_t ietf_frame_pbase; struct ib_mr *lsmm_mr; struct nes_hw_qp hwqp; diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index c9624ea..b881bdc 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -171,7 +171,9 @@ struct qib_ctxtdata { /* how many alloc_pages() chunks in rcvegrbuf_pages */ u32 rcvegrbuf_chunks; /* how many egrbufs per chunk */ - u32 rcvegrbufs_perchunk; + u16 rcvegrbufs_perchunk; + /* ilog2 of above */ + u16 rcvegrbufs_perchunk_shift; /* order for rcvegrbuf_pages */ size_t rcvegrbuf_size; /* rcvhdrq size (for freeing) */ @@ -221,6 +223,9 @@ struct qib_ctxtdata { /* ctxt rcvhdrq head offset */ u32 head; u32 pkt_count; + /* lookaside fields */ + struct qib_qp *lookaside_qp; + u32 lookaside_qpn; /* QPs waiting for context processing */ struct list_head qp_wait_list; }; @@ -807,6 +812,10 @@ struct qib_devdata { * supports, less gives more pio bufs/ctxt, etc. */ u32 cfgctxts; + /* + * number of ctxts available for PSM open + */ + u32 freectxts; /* * hint that we should update pioavailshadow before @@ -936,7 +945,9 @@ struct qib_devdata { /* chip address space used by 4k pio buffers */ u32 align4k; /* size of each rcvegrbuffer */ - u32 rcvegrbufsize; + u16 rcvegrbufsize; + /* log2 of above */ + u16 rcvegrbufsize_shift; /* localbus width (1, 2,4,8,16,32) from config space */ u32 lbus_width; /* localbus speed in MHz */ diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 23e584f..9a9047f 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -279,10 +279,10 @@ bail: */ static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail) { - const u32 chunk = etail / rcd->rcvegrbufs_perchunk; - const u32 idx = etail % rcd->rcvegrbufs_perchunk; + const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift; + const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1); - return rcd->rcvegrbuf[chunk] + idx * rcd->dd->rcvegrbufsize; + return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift); } /* @@ -310,7 +310,6 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, u32 opcode; u32 psn; int diff; - unsigned long flags; /* Sanity check packet */ if (tlen < 24) @@ -365,7 +364,6 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, switch (qp->ibqp.qp_type) { case IB_QPT_RC: - spin_lock_irqsave(&qp->s_lock, flags); ruc_res = qib_ruc_check_hdr( ibp, hdr, @@ -373,11 +371,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, qp, be32_to_cpu(ohdr->bth[0])); if (ruc_res) { - spin_unlock_irqrestore(&qp->s_lock, - flags); goto unlock; } - spin_unlock_irqrestore(&qp->s_lock, flags); /* Only deal with RDMA Writes for now */ if (opcode < @@ -547,6 +542,15 @@ move_along: updegr = 0; } } + /* + * Notify qib_destroy_qp() if it is waiting + * for lookaside_qp to finish. + */ + if (rcd->lookaside_qp) { + if (atomic_dec_and_test(&rcd->lookaside_qp->refcount)) + wake_up(&rcd->lookaside_qp->wait); + rcd->lookaside_qp = NULL; + } rcd->head = l; rcd->pkt_count += i; diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 2625303..7763366 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1284,6 +1284,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, strlcpy(rcd->comm, current->comm, sizeof(rcd->comm)); ctxt_fp(fp) = rcd; qib_stats.sps_ctxts++; + dd->freectxts++; ret = 0; goto bail; @@ -1792,6 +1793,7 @@ static int qib_close(struct inode *in, struct file *fp) if (dd->pageshadow) unlock_expected_tids(rcd); qib_stats.sps_ctxts--; + dd->freectxts--; } mutex_unlock(&qib_mutex); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index d8ca0a0..781a802 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3273,6 +3273,8 @@ static int init_6120_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_6120_tidtemplate(dd); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index e1f9474..3f1d562 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4085,6 +4085,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7220_tidtemplate(dd); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 5ea9ece..efd0a11 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2310,12 +2310,15 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + ppd->cpspec->ibcctrl_a = val; /* * Reset the PCS interface to the serdes (and also ibc, which is still * in reset from above). Writes new value of ibcctrl_a as last step. */ qib_7322_mini_pcs_reset(ppd); qib_write_kreg(dd, kr_scratch, 0ULL); + /* clear the linkinit cmds */ + ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, LinkInitCmd); if (!ppd->cpspec->ibcctrl_b) { unsigned lse = ppd->link_speed_enabled; @@ -2387,11 +2390,6 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl); spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); - /* Hold the link state machine for mezz boards */ - if (IS_QMH(dd) || IS_QME(dd)) - qib_set_ib_7322_lstate(ppd, 0, - QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); - /* Also enable IBSTATUSCHG interrupt. */ val = qib_read_kreg_port(ppd, krp_errmask); qib_write_kreg_port(ppd, krp_errmask, @@ -2853,9 +2851,8 @@ static irqreturn_t qib_7322intr(int irq, void *data) for (i = 0; i < dd->first_user_ctxt; i++) { if (ctxtrbits & rmask) { ctxtrbits &= ~rmask; - if (dd->rcd[i]) { + if (dd->rcd[i]) qib_kreceive(dd->rcd[i], NULL, &npkts); - } } rmask <<= 1; } @@ -5230,6 +5227,8 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) QIBL_IB_AUTONEG_INPROG))) set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled); if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + struct qib_qsfp_data *qd = + &ppd->cpspec->qsfp_data; /* unlock the Tx settings, speed may change */ qib_write_kreg_port(ppd, krp_tx_deemph_override, SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, @@ -5237,6 +5236,12 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) qib_cancel_sends(ppd); /* on link down, ensure sane pcs state */ qib_7322_mini_pcs_reset(ppd); + /* schedule the qsfp refresh which should turn the link + off */ + if (ppd->dd->flags & QIB_HAS_QSFP) { + qd->t_insert = get_jiffies_64(); + schedule_work(&qd->work); + } spin_lock_irqsave(&ppd->sdma_lock, flags); if (__qib_sdma_running(ppd)) __qib_sdma_process_event(ppd, @@ -5587,43 +5592,79 @@ static void qsfp_7322_event(struct work_struct *work) struct qib_qsfp_data *qd; struct qib_pportdata *ppd; u64 pwrup; + unsigned long flags; int ret; u32 le2; qd = container_of(work, struct qib_qsfp_data, work); ppd = qd->ppd; - pwrup = qd->t_insert + msecs_to_jiffies(QSFP_PWR_LAG_MSEC); + pwrup = qd->t_insert + + msecs_to_jiffies(QSFP_PWR_LAG_MSEC - QSFP_MODPRS_LAG_MSEC); - /* - * Some QSFP's not only do not respond until the full power-up - * time, but may behave badly if we try. So hold off responding - * to insertion. - */ - while (1) { - u64 now = get_jiffies_64(); - if (time_after64(now, pwrup)) - break; - msleep(20); - } - ret = qib_refresh_qsfp_cache(ppd, &qd->cache); - /* - * Need to change LE2 back to defaults if we couldn't - * read the cable type (to handle cable swaps), so do this - * even on failure to read cable information. We don't - * get here for QME, so IS_QME check not needed here. - */ - if (!ret && !ppd->dd->cspec->r1) { - if (QSFP_IS_ACTIVE_FAR(qd->cache.tech)) - le2 = LE2_QME; - else if (qd->cache.atten[1] >= qib_long_atten && - QSFP_IS_CU(qd->cache.tech)) - le2 = LE2_5m; - else + /* Delay for 20 msecs to allow ModPrs resistor to setup */ + mdelay(QSFP_MODPRS_LAG_MSEC); + + if (!qib_qsfp_mod_present(ppd)) { + ppd->cpspec->qsfp_data.modpresent = 0; + /* Set the physical link to disabled */ + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else { + /* + * Some QSFP's not only do not respond until the full power-up + * time, but may behave badly if we try. So hold off responding + * to insertion. + */ + while (1) { + u64 now = get_jiffies_64(); + if (time_after64(now, pwrup)) + break; + msleep(20); + } + + ret = qib_refresh_qsfp_cache(ppd, &qd->cache); + + /* + * Need to change LE2 back to defaults if we couldn't + * read the cable type (to handle cable swaps), so do this + * even on failure to read cable information. We don't + * get here for QME, so IS_QME check not needed here. + */ + if (!ret && !ppd->dd->cspec->r1) { + if (QSFP_IS_ACTIVE_FAR(qd->cache.tech)) + le2 = LE2_QME; + else if (qd->cache.atten[1] >= qib_long_atten && + QSFP_IS_CU(qd->cache.tech)) + le2 = LE2_5m; + else + le2 = LE2_DEFAULT; + } else le2 = LE2_DEFAULT; - } else - le2 = LE2_DEFAULT; - ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7)); - init_txdds_table(ppd, 0); + ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7)); + /* + * We always change parameteters, since we can choose + * values for cables without eeproms, and the cable may have + * changed from a cable with full or partial eeprom content + * to one with partial or no content. + */ + init_txdds_table(ppd, 0); + /* The physical link is being re-enabled only when the + * previous state was DISABLED and the VALID bit is not + * set. This should only happen when the cable has been + * physically pulled. */ + if (!ppd->cpspec->qsfp_data.modpresent && + (ppd->lflags & (QIBL_LINKV | QIBL_IB_LINK_DISABLED))) { + ppd->cpspec->qsfp_data.modpresent = 1; + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + } } /* @@ -5727,7 +5768,8 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change) /* now change the IBC and serdes, overriding generic */ init_txdds_table(ppd, 1); /* Re-enable the physical state machine on mezz boards - * now that the correct settings have been set. */ + * now that the correct settings have been set. + * QSFP boards are handles by the QSFP event handler */ if (IS_QMH(dd) || IS_QME(dd)) qib_set_ib_7322_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); @@ -6205,6 +6247,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ dd->rcvegrbufsize = max(mtu, 2048); + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7322_tidtemplate(dd); @@ -7147,7 +7191,8 @@ static void find_best_ent(struct qib_pportdata *ppd, } } - /* Lookup serdes setting by cable type and attenuation */ + /* Active cables don't have attenuation so we only set SERDES + * settings to account for the attenuation of the board traces. */ if (!override && QSFP_IS_ACTIVE(qd->tech)) { *sdr_dds = txdds_sdr + ppd->dd->board_atten; *ddr_dds = txdds_ddr + ppd->dd->board_atten; @@ -7464,12 +7509,6 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) u32 le_val, rxcaldone; int chan, chan_done = (1 << SERDES_CHANS) - 1; - /* - * Initialize the Tx DDS tables. Also done every QSFP event, - * for adapters with QSFP - */ - init_txdds_table(ppd, 0); - /* Clear cmode-override, may be set from older driver */ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14); @@ -7655,6 +7694,12 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) /* VGA output common mode */ ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2)); + /* + * Initialize the Tx DDS tables. Also done every QSFP event, + * for adapters with QSFP + */ + init_txdds_table(ppd, 0); + return 0; } diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index a01f3fc..b093a0b 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -183,6 +183,9 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + rcd->rcvegrbufs_perchunk - 1) / rcd->rcvegrbufs_perchunk; + BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk)); + rcd->rcvegrbufs_perchunk_shift = + ilog2(rcd->rcvegrbufs_perchunk); } return rcd; } @@ -398,6 +401,7 @@ static void enable_chip(struct qib_devdata *dd) if (rcd) dd->f_rcvctrl(rcd->ppd, rcvmask, i); } + dd->freectxts = dd->cfgctxts - dd->first_user_ctxt; } static void verify_interrupt(unsigned long opaque) @@ -581,10 +585,6 @@ int qib_init(struct qib_devdata *dd, int reinit) continue; } - /* let link come up, and enable IBC */ - spin_lock_irqsave(&ppd->lflags_lock, flags); - ppd->lflags &= ~QIBL_IB_LINK_DISABLED; - spin_unlock_irqrestore(&ppd->lflags_lock, flags); portok++; } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index e16751f..7e7e16f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -34,6 +34,7 @@ #include <linux/err.h> #include <linux/vmalloc.h> +#include <linux/jhash.h> #include "qib.h" @@ -204,6 +205,13 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); } +static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) +{ + return jhash_1word(qpn, dev->qp_rnd) & + (dev->qp_table_size - 1); +} + + /* * Put the QP into the hash table. * The hash table holds a reference to the QP. @@ -211,22 +219,23 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) { struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - unsigned n = qp->ibqp.qp_num % dev->qp_table_size; unsigned long flags; + unsigned n = qpn_hash(dev, qp->ibqp.qp_num); spin_lock_irqsave(&dev->qpt_lock, flags); + atomic_inc(&qp->refcount); if (qp->ibqp.qp_num == 0) - ibp->qp0 = qp; + rcu_assign_pointer(ibp->qp0, qp); else if (qp->ibqp.qp_num == 1) - ibp->qp1 = qp; + rcu_assign_pointer(ibp->qp1, qp); else { qp->next = dev->qp_table[n]; - dev->qp_table[n] = qp; + rcu_assign_pointer(dev->qp_table[n], qp); } - atomic_inc(&qp->refcount); spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); } /* @@ -236,29 +245,32 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) { struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct qib_qp *q, **qpp; + unsigned n = qpn_hash(dev, qp->ibqp.qp_num); unsigned long flags; - qpp = &dev->qp_table[qp->ibqp.qp_num % dev->qp_table_size]; - spin_lock_irqsave(&dev->qpt_lock, flags); if (ibp->qp0 == qp) { - ibp->qp0 = NULL; atomic_dec(&qp->refcount); + rcu_assign_pointer(ibp->qp0, NULL); } else if (ibp->qp1 == qp) { - ibp->qp1 = NULL; atomic_dec(&qp->refcount); - } else + rcu_assign_pointer(ibp->qp1, NULL); + } else { + struct qib_qp *q, **qpp; + + qpp = &dev->qp_table[n]; for (; (q = *qpp) != NULL; qpp = &q->next) if (q == qp) { - *qpp = qp->next; - qp->next = NULL; atomic_dec(&qp->refcount); + rcu_assign_pointer(*qpp, qp->next); + qp->next = NULL; break; } + } spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); } /** @@ -280,21 +292,24 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) if (!qib_mcast_tree_empty(ibp)) qp_inuse++; - if (ibp->qp0) + rcu_read_lock(); + if (rcu_dereference(ibp->qp0)) qp_inuse++; - if (ibp->qp1) + if (rcu_dereference(ibp->qp1)) qp_inuse++; + rcu_read_unlock(); } spin_lock_irqsave(&dev->qpt_lock, flags); for (n = 0; n < dev->qp_table_size; n++) { qp = dev->qp_table[n]; - dev->qp_table[n] = NULL; + rcu_assign_pointer(dev->qp_table[n], NULL); for (; qp; qp = qp->next) qp_inuse++; } spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); return qp_inuse; } @@ -309,25 +324,28 @@ unsigned qib_free_all_qps(struct qib_devdata *dd) */ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) { - struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; - unsigned long flags; - struct qib_qp *qp; + struct qib_qp *qp = NULL; - spin_lock_irqsave(&dev->qpt_lock, flags); + if (unlikely(qpn <= 1)) { + rcu_read_lock(); + if (qpn == 0) + qp = rcu_dereference(ibp->qp0); + else + qp = rcu_dereference(ibp->qp1); + } else { + struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; + unsigned n = qpn_hash(dev, qpn); - if (qpn == 0) - qp = ibp->qp0; - else if (qpn == 1) - qp = ibp->qp1; - else - for (qp = dev->qp_table[qpn % dev->qp_table_size]; qp; - qp = qp->next) + rcu_read_lock(); + for (qp = dev->qp_table[n]; rcu_dereference(qp); qp = qp->next) if (qp->ibqp.qp_num == qpn) break; + } if (qp) - atomic_inc(&qp->refcount); + if (unlikely(!atomic_inc_not_zero(&qp->refcount))) + qp = NULL; - spin_unlock_irqrestore(&dev->qpt_lock, flags); + rcu_read_unlock(); return qp; } @@ -765,8 +783,10 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } } - if (attr_mask & IB_QP_PATH_MTU) + if (attr_mask & IB_QP_PATH_MTU) { qp->path_mtu = pmtu; + qp->pmtu = ib_mtu_enum_to_int(pmtu); + } if (attr_mask & IB_QP_RETRY_CNT) { qp->s_retry_cnt = attr->retry_cnt; @@ -781,8 +801,12 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MIN_RNR_TIMER) qp->r_min_rnr_timer = attr->min_rnr_timer; - if (attr_mask & IB_QP_TIMEOUT) + if (attr_mask & IB_QP_TIMEOUT) { qp->timeout = attr->timeout; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + } if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; @@ -1013,6 +1037,10 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, ret = ERR_PTR(-ENOMEM); goto bail_swq; } + RCU_INIT_POINTER(qp->next, NULL); + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); if (init_attr->srq) sz = 0; else { diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c index 3374a52..e06c4ed 100644 --- a/drivers/infiniband/hw/qib/qib_qsfp.c +++ b/drivers/infiniband/hw/qib/qib_qsfp.c @@ -273,18 +273,12 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp) int ret; int idx; u16 cks; - u32 mask; u8 peek[4]; /* ensure sane contents on invalid reads, for cable swaps */ memset(cp, 0, sizeof(*cp)); - mask = QSFP_GPIO_MOD_PRS_N; - if (ppd->hw_pidx) - mask <<= QSFP_GPIO_PORT2_SHIFT; - - ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0); - if (ret & mask) { + if (!qib_qsfp_mod_present(ppd)) { ret = -ENODEV; goto bail; } @@ -444,6 +438,19 @@ const char * const qib_qsfp_devtech[16] = { static const char *pwr_codes = "1.5W2.0W2.5W3.5W"; +int qib_qsfp_mod_present(struct qib_pportdata *ppd) +{ + u32 mask; + int ret; + + mask = QSFP_GPIO_MOD_PRS_N << + (ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT); + ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0); + + return !((ret & mask) >> + ((ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT) + 3)); +} + /* * Initialize structures that control access to QSFP. Called once per port * on cards that support QSFP. @@ -452,7 +459,6 @@ void qib_qsfp_init(struct qib_qsfp_data *qd, void (*fevent)(struct work_struct *)) { u32 mask, highs; - int pins; struct qib_devdata *dd = qd->ppd->dd; @@ -480,8 +486,7 @@ void qib_qsfp_init(struct qib_qsfp_data *qd, mask <<= QSFP_GPIO_PORT2_SHIFT; /* Do not try to wait here. Better to let event handle it */ - pins = dd->f_gpio_mod(dd, 0, 0, 0); - if (pins & mask) + if (!qib_qsfp_mod_present(qd->ppd)) goto bail; /* We see a module, but it may be unwise to look yet. Just schedule */ qd->t_insert = get_jiffies_64(); diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h index c109bbd..46002a9 100644 --- a/drivers/infiniband/hw/qib/qib_qsfp.h +++ b/drivers/infiniband/hw/qib/qib_qsfp.h @@ -34,6 +34,7 @@ #define QSFP_DEV 0xA0 #define QSFP_PWR_LAG_MSEC 2000 +#define QSFP_MODPRS_LAG_MSEC 20 /* * Below are masks for various QSFP signals, for Port 1. @@ -177,10 +178,12 @@ struct qib_qsfp_data { struct work_struct work; struct qib_qsfp_cache cache; u64 t_insert; + u8 modpresent; }; extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp); +extern int qib_qsfp_mod_present(struct qib_pportdata *ppd); extern void qib_qsfp_init(struct qib_qsfp_data *qd, void (*fevent)(struct work_struct *)); extern void qib_qsfp_deinit(struct qib_qsfp_data *qd); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index eca0c41..afaf4ac 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -59,8 +59,7 @@ static void start_timer(struct qib_qp *qp) qp->s_flags |= QIB_S_TIMER; qp->s_timer.function = rc_timeout; /* 4.096 usec. * (1 << qp->timeout) */ - qp->s_timer.expires = jiffies + - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL); + qp->s_timer.expires = jiffies + qp->timeout_jiffies; add_timer(&qp->s_timer); } @@ -239,7 +238,7 @@ int qib_make_rc_req(struct qib_qp *qp) u32 len; u32 bth0; u32 bth2; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; char newreq; unsigned long flags; int ret = 0; @@ -1519,9 +1518,7 @@ read_middle: * 4.096 usec. * (1 << qp->timeout) */ qp->s_flags |= QIB_S_TIMER; - mod_timer(&qp->s_timer, jiffies + - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / - 1000UL)); + mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); if (qp->s_flags & QIB_S_WAIT_ACK) { qp->s_flags &= ~QIB_S_WAIT_ACK; qib_schedule_send(qp); @@ -1732,7 +1729,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, * same request. */ offset = ((psn - e->psn) & QIB_PSN_MASK) * - ib_mtu_enum_to_int(qp->path_mtu); + qp->pmtu; len = be32_to_cpu(reth->length); if (unlikely(offset + len != e->rdma_sge.sge_length)) goto unlock_done; @@ -1876,7 +1873,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, u32 psn; u32 pad; struct ib_wc wc; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; int diff; struct ib_reth *reth; unsigned long flags; @@ -1892,10 +1889,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, } opcode = be32_to_cpu(ohdr->bth[0]); - spin_lock_irqsave(&qp->s_lock, flags); if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) - goto sunlock; - spin_unlock_irqrestore(&qp->s_lock, flags); + return; psn = be32_to_cpu(ohdr->bth[2]); opcode >>= 24; @@ -1955,8 +1950,6 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, break; } - memset(&wc, 0, sizeof wc); - if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) { qp->r_flags |= QIB_R_COMM_EST; if (qp->ibqp.event_handler) { @@ -2009,16 +2002,19 @@ send_middle: goto rnr_nak; qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) - goto send_last; - /* FALLTHROUGH */ + goto no_immediate_data; + /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; - /* FALLTHROUGH */ + goto send_last; case OP(SEND_LAST): case OP(RDMA_WRITE_LAST): +no_immediate_data: + wc.wc_flags = 0; + wc.ex.imm_data = 0; send_last: /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; @@ -2051,6 +2047,12 @@ send_last: wc.src_qp = qp->remote_qpn; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; + /* zero fields that are N/A */ + wc.vendor_err = 0; + wc.pkey_index = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + wc.csum_ok = 0; /* Signal completion event if the solicited bit is set. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -2089,7 +2091,7 @@ send_last: if (opcode == OP(RDMA_WRITE_FIRST)) goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) - goto send_last; + goto no_immediate_data; ret = qib_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index eb78d93..b4b37e4 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -260,12 +260,15 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) /* * - * This should be called with the QP s_lock held. + * This should be called with the QP r_lock held. + * + * The s_lock will be acquired around the qib_migrate_qp() call. */ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, int has_grh, struct qib_qp *qp, u32 bth0) { __be64 guid; + unsigned long flags; if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { if (!has_grh) { @@ -295,7 +298,9 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid || ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num) goto err; + spin_lock_irqsave(&qp->s_lock, flags); qib_migrate_qp(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); } else { if (!has_grh) { if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c index c3ec8ef..d623593 100644 --- a/drivers/infiniband/hw/qib/qib_srq.c +++ b/drivers/infiniband/hw/qib/qib_srq.c @@ -107,6 +107,11 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd, u32 sz; struct ib_srq *ret; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) { + ret = ERR_PTR(-ENOSYS); + goto done; + } + if (srq_init_attr->attr.max_sge == 0 || srq_init_attr->attr.max_sge > ib_qib_max_srq_sges || srq_init_attr->attr.max_wr == 0 || diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 14d129d..78fbd56 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -515,8 +515,7 @@ static ssize_t show_nfreectxts(struct device *device, struct qib_devdata *dd = dd_from_dev(dev); /* Return the number of free user ports (contexts) available. */ - return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts - - dd->first_user_ctxt - (u32)qib_stats.sps_ctxts); + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); } static ssize_t show_serial(struct device *device, diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 32ccf3c..847e7af 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -51,7 +51,7 @@ int qib_make_uc_req(struct qib_qp *qp) u32 hwords; u32 bth0; u32 len; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; int ret = 0; spin_lock_irqsave(&qp->s_lock, flags); @@ -243,13 +243,12 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, int has_grh, void *data, u32 tlen, struct qib_qp *qp) { struct qib_other_headers *ohdr; - unsigned long flags; u32 opcode; u32 hdrsize; u32 psn; u32 pad; struct ib_wc wc; - u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); + u32 pmtu = qp->pmtu; struct ib_reth *reth; int ret; @@ -263,14 +262,11 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, } opcode = be32_to_cpu(ohdr->bth[0]); - spin_lock_irqsave(&qp->s_lock, flags); if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) - goto sunlock; - spin_unlock_irqrestore(&qp->s_lock, flags); + return; psn = be32_to_cpu(ohdr->bth[2]); opcode >>= 24; - memset(&wc, 0, sizeof wc); /* Compare the PSN verses the expected PSN. */ if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) { @@ -370,7 +366,7 @@ send_first: } qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) - goto send_last; + goto no_immediate_data; else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) goto send_last_imm; /* FALLTHROUGH */ @@ -389,8 +385,11 @@ send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; - /* FALLTHROUGH */ + goto send_last; case OP(SEND_LAST): +no_immediate_data: + wc.ex.imm_data = 0; + wc.wc_flags = 0; send_last: /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; @@ -418,6 +417,12 @@ last_imm: wc.src_qp = qp->remote_qpn; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; + /* zero fields that are N/A */ + wc.vendor_err = 0; + wc.pkey_index = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + wc.csum_ok = 0; /* Signal completion event if the solicited bit is set. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & @@ -546,6 +551,4 @@ op_err: qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; -sunlock: - spin_unlock_irqrestore(&qp->s_lock, flags); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 9fab404..9627cb73 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -38,11 +38,12 @@ #include <linux/utsname.h> #include <linux/rculist.h> #include <linux/mm.h> +#include <linux/random.h> #include "qib.h" #include "qib_common.h" -static unsigned int ib_qib_qp_table_size = 251; +static unsigned int ib_qib_qp_table_size = 256; module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO); MODULE_PARM_DESC(qp_table_size, "QP table size"); @@ -659,17 +660,25 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (atomic_dec_return(&mcast->refcount) <= 1) wake_up(&mcast->wait); } else { - qp = qib_lookup_qpn(ibp, qp_num); - if (!qp) - goto drop; + if (rcd->lookaside_qp) { + if (rcd->lookaside_qpn != qp_num) { + if (atomic_dec_and_test( + &rcd->lookaside_qp->refcount)) + wake_up( + &rcd->lookaside_qp->wait); + rcd->lookaside_qp = NULL; + } + } + if (!rcd->lookaside_qp) { + qp = qib_lookup_qpn(ibp, qp_num); + if (!qp) + goto drop; + rcd->lookaside_qp = qp; + rcd->lookaside_qpn = qp_num; + } else + qp = rcd->lookaside_qp; ibp->n_unicast_rcv++; qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); - /* - * Notify qib_destroy_qp() if it is waiting - * for us to finish. - */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); } return; @@ -1974,6 +1983,8 @@ static void init_ibport(struct qib_pportdata *ppd) ibp->z_excessive_buffer_overrun_errors = cntrs.excessive_buffer_overrun_errors; ibp->z_vl15_dropped = cntrs.vl15_dropped; + RCU_INIT_POINTER(ibp->qp0, NULL); + RCU_INIT_POINTER(ibp->qp1, NULL); } /** @@ -1990,12 +2001,15 @@ int qib_register_ib_device(struct qib_devdata *dd) int ret; dev->qp_table_size = ib_qib_qp_table_size; - dev->qp_table = kzalloc(dev->qp_table_size * sizeof *dev->qp_table, + get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd)); + dev->qp_table = kmalloc(dev->qp_table_size * sizeof *dev->qp_table, GFP_KERNEL); if (!dev->qp_table) { ret = -ENOMEM; goto err_qpt; } + for (i = 0; i < dev->qp_table_size; i++) + RCU_INIT_POINTER(dev->qp_table[i], NULL); for (i = 0; i < dd->num_pports; i++) init_ibport(ppd + i); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 95e5b47..0c19ef0 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -485,6 +485,7 @@ struct qib_qp { u8 alt_timeout; /* Alternate path timeout for this QP */ u8 port_num; enum ib_mtu path_mtu; + u32 pmtu; /* decoded from path_mtu */ u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ @@ -495,6 +496,7 @@ struct qib_qp { u32 s_last; /* last completed entry */ u32 s_ssn; /* SSN of tail entry */ u32 s_lsn; /* limit sequence number (credit) */ + unsigned long timeout_jiffies; /* computed from timeout */ struct qib_swqe *s_wq; /* send work queue */ struct qib_swqe *s_wqe; struct qib_rq r_rq; /* receive work queue */ @@ -723,7 +725,8 @@ struct qib_ibdev { dma_addr_t pio_hdrs_phys; /* list of QPs waiting for RNR timer */ spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */ - unsigned qp_table_size; /* size of the hash table */ + u32 qp_table_size; /* size of the hash table */ + u32 qp_rnd; /* random bytes for hash */ spinlock_t qpt_lock; u32 n_piowait; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 39913a0..fe48677 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -84,7 +84,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); for (i = 0; i < frags; ++i) - ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); } static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) @@ -183,7 +183,7 @@ partial_error: ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); for (; i > 0; --i) - ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); return NULL; @@ -1496,6 +1496,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_srq_init_attr srq_init_attr = { + .srq_type = IB_SRQT_BASIC, .attr = { .max_wr = ipoib_recvq_size, .max_sge = max_sge diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 86eae22..0e2fe463 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -212,16 +212,15 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr) gid_buf, path.pathrec.dlid ? "yes" : "no"); if (path.pathrec.dlid) { - rate = ib_rate_to_mult(path.pathrec.rate) * 25; + rate = ib_rate_to_mbps(path.pathrec.rate); seq_printf(file, " DLID: 0x%04x\n" " SL: %12d\n" - " rate: %*d%s Gb/sec\n", + " rate: %8d.%d Gb/sec\n", be16_to_cpu(path.pathrec.dlid), path.pathrec.sl, - 10 - ((rate % 10) ? 2 : 0), - rate / 10, rate % 10 ? ".5" : ""); + rate / 1000, rate % 1000); } seq_putc(file, '\n'); |