diff options
Diffstat (limited to 'drivers/infiniband')
92 files changed, 2954 insertions, 1190 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 994decc..a193dfb 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -1,14 +1,14 @@ -menu "InfiniBand support" - depends on HAS_IOMEM - -config INFINIBAND - depends on PCI || BROKEN +menuconfig INFINIBAND tristate "InfiniBand support" + depends on PCI || BROKEN + depends on HAS_IOMEM ---help--- Core support for InfiniBand (IB). Make sure to also select any protocols you wish to use as well as drivers for your InfiniBand hardware. +if INFINIBAND + config INFINIBAND_USER_MAD tristate "InfiniBand userspace MAD support" depends on INFINIBAND @@ -20,7 +20,6 @@ config INFINIBAND_USER_MAD config INFINIBAND_USER_ACCESS tristate "InfiniBand userspace access (verbs and CM)" - depends on INFINIBAND ---help--- Userspace InfiniBand access support. This enables the kernel side of userspace verbs and the userspace @@ -37,7 +36,7 @@ config INFINIBAND_USER_MEM config INFINIBAND_ADDR_TRANS bool - depends on INFINIBAND && INET + depends on INET default y source "drivers/infiniband/hw/mthca/Kconfig" @@ -54,4 +53,4 @@ source "drivers/infiniband/ulp/srp/Kconfig" source "drivers/infiniband/ulp/iser/Kconfig" -endmenu +endif # INFINIBAND diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index ecd1a30..db2633e 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -3,7 +3,7 @@ * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -34,7 +34,6 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $ */ #include <linux/slab.h> @@ -42,6 +41,7 @@ #include "agent.h" #include "smi.h" +#include "mad_priv.h" #define SPFX "ib_agent: " @@ -87,8 +87,13 @@ int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, struct ib_mad_send_buf *send_buf; struct ib_ah *ah; int ret; + struct ib_mad_send_wr_private *mad_send_wr; + + if (device->node_type == RDMA_NODE_IB_SWITCH) + port_priv = ib_get_agent_port(device, 0); + else + port_priv = ib_get_agent_port(device, port_num); - port_priv = ib_get_agent_port(device, port_num); if (!port_priv) { printk(KERN_ERR SPFX "Unable to find port agent\n"); return -ENODEV; @@ -113,6 +118,14 @@ int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, memcpy(send_buf->mad, mad, sizeof *mad); send_buf->ah = ah; + + if (device->node_type == RDMA_NODE_IB_SWITCH) { + mad_send_wr = container_of(send_buf, + struct ib_mad_send_wr_private, + send_buf); + mad_send_wr->send_wr.wr.ud.port_num = port_num; + } + if ((ret = ib_post_send_mad(send_buf, NULL))) { printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret); goto err2; diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 40c004a..9820c67 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -87,6 +87,7 @@ struct cm_port { struct cm_device { struct list_head list; struct ib_device *device; + u8 ack_delay; struct cm_port port[0]; }; @@ -95,7 +96,7 @@ struct cm_av { union ib_gid dgid; struct ib_ah_attr ah_attr; u16 pkey_index; - u8 packet_life_time; + u8 timeout; }; struct cm_work { @@ -154,6 +155,7 @@ struct cm_id_private { u8 retry_count; u8 rnr_retry_count; u8 service_timeout; + u8 target_ack_delay; struct list_head work_list; atomic_t work_count; @@ -293,7 +295,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) av->port = port; ib_init_ah_from_path(cm_dev->device, port->port_num, path, &av->ah_attr); - av->packet_life_time = path->packet_life_time; + av->timeout = path->packet_life_time + 1; return 0; } @@ -318,12 +320,10 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) static void cm_free_id(__be32 local_id) { - unsigned long flags; - - spin_lock_irqsave(&cm.lock, flags); + spin_lock_irq(&cm.lock); idr_remove(&cm.local_id_table, (__force int) (local_id ^ cm.random_id_operand)); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); } static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) @@ -345,11 +345,10 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; - unsigned long flags; - spin_lock_irqsave(&cm.lock, flags); + spin_lock_irq(&cm.lock); cm_id_priv = cm_get_id(local_id, remote_id); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); return cm_id_priv; } @@ -646,6 +645,25 @@ static inline int cm_convert_to_ms(int iba_time) return 1 << max(iba_time - 8, 0); } +/* + * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time + * Because of how ack_timeout is stored, adding one doubles the timeout. + * To avoid large timeouts, select the max(ack_delay, life_time + 1), and + * increment it (round up) only if the other is within 50%. + */ +static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time) +{ + int ack_timeout = packet_life_time + 1; + + if (ack_timeout >= ca_ack_delay) + ack_timeout += (ca_ack_delay >= (ack_timeout - 1)); + else + ack_timeout = ca_ack_delay + + (ack_timeout >= (ca_ack_delay - 1)); + + return min(31, ack_timeout); +} + static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) { if (timewait_info->inserted_remote_id) { @@ -689,7 +707,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) * timewait before notifying the user that we've exited timewait. */ cm_id_priv->id.state = IB_CM_TIMEWAIT; - wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1); + wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, msecs_to_jiffies(wait_time)); cm_id_priv->timewait_info = NULL; @@ -713,31 +731,30 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; struct cm_work *work; - unsigned long flags; cm_id_priv = container_of(cm_id, struct cm_id_private, id); retest: - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); switch (cm_id->state) { case IB_CM_LISTEN: cm_id->state = IB_CM_IDLE; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - spin_lock_irqsave(&cm.lock, flags); + spin_unlock_irq(&cm_id_priv->lock); + spin_lock_irq(&cm.lock); rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_SIDR_REQ_RCVD: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); break; case IB_CM_REQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, &cm_id_priv->id.device->node_guid, sizeof cm_id_priv->id.device->node_guid, @@ -747,9 +764,9 @@ retest: if (err == -ENOMEM) { /* Do not reject to allow future retries. */ cm_reset_to_idle(cm_id_priv); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); } else { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); } @@ -762,25 +779,25 @@ retest: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); break; case IB_CM_ESTABLISHED: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_DREQ_RCVD: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_drep(cm_id, NULL, 0); break; default: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); break; } @@ -912,7 +929,8 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_primary_sl(req_msg, param->primary_path->sl); cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */ cm_req_set_primary_local_ack_timeout(req_msg, - min(31, param->primary_path->packet_life_time + 1)); + cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, + param->primary_path->packet_life_time)); if (param->alternate_path) { req_msg->alt_local_lid = param->alternate_path->slid; @@ -927,7 +945,8 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_alt_sl(req_msg, param->alternate_path->sl); cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */ cm_req_set_alt_local_ack_timeout(req_msg, - min(31, param->alternate_path->packet_life_time + 1)); + cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, + param->alternate_path->packet_life_time)); } if (param->private_data && param->private_data_len) @@ -1169,7 +1188,6 @@ static void cm_format_req_event(struct cm_work *work, static void cm_process_work(struct cm_id_private *cm_id_priv, struct cm_work *work) { - unsigned long flags; int ret; /* We will typically only have the current event to report. */ @@ -1177,9 +1195,9 @@ static void cm_process_work(struct cm_id_private *cm_id_priv, cm_free_work(work); while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) { - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); work = cm_dequeue_work(cm_id_priv); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); BUG_ON(!work); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); @@ -1250,7 +1268,6 @@ static void cm_dup_req_handler(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct ib_mad_send_buf *msg = NULL; - unsigned long flags; int ret; /* Quick state check to discard duplicate REQs. */ @@ -1261,7 +1278,7 @@ static void cm_dup_req_handler(struct cm_work *work, if (ret) return; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_MRA_REQ_SENT: cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, @@ -1276,14 +1293,14 @@ static void cm_dup_req_handler(struct cm_work *work, default: goto unlock; } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; return; -unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +unlock: spin_unlock_irq(&cm_id_priv->lock); free: cm_free_msg(msg); } @@ -1293,17 +1310,16 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; struct cm_timewait_info *timewait_info; struct cm_req_msg *req_msg; - unsigned long flags; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; /* Check for possible duplicate REQ. */ - spin_lock_irqsave(&cm.lock, flags); + spin_lock_irq(&cm.lock); timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); if (timewait_info) { cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, timewait_info->work.remote_id); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); if (cur_cm_id_priv) { cm_dup_req_handler(work, cur_cm_id_priv); cm_deref_id(cur_cm_id_priv); @@ -1315,7 +1331,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { cm_cleanup_timewait(cm_id_priv->timewait_info); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); @@ -1328,7 +1344,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, req_msg->private_data); if (!listen_cm_id_priv) { cm_cleanup_timewait(cm_id_priv->timewait_info); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, NULL, 0); @@ -1338,7 +1354,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, atomic_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); out: return listen_cm_id_priv; } @@ -1440,7 +1456,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); rep_msg->resp_resources = param->responder_resources; rep_msg->initiator_depth = param->initiator_depth; - cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay); + cm_rep_set_target_ack_delay(rep_msg, + cm_id_priv->av.port->cm_dev->ack_delay); cm_rep_set_failover(rep_msg, param->failover_accepted); cm_rep_set_flow_ctrl(rep_msg, param->flow_control); cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); @@ -1591,7 +1608,6 @@ static void cm_dup_rep_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; struct ib_mad_send_buf *msg = NULL; - unsigned long flags; int ret; rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; @@ -1604,7 +1620,7 @@ static void cm_dup_rep_handler(struct cm_work *work) if (ret) goto deref; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state == IB_CM_ESTABLISHED) cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, cm_id_priv->private_data, @@ -1616,14 +1632,14 @@ static void cm_dup_rep_handler(struct cm_work *work) cm_id_priv->private_data_len); else goto unlock; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; goto deref; -unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +unlock: spin_unlock_irq(&cm_id_priv->lock); free: cm_free_msg(msg); deref: cm_deref_id(cm_id_priv); } @@ -1632,7 +1648,6 @@ static int cm_rep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; - unsigned long flags; int ret; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1644,13 +1659,13 @@ static int cm_rep_handler(struct cm_work *work) cm_format_rep_event(work); - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: break; default: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto error; } @@ -1663,7 +1678,7 @@ static int cm_rep_handler(struct cm_work *work) /* Check for duplicate REP. */ if (cm_insert_remote_id(cm_id_priv->timewait_info)) { spin_unlock(&cm.lock); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto error; } @@ -1673,7 +1688,7 @@ static int cm_rep_handler(struct cm_work *work) &cm.remote_id_table); cm_id_priv->timewait_info->inserted_remote_id = 0; spin_unlock(&cm.lock); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); @@ -1689,6 +1704,13 @@ static int cm_rep_handler(struct cm_work *work) cm_id_priv->responder_resources = rep_msg->initiator_depth; cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); + cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); + cm_id_priv->av.timeout = + cm_ack_timeout(cm_id_priv->target_ack_delay, + cm_id_priv->av.timeout - 1); + cm_id_priv->alt_av.timeout = + cm_ack_timeout(cm_id_priv->target_ack_delay, + cm_id_priv->alt_av.timeout - 1); /* todo: handle peer_to_peer */ @@ -1696,7 +1718,7 @@ static int cm_rep_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -1712,7 +1734,6 @@ error: static int cm_establish_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; - unsigned long flags; int ret; /* See comment in cm_establish about lookup. */ @@ -1720,9 +1741,9 @@ static int cm_establish_handler(struct cm_work *work) if (!cm_id_priv) return -EINVAL; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } @@ -1730,7 +1751,7 @@ static int cm_establish_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -1746,7 +1767,6 @@ static int cm_rtu_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rtu_msg *rtu_msg; - unsigned long flags; int ret; rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1757,10 +1777,10 @@ static int cm_rtu_handler(struct cm_work *work) work->cm_event.private_data = &rtu_msg->private_data; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_REP_SENT && cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_ESTABLISHED; @@ -1769,7 +1789,7 @@ static int cm_rtu_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -1932,7 +1952,6 @@ static int cm_dreq_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_dreq_msg *dreq_msg; struct ib_mad_send_buf *msg = NULL; - unsigned long flags; int ret; dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; @@ -1945,7 +1964,7 @@ static int cm_dreq_handler(struct cm_work *work) work->cm_event.private_data = &dreq_msg->private_data; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) goto unlock; @@ -1964,7 +1983,7 @@ static int cm_dreq_handler(struct cm_work *work) cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, cm_id_priv->private_data, cm_id_priv->private_data_len); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); @@ -1977,7 +1996,7 @@ static int cm_dreq_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -1985,7 +2004,7 @@ static int cm_dreq_handler(struct cm_work *work) cm_deref_id(cm_id_priv); return 0; -unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +unlock: spin_unlock_irq(&cm_id_priv->lock); deref: cm_deref_id(cm_id_priv); return -EINVAL; } @@ -1994,7 +2013,6 @@ static int cm_drep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_drep_msg *drep_msg; - unsigned long flags; int ret; drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; @@ -2005,10 +2023,10 @@ static int cm_drep_handler(struct cm_work *work) work->cm_event.private_data = &drep_msg->private_data; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_DREQ_SENT && cm_id_priv->id.state != IB_CM_DREQ_RCVD) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_enter_timewait(cm_id_priv); @@ -2017,7 +2035,7 @@ static int cm_drep_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2107,17 +2125,16 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; - unsigned long flags; __be32 remote_id; remote_id = rej_msg->local_comm_id; if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { - spin_lock_irqsave(&cm.lock, flags); + spin_lock_irq(&cm.lock); timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), remote_id); if (!timewait_info) { - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); return NULL; } cm_id_priv = idr_find(&cm.local_id_table, (__force int) @@ -2129,7 +2146,7 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) else cm_id_priv = NULL; } - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); else @@ -2142,7 +2159,6 @@ static int cm_rej_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rej_msg *rej_msg; - unsigned long flags; int ret; rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; @@ -2152,7 +2168,7 @@ static int cm_rej_handler(struct cm_work *work) cm_format_rej_event(work); - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: @@ -2176,7 +2192,7 @@ static int cm_rej_handler(struct cm_work *work) cm_enter_timewait(cm_id_priv); break; default: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto out; } @@ -2184,7 +2200,7 @@ static int cm_rej_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2295,7 +2311,6 @@ static int cm_mra_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_mra_msg *mra_msg; - unsigned long flags; int timeout, ret; mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; @@ -2307,9 +2322,9 @@ static int cm_mra_handler(struct cm_work *work) work->cm_event.param.mra_rcvd.service_timeout = cm_mra_get_service_timeout(mra_msg); timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + - cm_convert_to_ms(cm_id_priv->av.packet_life_time); + cm_convert_to_ms(cm_id_priv->av.timeout); - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || @@ -2342,7 +2357,7 @@ static int cm_mra_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2350,7 +2365,7 @@ static int cm_mra_handler(struct cm_work *work) cm_deref_id(cm_id_priv); return 0; out: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_deref_id(cm_id_priv); return -EINVAL; } @@ -2379,7 +2394,8 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, cm_lap_set_sl(lap_msg, alternate_path->sl); cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ cm_lap_set_local_ack_timeout(lap_msg, - min(31, alternate_path->packet_life_time + 1)); + cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, + alternate_path->packet_life_time)); if (private_data && private_data_len) memcpy(lap_msg->private_data, private_data, private_data_len); @@ -2410,6 +2426,9 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); if (ret) goto out; + cm_id_priv->alt_av.timeout = + cm_ack_timeout(cm_id_priv->target_ack_delay, + cm_id_priv->alt_av.timeout - 1); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) @@ -2465,7 +2484,6 @@ static int cm_lap_handler(struct cm_work *work) struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; - unsigned long flags; int ret; /* todo: verify LAP request and send reject APR if invalid. */ @@ -2480,7 +2498,7 @@ static int cm_lap_handler(struct cm_work *work) cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); work->cm_event.private_data = &lap_msg->private_data; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) goto unlock; @@ -2497,7 +2515,7 @@ static int cm_lap_handler(struct cm_work *work) cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); @@ -2515,7 +2533,7 @@ static int cm_lap_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2523,7 +2541,7 @@ static int cm_lap_handler(struct cm_work *work) cm_deref_id(cm_id_priv); return 0; -unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +unlock: spin_unlock_irq(&cm_id_priv->lock); deref: cm_deref_id(cm_id_priv); return -EINVAL; } @@ -2598,7 +2616,6 @@ static int cm_apr_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_apr_msg *apr_msg; - unsigned long flags; int ret; apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; @@ -2612,11 +2629,11 @@ static int cm_apr_handler(struct cm_work *work) work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; work->cm_event.private_data = &apr_msg->private_data; - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED || (cm_id_priv->id.lap_state != IB_CM_LAP_SENT && cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; @@ -2626,7 +2643,7 @@ static int cm_apr_handler(struct cm_work *work) ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); @@ -2761,7 +2778,6 @@ static int cm_sidr_req_handler(struct cm_work *work) struct cm_id_private *cm_id_priv, *cur_cm_id_priv; struct cm_sidr_req_msg *sidr_req_msg; struct ib_wc *wc; - unsigned long flags; cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); if (IS_ERR(cm_id)) @@ -2778,27 +2794,26 @@ static int cm_sidr_req_handler(struct cm_work *work) work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->id.remote_id = sidr_req_msg->request_id; - cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; cm_id_priv->tid = sidr_req_msg->hdr.tid; atomic_inc(&cm_id_priv->work_count); - spin_lock_irqsave(&cm.lock, flags); + spin_lock_irq(&cm.lock); cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); if (cur_cm_id_priv) { - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); goto out; /* Duplicate message. */ } + cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; cur_cm_id_priv = cm_find_listen(cm_id->device, sidr_req_msg->service_id, sidr_req_msg->private_data); if (!cur_cm_id_priv) { - rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); - spin_unlock_irqrestore(&cm.lock, flags); - /* todo: reply with no match */ + spin_unlock_irq(&cm.lock); + cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); goto out; /* No match. */ } atomic_inc(&cur_cm_id_priv->refcount); - spin_unlock_irqrestore(&cm.lock, flags); + spin_unlock_irq(&cm.lock); cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; cm_id_priv->id.context = cur_cm_id_priv->id.context; @@ -2899,7 +2914,6 @@ static int cm_sidr_rep_handler(struct cm_work *work) { struct cm_sidr_rep_msg *sidr_rep_msg; struct cm_id_private *cm_id_priv; - unsigned long flags; sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; @@ -2907,14 +2921,14 @@ static int cm_sidr_rep_handler(struct cm_work *work) if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_format_sidr_rep_event(work); cm_process_work(cm_id_priv, work); @@ -2930,14 +2944,13 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, struct cm_id_private *cm_id_priv; struct ib_cm_event cm_event; enum ib_cm_state state; - unsigned long flags; int ret; memset(&cm_event, 0, sizeof cm_event); cm_id_priv = msg->context[0]; /* Discard old sends or ones without a response. */ - spin_lock_irqsave(&cm_id_priv->lock, flags); + spin_lock_irq(&cm_id_priv->lock); state = (enum ib_cm_state) (unsigned long) msg->context[1]; if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) goto discard; @@ -2964,7 +2977,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, default: goto discard; } - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_event.param.send_status = wc_status; /* No other events can occur on the cm_id at this point. */ @@ -2974,7 +2987,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, ib_destroy_cm_id(&cm_id_priv->id); return; discard: - spin_unlock_irqrestore(&cm_id_priv->lock, flags); + spin_unlock_irq(&cm_id_priv->lock); cm_free_msg(msg); } @@ -3269,8 +3282,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; - qp_attr->alt_timeout = - cm_id_priv->alt_av.packet_life_time + 1; + qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; } ret = 0; @@ -3308,8 +3320,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = - cm_id_priv->av.packet_life_time + 1; + qp_attr->timeout = cm_id_priv->av.timeout; qp_attr->retry_cnt = cm_id_priv->retry_count; qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; qp_attr->max_rd_atomic = @@ -3323,8 +3334,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; - qp_attr->alt_timeout = - cm_id_priv->alt_av.packet_life_time + 1; + qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; qp_attr->path_mig_state = IB_MIG_REARM; } @@ -3364,6 +3374,16 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, } EXPORT_SYMBOL(ib_cm_init_qp_attr); +void cm_get_ack_delay(struct cm_device *cm_dev) +{ + struct ib_device_attr attr; + + if (ib_query_device(cm_dev->device, &attr)) + cm_dev->ack_delay = 0; /* acks will rely on packet life time */ + else + cm_dev->ack_delay = attr.local_ca_ack_delay; +} + static void cm_add_one(struct ib_device *device) { struct cm_device *cm_dev; @@ -3388,6 +3408,7 @@ static void cm_add_one(struct ib_device *device) return; cm_dev->device = device; + cm_get_ack_delay(cm_dev); set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= device->phys_port_cnt; i++) { diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 4d3aee9..aec9c7a 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -35,6 +35,7 @@ #define CM_MSGS_H #include <rdma/ib_mad.h> +#include <rdma/ib_cm.h> /* * Parameters to routines below should be in network-byte order, and values diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2eb52b7..23af7a0 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2326,7 +2326,6 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, rep.private_data_len = conn_param->private_data_len; rep.responder_resources = conn_param->responder_resources; rep.initiator_depth = conn_param->initiator_depth; - rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT; rep.failover_accepted = 0; rep.flow_control = conn_param->flow_control; rep.rnr_retry_count = conn_param->rnr_retry_count; @@ -2773,8 +2772,8 @@ static int cma_init(void) int ret; get_random_bytes(&next_port, sizeof next_port); - next_port = (next_port % (sysctl_local_port_range[1] - - sysctl_local_port_range[0])) + + next_port = ((unsigned int) next_port % + (sysctl_local_port_range[1] - sysctl_local_port_range[0])) + sysctl_local_port_range[0]; cma_wq = create_singlethread_workqueue("rdma_cm"); if (!cma_wq) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 85ccf13..6b8faca 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -675,10 +675,16 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_port_private *port_priv; struct ib_mad_agent_private *recv_mad_agent = NULL; struct ib_device *device = mad_agent_priv->agent.device; - u8 port_num = mad_agent_priv->agent.port_num; + u8 port_num; struct ib_wc mad_wc; struct ib_send_wr *send_wr = &mad_send_wr->send_wr; + if (device->node_type == RDMA_NODE_IB_SWITCH && + smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + port_num = send_wr->wr.ud.port_num; + else + port_num = mad_agent_priv->agent.port_num; + /* * Directed route handling starts if the initial LID routed part of * a request or the ending LID routed part of a response is empty. @@ -1839,6 +1845,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_private *recv, *response; struct ib_mad_list_head *mad_list; struct ib_mad_agent_private *mad_agent; + int port_num; response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); if (!response) @@ -1872,25 +1879,50 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num)) goto out; + if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) + port_num = wc->port_num; + else + port_num = port_priv->port_num; + if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + enum smi_forward_action retsmi; + if (smi_handle_dr_smp_recv(&recv->mad.smp, port_priv->device->node_type, - port_priv->port_num, + port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) goto out; - if (smi_check_forward_dr_smp(&recv->mad.smp) == IB_SMI_LOCAL) + retsmi = smi_check_forward_dr_smp(&recv->mad.smp); + if (retsmi == IB_SMI_LOCAL) goto local; - if (smi_handle_dr_smp_send(&recv->mad.smp, - port_priv->device->node_type, - port_priv->port_num) == IB_SMI_DISCARD) - goto out; + if (retsmi == IB_SMI_SEND) { /* don't forward */ + if (smi_handle_dr_smp_send(&recv->mad.smp, + port_priv->device->node_type, + port_num) == IB_SMI_DISCARD) + goto out; + + if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD) + goto out; + } else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) { + /* forward case for switches */ + memcpy(response, recv, sizeof(*response)); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + + if (!agent_send_response(&response->mad.mad, + &response->grh, wc, + port_priv->device, + smi_get_fwd_port(&recv->mad.smp), + qp_info->qp->qp_num)) + response = NULL; - if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD) goto out; + } } local: @@ -1919,7 +1951,7 @@ local: agent_send_response(&response->mad.mad, &recv->grh, wc, port_priv->device, - port_priv->port_num, + port_num, qp_info->qp->qp_num); goto out; } diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 1e13ab4..15b4c4d 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Intel Corporation. All rights reserved. + * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h index 24c93fd..b1d4bbf 100644 --- a/drivers/infiniband/core/sa.h +++ b/drivers/infiniband/core/sa.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 6469406..20ab6b3 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two @@ -56,6 +56,7 @@ MODULE_LICENSE("Dual BSD/GPL"); struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; + u16 pkey_index; u8 src_path_mask; }; @@ -382,6 +383,13 @@ static void update_sm_ah(struct work_struct *work) kref_init(&new_ah->ref); new_ah->src_path_mask = (1 << port_attr.lmc) - 1; + new_ah->pkey_index = 0; + if (ib_find_pkey(port->agent->device, port->port_num, + IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index) && + ib_find_pkey(port->agent->device, port->port_num, + IB_DEFAULT_PKEY_PARTIAL, &new_ah->pkey_index)) + printk(KERN_ERR "Couldn't find index for default PKey\n"); + memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = port_attr.sm_lid; ah_attr.sl = port_attr.sm_sl; @@ -512,6 +520,35 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_init_ah_from_path); +static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) +{ + unsigned long flags; + + spin_lock_irqsave(&query->port->ah_lock, flags); + kref_get(&query->port->sm_ah->ref); + query->sm_ah = query->port->sm_ah; + spin_unlock_irqrestore(&query->port->ah_lock, flags); + + query->mad_buf = ib_create_send_mad(query->port->agent, 1, + query->sm_ah->pkey_index, + 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, + gfp_mask); + if (!query->mad_buf) { + kref_put(&query->sm_ah->ref, free_sm_ah); + return -ENOMEM; + } + + query->mad_buf->ah = query->sm_ah->ah; + + return 0; +} + +static void free_mad(struct ib_sa_query *query) +{ + ib_free_send_mad(query->mad_buf); + kref_put(&query->sm_ah->ref, free_sm_ah); +} + static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) { unsigned long flags; @@ -548,20 +585,11 @@ retry: query->mad_buf->context[0] = query; query->id = id; - spin_lock_irqsave(&query->port->ah_lock, flags); - kref_get(&query->port->sm_ah->ref); - query->sm_ah = query->port->sm_ah; - spin_unlock_irqrestore(&query->port->ah_lock, flags); - - query->mad_buf->ah = query->sm_ah->ah; - ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { spin_lock_irqsave(&idr_lock, flags); idr_remove(&query_idr, id); spin_unlock_irqrestore(&idr_lock, flags); - - kref_put(&query->sm_ah->ref, free_sm_ah); } /* @@ -647,13 +675,10 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, if (!query) return -ENOMEM; - query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, - 0, IB_MGMT_SA_HDR, - IB_MGMT_SA_DATA, gfp_mask); - if (!query->sa_query.mad_buf) { - ret = -ENOMEM; + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) goto err1; - } ib_sa_client_get(client); query->sa_query.client = client; @@ -665,7 +690,6 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; query->sa_query.release = ib_sa_path_rec_release; - query->sa_query.port = port; mad->mad_hdr.method = IB_MGMT_METHOD_GET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); mad->sa_hdr.comp_mask = comp_mask; @@ -683,7 +707,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); - ib_free_send_mad(query->sa_query.mad_buf); + free_mad(&query->sa_query); err1: kfree(query); @@ -773,13 +797,10 @@ int ib_sa_service_rec_query(struct ib_sa_client *client, if (!query) return -ENOMEM; - query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, - 0, IB_MGMT_SA_HDR, - IB_MGMT_SA_DATA, gfp_mask); - if (!query->sa_query.mad_buf) { - ret = -ENOMEM; + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) goto err1; - } ib_sa_client_get(client); query->sa_query.client = client; @@ -791,7 +812,6 @@ int ib_sa_service_rec_query(struct ib_sa_client *client, query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; query->sa_query.release = ib_sa_service_rec_release; - query->sa_query.port = port; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); mad->sa_hdr.comp_mask = comp_mask; @@ -810,7 +830,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client, err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); - ib_free_send_mad(query->sa_query.mad_buf); + free_mad(&query->sa_query); err1: kfree(query); @@ -869,13 +889,10 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client, if (!query) return -ENOMEM; - query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, - 0, IB_MGMT_SA_HDR, - IB_MGMT_SA_DATA, gfp_mask); - if (!query->sa_query.mad_buf) { - ret = -ENOMEM; + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) goto err1; - } ib_sa_client_get(client); query->sa_query.client = client; @@ -887,7 +904,6 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client, query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; query->sa_query.release = ib_sa_mcmember_rec_release; - query->sa_query.port = port; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); mad->sa_hdr.comp_mask = comp_mask; @@ -906,7 +922,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client, err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); - ib_free_send_mad(query->sa_query.mad_buf); + free_mad(&query->sa_query); err1: kfree(query); @@ -939,8 +955,7 @@ static void send_handler(struct ib_mad_agent *agent, idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); - ib_free_send_mad(mad_send_wc->send_buf); - kref_put(&query->sm_ah->ref, free_sm_ah); + free_mad(query); ib_sa_client_put(query->client); query->release(query); } diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 2bca753..8723675 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -192,7 +192,7 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, } /* smp->hop_ptr updated when sending */ return (node_type == RDMA_NODE_IB_SWITCH ? - IB_SMI_HANDLE: IB_SMI_DISCARD); + IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ @@ -211,7 +211,7 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) if (!ib_get_smp_direction(smp)) { /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) - return IB_SMI_SEND; + return IB_SMI_FORWARD; /* C14-9:3 -- at the end of the DR segment of path */ if (hop_ptr == hop_cnt) @@ -224,7 +224,7 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) } else { /* C14-13:2 -- intermediate hop */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) - return IB_SMI_SEND; + return IB_SMI_FORWARD; /* C14-13:3 -- at the end of the DR segment of path */ if (hop_ptr == 1) @@ -233,3 +233,13 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) } return IB_SMI_LOCAL; } + +/* + * Return the forwarding port number from initial_path for outgoing SMP and + * from return_path for returning SMP + */ +int smi_get_fwd_port(struct ib_smp *smp) +{ + return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] : + smp->return_path[smp->hop_ptr-1]); +} diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index 9a4b349..1cfc298 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -48,10 +48,12 @@ enum smi_action { enum smi_forward_action { IB_SMI_LOCAL, /* SMP should be completed up the stack */ IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */ + IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */ }; enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, int port_num, int phys_port_cnt); +int smi_get_fwd_port(struct ib_smp *smp); extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp); extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, u8 node_type, int port_num); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 08c299e..70b77ae 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -311,7 +311,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, return sprintf(buf, "N/A (no PMA)\n"); in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) { ret = -ENOMEM; goto out; @@ -479,7 +479,6 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *, element->attr.attr.name = element->name; element->attr.attr.mode = S_IRUGO; - element->attr.attr.owner = THIS_MODULE; element->attr.show = show; element->index = i; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 2586a3e..424983f 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -823,7 +823,6 @@ static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, param.private_data_len = cmd.len; param.responder_resources = cmd.responder_resources; param.initiator_depth = cmd.initiator_depth; - param.target_ack_delay = cmd.target_ack_delay; param.failover_accepted = cmd.failover_accepted; param.flow_control = cmd.flow_control; param.rnr_retry_count = cmd.rnr_retry_count; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index b4aec51..26d0470 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -121,6 +121,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, cur_base = addr & PAGE_MASK; + ret = 0; while (npages) { ret = get_user_pages(current, current->mm, cur_base, min_t(int, npages, @@ -225,13 +226,15 @@ void ib_umem_release(struct ib_umem *umem) * up here and not be able to take the mmap_sem. In that case * we defer the vm_locked accounting to the system workqueue. */ - if (context->closing && !down_write_trylock(&mm->mmap_sem)) { - INIT_WORK(&umem->work, ib_umem_account); - umem->mm = mm; - umem->diff = diff; - - schedule_work(&umem->work); - return; + if (context->closing) { + if (!down_write_trylock(&mm->mmap_sem)) { + INIT_WORK(&umem->work, ib_umem_account); + umem->mm = mm; + umem->diff = diff; + + schedule_work(&umem->work); + return; + } } else down_write(&mm->mmap_sem); diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig index 809cb14..e6ce5f2 100644 --- a/drivers/infiniband/hw/amso1100/Kconfig +++ b/drivers/infiniband/hw/amso1100/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_AMSO1100 tristate "Ammasso 1100 HCA support" - depends on PCI && INET && INFINIBAND + depends on PCI && INET ---help--- This is a low-level driver for the Ammasso 1100 host channel adapter (HCA). diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index 58bc272..0aecea6 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -672,7 +672,7 @@ static int c2_up(struct net_device *netdev) * rdma interface. */ in_dev = in_dev_get(netdev); - in_dev->cnf.arp_ignore = 1; + IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1); in_dev_put(in_dev); return 0; diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig index 77977f5..2acec3f 100644 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ b/drivers/infiniband/hw/cxgb3/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_CXGB3 tristate "Chelsio RDMA Driver" - depends on CHELSIO_T3 && INFINIBAND && INET + depends on CHELSIO_T3 && INET select GENERIC_ALLOCATOR ---help--- This is an iWARP/RDMA driver for the Chelsio T3 1GbE and diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 76049af..1518b41 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -144,7 +144,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) } wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 1, qpid, 7); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = qpid << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); @@ -548,7 +548,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 1, + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, T3_CTL_QP_TID, 7); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; @@ -833,7 +833,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) wqe->ird = cpu_to_be32(attr->ird); wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); - wqe->rsvd = 0; + wqe->irs = cpu_to_be32(attr->irs); skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); } diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index ff7290e..c84d4ac 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -294,6 +294,7 @@ struct t3_rdma_init_attr { u64 qp_dma_addr; u32 qp_dma_size; u32 flags; + u32 irs; }; struct t3_rdma_init_wr { @@ -314,7 +315,7 @@ struct t3_rdma_init_wr { __be32 ird; __be64 qp_dma_addr; /* 7 */ __be32 qp_dma_size; /* 8 */ - u32 rsvd; + u32 irs; }; struct t3_genbit { diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index b2faff5..3b41dc0 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -254,8 +254,6 @@ static void release_ep_resources(struct iwch_ep *ep) cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); dst_release(ep->dst); l2t_release(L2DATA(ep->com.tdev), ep->l2t); - if (ep->com.tdev->type == T3B) - release_tid(ep->com.tdev, ep->hwtid, NULL); put_ep(&ep->com); } @@ -515,7 +513,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) req->len = htonl(len); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT); + req->flags = htonl(F_TX_INIT); req->sndseq = htonl(ep->snd_seq); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; @@ -566,7 +564,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) req->len = htonl(mpalen); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT); + req->flags = htonl(F_TX_INIT); req->sndseq = htonl(ep->snd_seq); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; @@ -618,7 +616,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) req->len = htonl(len); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_MORE | F_TX_IMM_ACK | F_TX_INIT); + req->flags = htonl(F_TX_INIT); req->sndseq = htonl(ep->snd_seq); ep->mpa_skb = skb; state_set(&ep->com, MPA_REP_SENT); @@ -641,6 +639,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid); ep->snd_seq = ntohl(req->snd_isn); + ep->rcv_seq = ntohl(req->rcv_isn); set_emss(ep, ntohs(req->tcp_opt)); @@ -1023,6 +1022,9 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); + ep->rcv_seq += dlen; + BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); + switch (state_read(&ep->com)) { case MPA_REQ_SENT: process_mpa_reply(ep, skb); @@ -1060,7 +1062,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_ep *ep = ctx; struct cpl_wr_ack *hdr = cplhdr(skb); unsigned int credits = ntohs(hdr->credits); - enum iwch_qp_attr_mask mask; PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits); @@ -1072,30 +1073,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) ep->mpa_skb = NULL; dst_confirm(ep->dst); if (state_read(&ep->com) == MPA_REP_SENT) { - struct iwch_qp_attributes attrs; - - /* bind QP to EP and move to RTS */ - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ord; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - /* bind QP and TID with INIT_WR */ - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_MAX_ORD; - - ep->com.rpl_err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - - if (!ep->com.rpl_err) { - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); - } - ep->com.rpl_done = 1; PDBG("waking up ep %p\n", ep); wake_up(&ep->com.waitq); @@ -1124,6 +1101,15 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } +/* + * Return whether a failed active open has allocated a TID + */ +static inline int act_open_has_tid(int status) +{ + return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && + status != CPL_ERR_ARP_MISS; +} + static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep *ep = ctx; @@ -1133,7 +1119,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) status2errno(rpl->status)); connect_reply_upcall(ep, status2errno(rpl->status)); state_set(&ep->com, DEAD); - if (ep->com.tdev->type == T3B) + if (ep->com.tdev->type == T3B && act_open_has_tid(rpl->status)) release_tid(ep->com.tdev, GET_TID(rpl), NULL); cxgb3_free_atid(ep->com.tdev, ep->atid); dst_release(ep->dst); @@ -1378,6 +1364,7 @@ static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p\n", __FUNCTION__, ep); ep->snd_seq = ntohl(req->snd_isn); + ep->rcv_seq = ntohl(req->rcv_isn); set_emss(ep, ntohs(req->tcp_opt)); @@ -1485,6 +1472,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) int ret; int state; + if (is_neg_adv_abort(req->status)) { + PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep, + ep->hwtid); + t3_l2t_send_event(ep->com.tdev, ep->l2t); + return CPL_RET_BUF_DONE; + } + /* * We get 2 peer aborts from the HW. The first one must * be ignored except for scribbling that we need one more. @@ -1494,13 +1488,6 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } - if (is_neg_adv_abort(req->status)) { - PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep, - ep->hwtid); - t3_l2t_send_event(ep->com.tdev, ep->l2t); - return CPL_RET_BUF_DONE; - } - state = state_read(&ep->com); PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state); switch (state) { @@ -1732,10 +1719,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iwch_qp *qp = get_qhp(h, conn_param->qpn); PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { - put_ep(&ep->com); + if (state_read(&ep->com) == DEAD) return -ECONNRESET; - } BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); BUG_ON(!qp); @@ -1755,17 +1740,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->ird = conn_param->ird; ep->ord = conn_param->ord; PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord); + get_ep(&ep->com); - err = send_mpa_reply(ep, conn_param->private_data, - conn_param->private_data_len); - if (err) { - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); - abort_connection(ep, NULL, GFP_KERNEL); - put_ep(&ep->com); - return err; - } /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; @@ -1783,16 +1759,28 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); + if (err) + goto err; - if (err) { - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); - abort_connection(ep, NULL, GFP_KERNEL); - } else { - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); - } + err = send_mpa_reply(ep, conn_param->private_data, + conn_param->private_data_len); + if (err) + goto err; + + /* wait for wr_ack */ + wait_event(ep->com.waitq, ep->com.rpl_done); + err = ep->com.rpl_err; + if (err) + goto err; + + state_set(&ep->com, FPDU_MODE); + established_upcall(ep); + put_ep(&ep->com); + return 0; +err: + ep->com.cm_id = NULL; + ep->com.qp = NULL; + cm_id->rem_ref(cm_id); put_ep(&ep->com); return err; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index 21a388c..6107e7c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -175,6 +175,7 @@ struct iwch_ep { unsigned int atid; u32 hwtid; u32 snd_seq; + u32 rcv_seq; struct l2t_entry *l2t; struct dst_entry *dst; struct sk_buff *mpa_skb; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index e7c2c39..f0c7775 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1163,9 +1163,10 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.post_recv = iwch_post_receive; - dev->ibdev.iwcm = - (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs), - GFP_KERNEL); + dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); + if (!dev->ibdev.iwcm) + return -ENOMEM; + dev->ibdev.iwcm->connect = iwch_connect; dev->ibdev.iwcm->accept = iwch_accept_cr; dev->ibdev.iwcm->reject = iwch_reject_cr; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 714dddb..dd89b6b 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -628,9 +628,9 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) /* immediate data starts here. */ term = (struct terminate_message *)wqe->send.sgl; build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); - build_fw_riwrh((void *)wqe, T3_WR_SEND, - T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1, - qhp->ep->hwtid, 5); + wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) | + V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); skb->priority = CPL_PRIORITY_DATA; return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); } @@ -732,6 +732,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.qp_dma_addr = qhp->wq.dma_addr; init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0; + init_attr.irs = qhp->ep->rcv_seq; PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " "flags 0x%x qpcaps 0x%x\n", __FUNCTION__, init_attr.rq_addr, init_attr.rq_size, diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig index 1a85459..59f807d 100644 --- a/drivers/infiniband/hw/ehca/Kconfig +++ b/drivers/infiniband/hw/ehca/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_EHCA tristate "eHCA support" - depends on IBMEBUS && INFINIBAND + depends on IBMEBUS ---help--- This driver supports the IBM pSeries eHCA InfiniBand adapter. diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 0d6e2c4..3cd6bf3 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -118,7 +118,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); } - av->av.pmtu = EHCA_MAX_MTU; + av->av.pmtu = shca->max_mtu; /* dgid comes in grh.word_3 */ memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, @@ -137,6 +137,8 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) struct ehca_av *av; struct ehca_ud_av new_ehca_av; struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, + ib_device); u32 cur_pid = current->tgid; if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && @@ -192,7 +194,7 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); } - new_ehca_av.pmtu = EHCA_MAX_MTU; + new_ehca_av.pmtu = shca->max_mtu; memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, sizeof(ah_attr->grh.dgid)); diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 1d286d3..daf823e 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -5,6 +5,7 @@ * * Authors: Heiko J Schick <schickhj@de.ibm.com> * Christoph Raisch <raisch@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> * * Copyright (c) 2005 IBM Corporation * @@ -86,11 +87,17 @@ struct ehca_eq { struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; }; +struct ehca_sma_attr { + u16 lid, lmc, sm_sl, sm_lid; + u16 pkey_tbl_len, pkeys[16]; +}; + struct ehca_sport { struct ib_cq *ibcq_aqp1; struct ib_qp *ibqp_aqp1; enum ib_rate rate; enum ib_port_state port_state; + struct ehca_sma_attr saved_attr; }; struct ehca_shca { @@ -107,6 +114,8 @@ struct ehca_shca { struct ehca_pd *pd; struct h_galpas galpas; struct mutex modify_mutex; + u64 hca_cap; + int max_mtu; }; struct ehca_pd { @@ -115,9 +124,20 @@ struct ehca_pd { u32 ownpid; }; +enum ehca_ext_qp_type { + EQPT_NORMAL = 0, + EQPT_LLQP = 1, + EQPT_SRQBASE = 2, + EQPT_SRQ = 3, +}; + struct ehca_qp { - struct ib_qp ib_qp; + union { + struct ib_qp ib_qp; + struct ib_srq ib_srq; + }; u32 qp_type; + enum ehca_ext_qp_type ext_type; struct ipz_queue ipz_squeue; struct ipz_queue ipz_rqueue; struct h_galpas galpas; @@ -140,6 +160,10 @@ struct ehca_qp { u32 mm_count_galpa; }; +#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) +#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) +#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) + /* must be power of 2 */ #define QP_HASHTAB_LEN 8 @@ -156,8 +180,8 @@ struct ehca_cq { spinlock_t cb_lock; struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; struct list_head entry; - u32 nr_callbacks; /* #events assigned to cpu by scaling code */ - u32 nr_events; /* #events seen */ + u32 nr_callbacks; /* #events assigned to cpu by scaling code */ + atomic_t nr_events; /* #events seen */ wait_queue_head_t wait_completion; spinlock_t task_lock; u32 ownpid; @@ -275,9 +299,8 @@ void ehca_cleanup_av_cache(void); int ehca_init_mrmw_cache(void); void ehca_cleanup_mrmw_cache(void); -extern spinlock_t ehca_qp_idr_lock; -extern spinlock_t ehca_cq_idr_lock; -extern spinlock_t hcall_lock; +extern rwlock_t ehca_qp_idr_lock; +extern rwlock_t ehca_cq_idr_lock; extern struct idr ehca_qp_idr; extern struct idr ehca_cq_idr; @@ -305,6 +328,7 @@ struct ehca_create_qp_resp { u32 qp_num; u32 token; u32 qp_type; + u32 ext_type; u32 qkey; /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ u32 real_qp_num; @@ -320,14 +344,42 @@ struct ehca_alloc_cq_parms { struct ipz_eq_handle eq_handle; }; +enum ehca_service_type { + ST_RC = 0, + ST_UC = 1, + ST_RD = 2, + ST_UD = 3, +}; + +enum ehca_ll_comp_flags { + LLQP_SEND_COMP = 0x20, + LLQP_RECV_COMP = 0x40, + LLQP_COMP_MASK = 0x60, +}; + struct ehca_alloc_qp_parms { - int servicetype; +/* input parameters */ + enum ehca_service_type servicetype; int sigtype; - int daqp_ctrl; - int max_send_sge; - int max_recv_sge; + enum ehca_ext_qp_type ext_type; + enum ehca_ll_comp_flags ll_comp_flags; + + int max_send_wr, max_recv_wr; + int max_send_sge, max_recv_sge; int ud_av_l_key_ctl; + u32 token; + struct ipz_eq_handle eq_handle; + struct ipz_pd pd; + struct ipz_cq_handle send_cq_handle, recv_cq_handle; + + u32 srq_qpn, srq_token, srq_limit; + +/* output parameters */ + u32 real_qp_num; + struct ipz_qp_handle qp_handle; + struct h_galpas galpas; + u16 act_nr_send_wqes; u16 act_nr_recv_wqes; u8 act_nr_recv_sges; @@ -335,9 +387,6 @@ struct ehca_alloc_qp_parms { u32 nr_rq_pages; u32 nr_sq_pages; - - struct ipz_eq_handle ipz_eq_handle; - struct ipz_pd pd; }; int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index 5665f21..fb3df5c 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -228,8 +228,8 @@ struct hcp_modify_qp_control_block { #define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31) #define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48) #define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31) -#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49) -#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31) +#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49,49) +#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16,31) #define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50) #define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51) diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 67f0670..01d4a14 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -56,11 +56,11 @@ int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) { unsigned int qp_num = qp->real_qp_num; unsigned int key = qp_num & (QP_HASHTAB_LEN-1); - unsigned long spl_flags; + unsigned long flags; - spin_lock_irqsave(&cq->spinlock, spl_flags); + spin_lock_irqsave(&cq->spinlock, flags); hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); - spin_unlock_irqrestore(&cq->spinlock, spl_flags); + spin_unlock_irqrestore(&cq->spinlock, flags); ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", cq->cq_number, qp_num); @@ -74,9 +74,9 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); struct hlist_node *iter; struct ehca_qp *qp; - unsigned long spl_flags; + unsigned long flags; - spin_lock_irqsave(&cq->spinlock, spl_flags); + spin_lock_irqsave(&cq->spinlock, flags); hlist_for_each(iter, &cq->qp_hashtab[key]) { qp = hlist_entry(iter, struct ehca_qp, list_entries); if (qp->real_qp_num == real_qp_num) { @@ -88,7 +88,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) break; } } - spin_unlock_irqrestore(&cq->spinlock, spl_flags); + spin_unlock_irqrestore(&cq->spinlock, flags); if (ret) ehca_err(cq->ib_cq.device, "qp not found cq_num=%x real_qp_num=%x", @@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, spin_lock_init(&my_cq->spinlock); spin_lock_init(&my_cq->cb_lock); spin_lock_init(&my_cq->task_lock); + atomic_set(&my_cq->nr_events, 0); init_waitqueue_head(&my_cq->wait_completion); my_cq->ownpid = current->tgid; @@ -162,9 +163,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, goto create_cq_exit1; } - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + write_lock_irqsave(&ehca_cq_idr_lock, flags); ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); } while (ret == -EAGAIN); @@ -293,9 +294,9 @@ create_cq_exit3: "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret); create_cq_exit2: - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + write_lock_irqsave(&ehca_cq_idr_lock, flags); idr_remove(&ehca_cq_idr, my_cq->token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); create_cq_exit1: kmem_cache_free(cq_cache, my_cq); @@ -303,16 +304,6 @@ create_cq_exit1: return cq; } -static int get_cq_nr_events(struct ehca_cq *my_cq) -{ - int ret; - unsigned long flags; - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - ret = my_cq->nr_events; - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - return ret; -} - int ehca_destroy_cq(struct ib_cq *cq) { u64 h_ret; @@ -339,17 +330,18 @@ int ehca_destroy_cq(struct ib_cq *cq) } } - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - while (my_cq->nr_events) { - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); - wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq)); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - /* recheck nr_events to assure no cqe has just arrived */ - } - + /* + * remove the CQ from the idr first to make sure + * no more interrupt tasklets will touch this CQ + */ + write_lock_irqsave(&ehca_cq_idr_lock, flags); idr_remove(&ehca_cq_idr, my_cq->token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + /* now wait until all pending events have completed */ + wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); + /* nobody's using our CQ any longer -- we can destroy it */ h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); if (h_ret == H_R_STATE) { /* cq in err: read err data and destroy it forcibly */ diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 32b55a4..bbd3c6a 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -45,11 +45,25 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { - int ret = 0; + int i, ret = 0; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct hipz_query_hca *rblock; + static const u32 cap_mapping[] = { + IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE, + IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR, + IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR, + IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST, + IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG, + IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE, + IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK, + IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD, + IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT, + IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE, + IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, + }; + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { ehca_err(&shca->ib_device, "Can't allocate rblock memory."); @@ -96,6 +110,13 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) props->max_total_mcast_qp_attach = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX); + /* translate device capabilities */ + props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; + for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) + if (rblock->hca_cap_indicators & cap_mapping[i + 1]) + props->device_cap_flags |= cap_mapping[i]; + query_device1: ehca_free_fw_ctrlblock(rblock); @@ -172,6 +193,40 @@ query_port1: return ret; } +int ehca_query_sma_attr(struct ehca_shca *shca, + u8 port, struct ehca_sma_attr *attr) +{ + int ret = 0; + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_sma_attr1; + } + + memset(attr, 0, sizeof(struct ehca_sma_attr)); + + attr->lid = rblock->lid; + attr->lmc = rblock->lmc; + attr->sm_sl = rblock->sm_sl; + attr->sm_lid = rblock->sm_lid; + + attr->pkey_tbl_len = rblock->pkey_tbl_len; + memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); + +query_sma_attr1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { int ret = 0; @@ -261,7 +316,7 @@ int ehca_modify_port(struct ib_device *ibdev, } if (mutex_lock_interruptible(&shca->modify_mutex)) - return -ERESTARTSYS; + return -ERESTARTSYS; rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { @@ -290,7 +345,7 @@ modify_port2: ehca_free_fw_ctrlblock(rblock); modify_port1: - mutex_unlock(&shca->modify_mutex); + mutex_unlock(&shca->modify_mutex); return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 100329b..96eba38 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -5,6 +5,8 @@ * * Authors: Heiko J Schick <schickhj@de.ibm.com> * Khadija Souissi <souissi@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> * * Copyright (c) 2005 IBM Corporation * @@ -59,6 +61,7 @@ #define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7) #define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15) #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16,16) #define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63) #define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7) @@ -178,12 +181,11 @@ static void qp_event_callback(struct ehca_shca *shca, { struct ib_event event; struct ehca_qp *qp; - unsigned long flags; u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + read_unlock(&ehca_qp_idr_lock); if (!qp) @@ -207,18 +209,22 @@ static void cq_event_callback(struct ehca_shca *shca, u64 eqe) { struct ehca_cq *cq; - unsigned long flags; u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, token); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); if (!cq) return; ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + return; } @@ -281,30 +287,61 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe) return; } -static void parse_ec(struct ehca_shca *shca, u64 eqe) +static void dispatch_port_event(struct ehca_shca *shca, int port_num, + enum ib_event_type type, const char *msg) { struct ib_event event; + + ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); + event.device = &shca->ib_device; + event.event = type; + event.element.port_num = port_num; + ib_dispatch_event(&event); +} + +static void notify_port_conf_change(struct ehca_shca *shca, int port_num) +{ + struct ehca_sma_attr new_attr; + struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; + + ehca_query_sma_attr(shca, port_num, &new_attr); + + if (new_attr.sm_sl != old_attr->sm_sl || + new_attr.sm_lid != old_attr->sm_lid) + dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE, + "SM changed"); + + if (new_attr.lid != old_attr->lid || + new_attr.lmc != old_attr->lmc) + dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, + "LID changed"); + + if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || + memcmp(new_attr.pkeys, old_attr->pkeys, + sizeof(u16) * new_attr.pkey_tbl_len)) + dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, + "P_Key changed"); + + *old_attr = new_attr; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); switch (ec) { case 0x30: /* port availability change */ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - ehca_info(&shca->ib_device, - "port %x is active.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ACTIVE; - event.element.port_num = port; shca->sport[port - 1].port_state = IB_PORT_ACTIVE; - ib_dispatch_event(&event); + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, + &shca->sport[port - 1].saved_attr); } else { - ehca_info(&shca->ib_device, - "port %x is inactive.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port; shca->sport[port - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); } break; case 0x31: @@ -312,24 +349,19 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe) * disruptive change is caused by * LID, PKEY or SM change */ - ehca_warn(&shca->ib_device, - "disruptive port %x configuration change", port); - - ehca_info(&shca->ib_device, - "port %x is inactive.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port; - shca->sport[port - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); - - ehca_info(&shca->ib_device, - "port %x is active.", port); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ACTIVE; - event.element.port_num = port; - shca->sport[port - 1].port_state = IB_PORT_ACTIVE; - ib_dispatch_event(&event); + if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { + ehca_warn(&shca->ib_device, "disruptive port " + "%d configuration change", port); + + shca->sport[port - 1].port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + + shca->sport[port - 1].port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + } else + notify_port_conf_change(shca, port); break; case 0x32: /* adapter malfunction */ ehca_err(&shca->ib_device, "Adapter malfunction."); @@ -404,7 +436,6 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) { u64 eqe_value; u32 token; - unsigned long flags; struct ehca_cq *cq; eqe_value = eqe->entry; @@ -412,27 +443,24 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { ehca_dbg(&shca->ib_device, "Got completion event"); token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); if (cq == NULL) { - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); ehca_err(&shca->ib_device, "Invalid eqe for non-existing cq token=%x", token); return; } reset_eq_pending(cq); - cq->nr_events++; - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); if (ehca_scaling_code) queue_comp_task(cq); else { comp_event_callback(cq); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq->nr_events--; - if (!cq->nr_events) + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); } } else { ehca_dbg(&shca->ib_device, "Got non completion event"); @@ -476,17 +504,17 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) eqe_value = eqe_cache[eqe_cnt].eqe->entry; if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - spin_lock(&ehca_cq_idr_lock); + read_lock(&ehca_cq_idr_lock); eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (eqe_cache[eqe_cnt].cq) + atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); + read_unlock(&ehca_cq_idr_lock); if (!eqe_cache[eqe_cnt].cq) { - spin_unlock(&ehca_cq_idr_lock); ehca_err(&shca->ib_device, "Invalid eqe for non-existing cq " "token=%x", token); continue; } - eqe_cache[eqe_cnt].cq->nr_events++; - spin_unlock(&ehca_cq_idr_lock); } else eqe_cache[eqe_cnt].cq = NULL; eqe_cnt++; @@ -517,11 +545,8 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) else { struct ehca_cq *cq = eq->eqe_cache[i].cq; comp_event_callback(cq); - spin_lock(&ehca_cq_idr_lock); - cq->nr_events--; - if (!cq->nr_events) + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock(&ehca_cq_idr_lock); } } else { ehca_dbg(&shca->ib_device, "Got non completion event"); @@ -621,13 +646,10 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct) while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); spin_unlock_irqrestore(&cct->task_lock, flags); - comp_event_callback(cq); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); - cq->nr_events--; - if (!cq->nr_events) + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); spin_lock_irqsave(&cct->task_lock, flags); spin_lock(&cq->task_lock); diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index 6ed06ee..3346cb0 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -47,7 +47,6 @@ struct ehca_shca; #include <linux/interrupt.h> #include <linux/types.h> -#include <asm/atomic.h> int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index 37e7fe0..77aeca6 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -49,6 +49,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props); int ehca_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); +int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, + struct ehca_sma_attr *attr); + int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, @@ -154,6 +157,21 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); + +int ehca_destroy_srq(struct ib_srq *srq); + u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, struct ib_qp_init_attr *qp_init_attr); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index c3f99f3..28ba2dd 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -94,17 +94,15 @@ MODULE_PARM_DESC(poll_all_eqs, MODULE_PARM_DESC(static_rate, "set permanent static rate (default: disabled)"); MODULE_PARM_DESC(scaling_code, - "set scaling code (0: disabled, 1: enabled/default)"); + "set scaling code (0: disabled/default, 1: enabled)"); -spinlock_t ehca_qp_idr_lock; -spinlock_t ehca_cq_idr_lock; -spinlock_t hcall_lock; +DEFINE_RWLOCK(ehca_qp_idr_lock); +DEFINE_RWLOCK(ehca_cq_idr_lock); DEFINE_IDR(ehca_qp_idr); DEFINE_IDR(ehca_cq_idr); - -static struct list_head shca_list; /* list of all registered ehcas */ -static spinlock_t shca_list_lock; +static LIST_HEAD(shca_list); /* list of all registered ehcas */ +static DEFINE_SPINLOCK(shca_list_lock); static struct timer_list poll_eqs_timer; @@ -205,11 +203,35 @@ static void ehca_destroy_slab_caches(void) #define EHCA_HCAAVER EHCA_BMASK_IBM(32,39) #define EHCA_REVID EHCA_BMASK_IBM(40,63) +static struct cap_descr { + u64 mask; + char *descr; +} hca_cap_descr[] = { + { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" }, + { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" }, + { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" }, + { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" }, + { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" }, + { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" }, + { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" }, + { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" }, + { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" }, + { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" }, + { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" }, + { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" }, + { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" }, + { HCA_CAP_SRQ, "HCA_CAP_SRQ" }, + { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, + { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, + { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, +}; + int ehca_sense_attributes(struct ehca_shca *shca) { - int ret = 0; + int i, ret = 0; u64 h_ret; struct hipz_query_hca *rblock; + struct hipz_query_port *port; rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); if (!rblock) { @@ -222,7 +244,7 @@ int ehca_sense_attributes(struct ehca_shca *shca) ehca_gen_err("Cannot query device properties. h_ret=%lx", h_ret); ret = -EPERM; - goto num_ports1; + goto sense_attributes1; } if (ehca_nr_ports == 1) @@ -242,18 +264,44 @@ int ehca_sense_attributes(struct ehca_shca *shca) ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid); if ((hcaaver == 1) && (revid == 0)) - shca->hw_level = 0; + shca->hw_level = 0x11; else if ((hcaaver == 1) && (revid == 1)) - shca->hw_level = 1; + shca->hw_level = 0x12; else if ((hcaaver == 1) && (revid == 2)) - shca->hw_level = 2; + shca->hw_level = 0x13; + else if ((hcaaver == 2) && (revid == 0)) + shca->hw_level = 0x21; + else if ((hcaaver == 2) && (revid == 0x10)) + shca->hw_level = 0x22; + else { + ehca_gen_warn("unknown hardware version" + " - assuming default level"); + shca->hw_level = 0x22; + } } ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); shca->sport[0].rate = IB_RATE_30_GBPS; shca->sport[1].rate = IB_RATE_30_GBPS; -num_ports1: + shca->hca_cap = rblock->hca_cap_indicators; + ehca_gen_dbg(" ... HCA capabilities:"); + for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) + if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) + ehca_gen_dbg(" %s", hca_cap_descr[i].descr); + + port = (struct hipz_query_port *) rblock; + h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query port properties. h_ret=%lx", + h_ret); + ret = -EPERM; + goto sense_attributes1; + } + + shca->max_mtu = port->max_mtu; + +sense_attributes1: ehca_free_fw_ctrlblock(rblock); return ret; } @@ -293,7 +341,7 @@ int ehca_init_device(struct ehca_shca *shca) strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); shca->ib_device.owner = THIS_MODULE; - shca->ib_device.uverbs_abi_ver = 6; + shca->ib_device.uverbs_abi_ver = 7; shca->ib_device.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -361,6 +409,20 @@ int ehca_init_device(struct ehca_shca *shca) /* shca->ib_device.process_mad = ehca_process_mad; */ shca->ib_device.mmap = ehca_mmap; + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + shca->ib_device.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + shca->ib_device.create_srq = ehca_create_srq; + shca->ib_device.modify_srq = ehca_modify_srq; + shca->ib_device.query_srq = ehca_query_srq; + shca->ib_device.destroy_srq = ehca_destroy_srq; + shca->ib_device.post_srq_recv = ehca_post_srq_recv; + } + return ret; } @@ -800,14 +862,6 @@ int __init ehca_module_init(void) printk(KERN_INFO "eHCA Infiniband Device Driver " "(Rel.: SVNEHCA_0023)\n"); - idr_init(&ehca_qp_idr); - idr_init(&ehca_cq_idr); - spin_lock_init(&ehca_qp_idr_lock); - spin_lock_init(&ehca_cq_idr_lock); - spin_lock_init(&hcall_lock); - - INIT_LIST_HEAD(&shca_list); - spin_lock_init(&shca_list_lock); if ((ret = ehca_create_comp_pool())) { ehca_gen_err("Cannot create comp pool."); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index b5bc787..7467125 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -3,7 +3,9 @@ * * QP functions * - * Authors: Waleri Fomin <fomin@de.ibm.com> + * Authors: Joachim Fenkes <fenkes@de.ibm.com> + * Stefan Roscher <stefan.roscher@de.ibm.com> + * Waleri Fomin <fomin@de.ibm.com> * Hoang-Nam Nguyen <hnguyen@de.ibm.com> * Reinhard Ernst <rernst@de.ibm.com> * Heiko J Schick <schickhj@de.ibm.com> @@ -234,13 +236,6 @@ static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, return index; } -enum ehca_service_type { - ST_RC = 0, - ST_UC = 1, - ST_RD = 2, - ST_UD = 3 -}; - /* * ibqptype2servicetype returns hcp service type corresponding to given * ib qp type used by create_qp() @@ -268,15 +263,34 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) } /* - * init_qp_queues initializes/constructs r/squeue and registers queue pages. + * init userspace queue info from ipz_queue data */ -static inline int init_qp_queues(struct ehca_shca *shca, - struct ehca_qp *my_qp, - int nr_sq_pages, - int nr_rq_pages, - int swqe_size, - int rwqe_size, - int nr_send_sges, int nr_receive_sges) +static inline void queue2resp(struct ipzu_queue_resp *resp, + struct ipz_queue *queue) +{ + resp->qe_size = queue->qe_size; + resp->act_nr_of_sg = queue->act_nr_of_sg; + resp->queue_length = queue->queue_length; + resp->pagesize = queue->pagesize; + resp->toggle_state = queue->toggle_state; +} + +static inline int ll_qp_msg_size(int nr_sge) +{ + return 128 << nr_sge; +} + +/* + * init_qp_queue initializes/constructs r/squeue and registers queue pages. + */ +static inline int init_qp_queue(struct ehca_shca *shca, + struct ehca_qp *my_qp, + struct ipz_queue *queue, + int q_type, + u64 expected_hret, + int nr_q_pages, + int wqe_size, + int nr_sges) { int ret, cnt, ipz_rc; void *vpage; @@ -284,127 +298,93 @@ static inline int init_qp_queues(struct ehca_shca *shca, struct ib_device *ib_dev = &shca->ib_device; struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; - ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue, - nr_sq_pages, - EHCA_PAGESIZE, swqe_size, nr_send_sges); + if (!nr_q_pages) + return 0; + + ipz_rc = ipz_queue_ctor(queue, nr_q_pages, EHCA_PAGESIZE, + wqe_size, nr_sges); if (!ipz_rc) { - ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x", + ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x", ipz_rc); return -EBUSY; } - ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue, - nr_rq_pages, - EHCA_PAGESIZE, rwqe_size, nr_receive_sges); - if (!ipz_rc) { - ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x", - ipz_rc); - ret = -EBUSY; - goto init_qp_queues0; - } - /* register SQ pages */ - for (cnt = 0; cnt < nr_sq_pages; cnt++) { - vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue); + /* register queue pages */ + for (cnt = 0; cnt < nr_q_pages; cnt++) { + vpage = ipz_qpageit_get_inc(queue); if (!vpage) { - ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() " + ehca_err(ib_dev, "ipz_qpageit_get_inc() " "failed p_vpage= %p", vpage); ret = -EINVAL; - goto init_qp_queues1; + goto init_qp_queue1; } rpage = virt_to_abs(vpage); h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, my_qp->ipz_qp_handle, - &my_qp->pf, 0, 0, + NULL, 0, q_type, rpage, 1, my_qp->galpas.kernel); - if (h_ret < H_SUCCESS) { - ehca_err(ib_dev, "SQ hipz_qp_register_rpage()" - " failed rc=%lx", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; - } - } - - ipz_qeit_reset(&my_qp->ipz_squeue); - - /* register RQ pages */ - for (cnt = 0; cnt < nr_rq_pages; cnt++) { - vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); - if (!vpage) { - ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() " - "failed p_vpage = %p", vpage); - ret = -EINVAL; - goto init_qp_queues1; - } - - rpage = virt_to_abs(vpage); - - h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, 0, 1, - rpage, 1,my_qp->galpas.kernel); - if (h_ret < H_SUCCESS) { - ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed " - "rc=%lx", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; - } - if (cnt == (nr_rq_pages - 1)) { /* last page! */ - if (h_ret != H_SUCCESS) { - ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " + if (cnt == (nr_q_pages - 1)) { /* last page! */ + if (h_ret != expected_hret) { + ehca_err(ib_dev, "hipz_qp_register_rpage() " "h_ret= %lx ", h_ret); ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; + goto init_qp_queue1; } vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); if (vpage) { ehca_err(ib_dev, "ipz_qpageit_get_inc() " "should not succeed vpage=%p", vpage); ret = -EINVAL; - goto init_qp_queues1; + goto init_qp_queue1; } } else { if (h_ret != H_PAGE_REGISTERED) { - ehca_err(ib_dev, "RQ hipz_qp_register_rpage() " + ehca_err(ib_dev, "hipz_qp_register_rpage() " "h_ret= %lx ", h_ret); ret = ehca2ib_return_code(h_ret); - goto init_qp_queues1; + goto init_qp_queue1; } } } - ipz_qeit_reset(&my_qp->ipz_rqueue); + ipz_qeit_reset(queue); return 0; -init_qp_queues1: - ipz_queue_dtor(&my_qp->ipz_rqueue); -init_qp_queues0: - ipz_queue_dtor(&my_qp->ipz_squeue); +init_qp_queue1: + ipz_queue_dtor(queue); return ret; } -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +/* + * Create an ib_qp struct that is either a QP or an SRQ, depending on + * the value of the is_srq parameter. If init_attr and srq_init_attr share + * fields, the field out of init_attr is used. + */ +struct ehca_qp *internal_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata, int is_srq) { - static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 }; - static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 }; struct ehca_qp *my_qp; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ib_ucontext *context = NULL; u64 h_ret; - int max_send_sge, max_recv_sge, ret; + int is_llqp = 0, has_srq = 0; + int qp_type, max_send_sge, max_recv_sge, ret; /* h_call's out parameters */ struct ehca_alloc_qp_parms parms; - u32 swqe_size = 0, rwqe_size = 0; - u8 daqp_completion, isdaqp; + u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; unsigned long flags; + memset(&parms, 0, sizeof(parms)); + qp_type = init_attr->qp_type; + if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", @@ -412,41 +392,98 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - /* save daqp completion bits */ - daqp_completion = init_attr->qp_type & 0x60; - /* save daqp bit */ - isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0; - init_attr->qp_type = init_attr->qp_type & 0x1F; + /* save LLQP info */ + if (qp_type & 0x80) { + is_llqp = 1; + parms.ext_type = EQPT_LLQP; + parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; + } + qp_type &= 0x1F; + init_attr->qp_type &= 0x1F; - if (init_attr->qp_type != IB_QPT_UD && - init_attr->qp_type != IB_QPT_SMI && - init_attr->qp_type != IB_QPT_GSI && - init_attr->qp_type != IB_QPT_UC && - init_attr->qp_type != IB_QPT_RC) { - ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type); - return ERR_PTR(-EINVAL); + /* handle SRQ base QPs */ + if (init_attr->srq) { + struct ehca_qp *my_srq = + container_of(init_attr->srq, struct ehca_qp, ib_srq); + + has_srq = 1; + parms.ext_type = EQPT_SRQBASE; + parms.srq_qpn = my_srq->real_qp_num; + parms.srq_token = my_srq->token; } - if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD) - && isdaqp) { - ehca_err(pd->device, "unsupported LL QP Type=%x", - init_attr->qp_type); + + if (is_llqp && has_srq) { + ehca_err(pd->device, "LLQPs can't have an SRQ"); return ERR_PTR(-EINVAL); - } else if (init_attr->qp_type == IB_QPT_RC && isdaqp && - (init_attr->cap.max_send_wr > 255 || - init_attr->cap.max_recv_wr > 255 )) { - ehca_err(pd->device, "Invalid Number of max_sq_wr =%x " - "or max_rq_wr=%x for QP Type=%x", - init_attr->cap.max_send_wr, - init_attr->cap.max_recv_wr,init_attr->qp_type); - return ERR_PTR(-EINVAL); - } else if (init_attr->qp_type == IB_QPT_UD && isdaqp && - init_attr->cap.max_send_wr > 255) { - ehca_err(pd->device, - "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x", - init_attr->cap.max_send_wr, init_attr->qp_type); + } + + /* handle SRQs */ + if (is_srq) { + parms.ext_type = EQPT_SRQ; + parms.srq_limit = srq_init_attr->attr.srq_limit; + if (init_attr->cap.max_recv_sge > 3) { + ehca_err(pd->device, "no more than three SGEs " + "supported for SRQ pd=%p max_sge=%x", + pd, init_attr->cap.max_recv_sge); + return ERR_PTR(-EINVAL); + } + } + + /* check QP type */ + if (qp_type != IB_QPT_UD && + qp_type != IB_QPT_UC && + qp_type != IB_QPT_RC && + qp_type != IB_QPT_SMI && + qp_type != IB_QPT_GSI) { + ehca_err(pd->device, "wrong QP Type=%x", qp_type); return ERR_PTR(-EINVAL); } + if (is_llqp) { + switch (qp_type) { + case IB_QPT_RC: + if ((init_attr->cap.max_send_wr > 255) || + (init_attr->cap.max_recv_wr > 255)) { + ehca_err(pd->device, + "Invalid Number of max_sq_wr=%x " + "or max_rq_wr=%x for RC LLQP", + init_attr->cap.max_send_wr, + init_attr->cap.max_recv_wr); + return ERR_PTR(-EINVAL); + } + break; + case IB_QPT_UD: + if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { + ehca_err(pd->device, "UD LLQP not supported " + "by this adapter"); + return ERR_PTR(-ENOSYS); + } + if (!(init_attr->cap.max_send_sge <= 5 + && init_attr->cap.max_send_sge >= 1 + && init_attr->cap.max_recv_sge <= 5 + && init_attr->cap.max_recv_sge >= 1)) { + ehca_err(pd->device, + "Invalid Number of max_send_sge=%x " + "or max_recv_sge=%x for UD LLQP", + init_attr->cap.max_send_sge, + init_attr->cap.max_recv_sge); + return ERR_PTR(-EINVAL); + } else if (init_attr->cap.max_send_wr > 255) { + ehca_err(pd->device, + "Invalid Number of " + "ax_send_wr=%x for UD QP_TYPE=%x", + init_attr->cap.max_send_wr, qp_type); + return ERR_PTR(-EINVAL); + } + break; + default: + ehca_err(pd->device, "unsupported LL QP Type=%x", + qp_type); + return ERR_PTR(-EINVAL); + break; + } + } + if (pd->uobject && udata) context = pd->uobject->context; @@ -456,16 +493,17 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, return ERR_PTR(-ENOMEM); } - memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms)); spin_lock_init(&my_qp->spinlock_s); spin_lock_init(&my_qp->spinlock_r); + my_qp->qp_type = qp_type; + my_qp->ext_type = parms.ext_type; - my_qp->recv_cq = - container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); - my_qp->send_cq = - container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - - my_qp->init_attr = *init_attr; + if (init_attr->recv_cq) + my_qp->recv_cq = + container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); + if (init_attr->send_cq) + my_qp->send_cq = + container_of(init_attr->send_cq, struct ehca_cq, ib_cq); do { if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { @@ -474,9 +512,9 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, goto create_qp_exit0; } - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + write_lock_irqsave(&ehca_qp_idr_lock, flags); ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); } while (ret == -EAGAIN); @@ -486,10 +524,10 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, goto create_qp_exit0; } - parms.servicetype = ibqptype2servicetype(init_attr->qp_type); + parms.servicetype = ibqptype2servicetype(qp_type); if (parms.servicetype < 0) { ret = -EINVAL; - ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type); + ehca_err(pd->device, "Invalid qp_type=%x", qp_type); goto create_qp_exit0; } @@ -501,21 +539,25 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, /* UD_AV CIRCUMVENTION */ max_send_sge = init_attr->cap.max_send_sge; max_recv_sge = init_attr->cap.max_recv_sge; - if (IB_QPT_UD == init_attr->qp_type || - IB_QPT_GSI == init_attr->qp_type || - IB_QPT_SMI == init_attr->qp_type) { + if (parms.servicetype == ST_UD && !is_llqp) { max_send_sge += 2; max_recv_sge += 2; } - parms.ipz_eq_handle = shca->eq.ipz_eq_handle; - parms.daqp_ctrl = isdaqp | daqp_completion; + parms.token = my_qp->token; + parms.eq_handle = shca->eq.ipz_eq_handle; parms.pd = my_pd->fw_pd; - parms.max_recv_sge = max_recv_sge; - parms.max_send_sge = max_send_sge; + if (my_qp->send_cq) + parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; + if (my_qp->recv_cq) + parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms); + parms.max_send_wr = init_attr->cap.max_send_wr; + parms.max_recv_wr = init_attr->cap.max_recv_wr; + parms.max_send_sge = max_send_sge; + parms.max_recv_sge = max_recv_sge; + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); if (h_ret != H_SUCCESS) { ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx", h_ret); @@ -523,18 +565,20 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, goto create_qp_exit1; } - my_qp->ib_qp.qp_num = my_qp->real_qp_num; + ib_qp_num = my_qp->real_qp_num = parms.real_qp_num; + my_qp->ipz_qp_handle = parms.qp_handle; + my_qp->galpas = parms.galpas; - switch (init_attr->qp_type) { + switch (qp_type) { case IB_QPT_RC: - if (isdaqp == 0) { + if (!is_llqp) { swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ (parms.act_nr_send_sges)]); rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[ (parms.act_nr_recv_sges)]); - } else { /* for daqp we need to use msg size, not wqe size */ - swqe_size = da_rc_msg_size[max_send_sge]; - rwqe_size = da_rc_msg_size[max_recv_sge]; + } else { /* for LLQP we need to use msg size, not wqe size */ + swqe_size = ll_qp_msg_size(max_send_sge); + rwqe_size = ll_qp_msg_size(max_recv_sge); parms.act_nr_send_sges = 1; parms.act_nr_recv_sges = 1; } @@ -549,29 +593,27 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, case IB_QPT_UD: case IB_QPT_GSI: case IB_QPT_SMI: - /* UD circumvention */ - parms.act_nr_recv_sges -= 2; - parms.act_nr_send_sges -= 2; - if (isdaqp) { - swqe_size = da_ud_sq_msg_size[max_send_sge]; - rwqe_size = da_rc_msg_size[max_recv_sge]; + if (is_llqp) { + swqe_size = ll_qp_msg_size(parms.act_nr_send_sges); + rwqe_size = ll_qp_msg_size(parms.act_nr_recv_sges); parms.act_nr_send_sges = 1; parms.act_nr_recv_sges = 1; } else { + /* UD circumvention */ + parms.act_nr_send_sges -= 2; + parms.act_nr_recv_sges -= 2; swqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[parms.act_nr_send_sges]); rwqe_size = offsetof(struct ehca_wqe, u.ud_av.sg_list[parms.act_nr_recv_sges]); } - if (IB_QPT_GSI == init_attr->qp_type || - IB_QPT_SMI == init_attr->qp_type) { + if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { parms.act_nr_send_wqes = init_attr->cap.max_send_wr; parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; parms.act_nr_send_sges = init_attr->cap.max_send_sge; parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; - my_qp->ib_qp.qp_num = - (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1; + ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; } break; @@ -580,108 +622,234 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, break; } - /* initializes r/squeue and registers queue pages */ - ret = init_qp_queues(shca, my_qp, - parms.nr_sq_pages, parms.nr_rq_pages, - swqe_size, rwqe_size, - parms.act_nr_send_sges, parms.act_nr_recv_sges); - if (ret) { - ehca_err(pd->device, - "Couldn't initialize r/squeue and pages ret=%x", ret); - goto create_qp_exit2; + /* initialize r/squeue and register queue pages */ + if (HAS_SQ(my_qp)) { + ret = init_qp_queue( + shca, my_qp, &my_qp->ipz_squeue, 0, + HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS, + parms.nr_sq_pages, swqe_size, + parms.act_nr_send_sges); + if (ret) { + ehca_err(pd->device, "Couldn't initialize squeue " + "and pages ret=%x", ret); + goto create_qp_exit2; + } } - my_qp->ib_qp.pd = &my_pd->ib_pd; - my_qp->ib_qp.device = my_pd->ib_pd.device; + if (HAS_RQ(my_qp)) { + ret = init_qp_queue( + shca, my_qp, &my_qp->ipz_rqueue, 1, + H_SUCCESS, parms.nr_rq_pages, rwqe_size, + parms.act_nr_recv_sges); + if (ret) { + ehca_err(pd->device, "Couldn't initialize rqueue " + "and pages ret=%x", ret); + goto create_qp_exit3; + } + } - my_qp->ib_qp.recv_cq = init_attr->recv_cq; - my_qp->ib_qp.send_cq = init_attr->send_cq; + if (is_srq) { + my_qp->ib_srq.pd = &my_pd->ib_pd; + my_qp->ib_srq.device = my_pd->ib_pd.device; - my_qp->ib_qp.qp_type = init_attr->qp_type; + my_qp->ib_srq.srq_context = init_attr->qp_context; + my_qp->ib_srq.event_handler = init_attr->event_handler; + } else { + my_qp->ib_qp.qp_num = ib_qp_num; + my_qp->ib_qp.pd = &my_pd->ib_pd; + my_qp->ib_qp.device = my_pd->ib_pd.device; + + my_qp->ib_qp.recv_cq = init_attr->recv_cq; + my_qp->ib_qp.send_cq = init_attr->send_cq; - my_qp->qp_type = init_attr->qp_type; - my_qp->ib_qp.srq = init_attr->srq; + my_qp->ib_qp.qp_type = qp_type; + my_qp->ib_qp.srq = init_attr->srq; - my_qp->ib_qp.qp_context = init_attr->qp_context; - my_qp->ib_qp.event_handler = init_attr->event_handler; + my_qp->ib_qp.qp_context = init_attr->qp_context; + my_qp->ib_qp.event_handler = init_attr->event_handler; + } init_attr->cap.max_inline_data = 0; /* not supported yet */ init_attr->cap.max_recv_sge = parms.act_nr_recv_sges; init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes; init_attr->cap.max_send_sge = parms.act_nr_send_sges; init_attr->cap.max_send_wr = parms.act_nr_send_wqes; + my_qp->init_attr = *init_attr; /* NOTE: define_apq0() not supported yet */ - if (init_attr->qp_type == IB_QPT_GSI) { + if (qp_type == IB_QPT_GSI) { h_ret = ehca_define_sqp(shca, my_qp, init_attr); if (h_ret != H_SUCCESS) { ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx", h_ret); ret = ehca2ib_return_code(h_ret); - goto create_qp_exit3; + goto create_qp_exit4; } } - if (init_attr->send_cq) { - struct ehca_cq *cq = container_of(init_attr->send_cq, - struct ehca_cq, ib_cq); - ret = ehca_cq_assign_qp(cq, my_qp); + + if (my_qp->send_cq) { + ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); if (ret) { ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x", ret); - goto create_qp_exit3; + goto create_qp_exit4; } - my_qp->send_cq = cq; } + /* copy queues, galpa data to user space */ if (context && udata) { - struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue; - struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue; struct ehca_create_qp_resp resp; memset(&resp, 0, sizeof(resp)); resp.qp_num = my_qp->real_qp_num; resp.token = my_qp->token; resp.qp_type = my_qp->qp_type; + resp.ext_type = my_qp->ext_type; resp.qkey = my_qp->qkey; resp.real_qp_num = my_qp->real_qp_num; - /* rqueue properties */ - resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size; - resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg; - resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length; - resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize; - resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state; - /* squeue properties */ - resp.ipz_squeue.qe_size = ipz_squeue->qe_size; - resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg; - resp.ipz_squeue.queue_length = ipz_squeue->queue_length; - resp.ipz_squeue.pagesize = ipz_squeue->pagesize; - resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state; + if (HAS_SQ(my_qp)) + queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue); + if (HAS_RQ(my_qp)) + queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit3; + goto create_qp_exit4; } } - return &my_qp->ib_qp; + return my_qp; + +create_qp_exit4: + if (HAS_RQ(my_qp)) + ipz_queue_dtor(&my_qp->ipz_rqueue); create_qp_exit3: - ipz_queue_dtor(&my_qp->ipz_rqueue); - ipz_queue_dtor(&my_qp->ipz_squeue); + if (HAS_SQ(my_qp)) + ipz_queue_dtor(&my_qp->ipz_squeue); create_qp_exit2: hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); create_qp_exit1: - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + write_lock_irqsave(&ehca_qp_idr_lock, flags); idr_remove(&ehca_qp_idr, my_qp->token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); create_qp_exit0: kmem_cache_free(qp_cache, my_qp); return ERR_PTR(ret); } +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) +{ + struct ehca_qp *ret; + + ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); + return IS_ERR(ret) ? (struct ib_qp *) ret : &ret->ib_qp; +} + +int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct ib_qp_init_attr qp_init_attr; + struct ehca_qp *my_qp; + struct ib_srq *ret; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 hret, update_mask; + + /* For common attributes, internal_create_qp() takes its info + * out of qp_init_attr, so copy all common attrs there. + */ + memset(&qp_init_attr, 0, sizeof(qp_init_attr)); + qp_init_attr.event_handler = srq_init_attr->event_handler; + qp_init_attr.qp_context = srq_init_attr->srq_context; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.qp_type = IB_QPT_RC; + qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr; + qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge; + + my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); + if (IS_ERR(my_qp)) + return (struct ib_srq *) my_qp; + + /* copy back return values */ + srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; + srq_init_attr->attr.max_sge = qp_init_attr.cap.max_recv_sge; + + /* drive SRQ into RTR state */ + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(pd->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + ret = ERR_PTR(-ENOMEM); + goto create_srq1; + } + + mqpcb->qp_state = EHCA_QPS_INIT; + mqpcb->prim_phys_port = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to INIT" + "ehca_qp=%p qp_num=%x hret=%lx", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_enable = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not enable SRQ" + "ehca_qp=%p qp_num=%x hret=%lx", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_state = EHCA_QPS_RTR; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to RTR" + "ehca_qp=%p qp_num=%x hret=%lx", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + return &my_qp->ib_srq; + +create_srq2: + ret = ERR_PTR(ehca2ib_return_code(hret)); + ehca_free_fw_ctrlblock(mqpcb); + +create_srq1: + internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject); + + return ret; +} + /* * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe @@ -765,7 +933,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, u64 h_ret; int bad_wqe_cnt = 0; int squeue_locked = 0; - unsigned long spl_flags = 0; + unsigned long flags = 0; /* do query_qp to obtain current attr values */ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); @@ -886,6 +1054,17 @@ static int internal_modify_qp(struct ib_qp *ibqp, "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x", my_qp, ibqp->qp_num, statetrans); + /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set + * in non-LL UD QPs. + */ + if ((my_qp->qp_type == IB_QPT_UD) && + (my_qp->ext_type != EQPT_LLQP) && + (statetrans == IB_QPST_INIT2RTR) && + (shca->hw_level >= 0x22)) { + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + mqpcb->send_grh_flag = 1; + } + /* sqe -> rts: set purge bit of bad wqe before actual trans */ if ((my_qp->qp_type == IB_QPT_UD || my_qp->qp_type == IB_QPT_GSI || @@ -895,7 +1074,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, if (!ibqp->uobject) { struct ehca_wqe *wqe; /* lock send queue */ - spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_s, flags); squeue_locked = 1; /* mark next free wqe */ wqe = (struct ehca_wqe*) @@ -1181,7 +1360,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, modify_qp_exit2: if (squeue_locked) { /* this means: sqe -> rts */ - spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); my_qp->sqerr_purgeflag = 1; } @@ -1312,6 +1491,9 @@ int ehca_query_qp(struct ib_qp *qp, qp_attr->alt_port_num = qpcb->alt_phys_port; qp_attr->alt_timeout = qpcb->timeout_al; + qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; + qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; + /* primary av */ qp_attr->ah_attr.sl = qpcb->service_level; @@ -1367,53 +1549,170 @@ query_qp_exit1: return ret; } -int ehca_destroy_qp(struct ib_qp *ibqp) +int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + struct ehca_qp *my_qp = + container_of(ibsrq, struct ehca_qp, ib_srq); + struct ehca_pd *my_pd = + container_of(ibsrq->pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(ibsrq->pd->device, struct ehca_shca, ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 update_mask; + u64 h_ret; + int ret = 0; + + u32 cur_pid = current->tgid; + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(ibsrq->pd->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + update_mask = 0; + if (attr_mask & IB_SRQ_LIMIT) { + attr_mask &= ~IB_SRQ_LIMIT; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) + | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); + mqpcb->curr_srq_limit = + EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit); + mqpcb->qp_aff_asyn_ev_log_reg = + EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); + } + + /* by now, all bits in attr_mask should have been cleared */ + if (attr_mask) { + ehca_err(ibsrq->device, "invalid attribute mask bits set " + "attr_mask=%x", attr_mask); + ret = -EINVAL; + goto modify_srq_exit0; + } + + if (ehca_debug_level) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, + NULL, update_mask, mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibsrq->device, "hipz_h_modify_qp() failed rc=%lx " + "ehca_qp=%p qp_num=%x", + h_ret, my_qp, my_qp->real_qp_num); + } + +modify_srq_exit0: + ehca_free_fw_ctrlblock(mqpcb); + + return ret; +} + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) +{ + struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); + struct ehca_pd *my_pd = container_of(srq->pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + u32 cur_pid = current->tgid; + int ret = 0; + u64 h_ret; + + if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context && + my_pd->ownpid != cur_pid) { + ehca_err(srq->device, "Invalid caller pid=%x ownpid=%x", + cur_pid, my_pd->ownpid); + return -EINVAL; + } + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(srq->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, + NULL, qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(srq->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lx", + my_qp, my_qp->real_qp_num, h_ret); + goto query_srq_exit1; + } + + srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; + srq_attr->srq_limit = EHCA_BMASK_GET( + MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); + + if (ehca_debug_level) + ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + +query_srq_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject) +{ + struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, ib_pd); u32 cur_pid = current->tgid; - u32 qp_num = ibqp->qp_num; + u32 qp_num = my_qp->real_qp_num; int ret; u64 h_ret; u8 port_num; enum ib_qp_type qp_type; unsigned long flags; - if (ibqp->uobject) { + if (uobject) { if (my_qp->mm_count_galpa || my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { - ehca_err(ibqp->device, "Resources still referenced in " - "user space qp_num=%x", ibqp->qp_num); + ehca_err(dev, "Resources still referenced in " + "user space qp_num=%x", qp_num); return -EINVAL; } if (my_pd->ownpid != cur_pid) { - ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x", + ehca_err(dev, "Invalid caller pid=%x ownpid=%x", cur_pid, my_pd->ownpid); return -EINVAL; } } if (my_qp->send_cq) { - ret = ehca_cq_unassign_qp(my_qp->send_cq, - my_qp->real_qp_num); + ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); if (ret) { - ehca_err(ibqp->device, "Couldn't unassign qp from " + ehca_err(dev, "Couldn't unassign qp from " "send_cq ret=%x qp_num=%x cq_num=%x", ret, - my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number); + qp_num, my_qp->send_cq->cq_number); return ret; } } - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + write_lock_irqsave(&ehca_qp_idr_lock, flags); idr_remove(&ehca_qp_idr, my_qp->token); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { - ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx " + ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx " "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); return ehca2ib_return_code(h_ret); } @@ -1424,7 +1723,7 @@ int ehca_destroy_qp(struct ib_qp *ibqp) /* no support for IB_QPT_SMI yet */ if (qp_type == IB_QPT_GSI) { struct ib_event event; - ehca_info(ibqp->device, "device %s: port %x is inactive.", + ehca_info(dev, "device %s: port %x is inactive.", shca->ib_device.name, port_num); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ERR; @@ -1433,12 +1732,28 @@ int ehca_destroy_qp(struct ib_qp *ibqp) ib_dispatch_event(&event); } - ipz_queue_dtor(&my_qp->ipz_rqueue); - ipz_queue_dtor(&my_qp->ipz_squeue); + if (HAS_RQ(my_qp)) + ipz_queue_dtor(&my_qp->ipz_rqueue); + if (HAS_SQ(my_qp)) + ipz_queue_dtor(&my_qp->ipz_squeue); kmem_cache_free(qp_cache, my_qp); return 0; } +int ehca_destroy_qp(struct ib_qp *qp) +{ + return internal_destroy_qp(qp->device, + container_of(qp, struct ehca_qp, ib_qp), + qp->uobject); +} + +int ehca_destroy_srq(struct ib_srq *srq) +{ + return internal_destroy_qp(srq->device, + container_of(srq, struct ehca_qp, ib_srq), + srq->uobject); +} + int ehca_init_qp_cache(void) { qp_cache = kmem_cache_create("ehca_cache_qp", diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index caec9de..61da65e6 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -3,8 +3,9 @@ * * post_send/recv, poll_cq, req_notify * - * Authors: Waleri Fomin <fomin@de.ibm.com> - * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Waleri Fomin <fomin@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> * Reinhard Ernst <rernst@de.ibm.com> * * Copyright (c) 2005 IBM Corporation @@ -362,10 +363,10 @@ int ehca_post_send(struct ib_qp *qp, struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; - unsigned long spl_flags; + unsigned long flags; /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_s, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_s, flags); /* loop processes list of send reqs */ for (cur_send_wr = send_wr; cur_send_wr != NULL; @@ -406,26 +407,31 @@ int ehca_post_send(struct ib_qp *qp, } /* eof for cur_send_wr */ post_send_exit0: - /* UNLOCK the QUEUE */ - spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags); iosync(); /* serialize GAL register access */ hipz_update_sqa(my_qp, wqe_cnt); + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); return ret; } -int ehca_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) +static int internal_post_recv(struct ehca_qp *my_qp, + struct ib_device *dev, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) { - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); struct ib_recv_wr *cur_recv_wr; struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; - unsigned long spl_flags; + unsigned long flags; + + if (unlikely(!HAS_RQ(my_qp))) { + ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", + my_qp, my_qp->real_qp_num, my_qp->ext_type); + return -ENODEV; + } /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_r, spl_flags); + spin_lock_irqsave(&my_qp->spinlock_r, flags); /* loop processes list of send reqs */ for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; @@ -439,8 +445,8 @@ int ehca_post_recv(struct ib_qp *qp, *bad_recv_wr = cur_recv_wr; if (wqe_cnt == 0) { ret = -ENOMEM; - ehca_err(qp->device, "Too many posted WQEs " - "qp_num=%x", qp->qp_num); + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); } goto post_recv_exit0; } @@ -455,23 +461,39 @@ int ehca_post_recv(struct ib_qp *qp, *bad_recv_wr = cur_recv_wr; if (wqe_cnt == 0) { ret = -EINVAL; - ehca_err(qp->device, "Could not write WQE " - "qp_num=%x", qp->qp_num); + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); } goto post_recv_exit0; } wqe_cnt++; - ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d", - my_qp, qp->qp_num, wqe_cnt); + ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d", + my_qp, my_qp->real_qp_num, wqe_cnt); } /* eof for cur_recv_wr */ post_recv_exit0: - spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags); iosync(); /* serialize GAL register access */ hipz_update_rqa(my_qp, wqe_cnt); + spin_unlock_irqrestore(&my_qp->spinlock_r, flags); return ret; } +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp), + qp->device, recv_wr, bad_recv_wr); +} + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), + srq->device, recv_wr, bad_recv_wr); +} + /* * ib_wc_opcode table converts ehca wc opcode to ib * Since we use zero to indicate invalid opcode, the actual ib opcode must @@ -494,6 +516,7 @@ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) int ret = 0; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; + struct ehca_qp *my_qp; int cqe_count = 0; poll_cq_one_read_cqe: @@ -513,7 +536,7 @@ poll_cq_one_read_cqe: if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number); int purgeflag; - unsigned long spl_flags; + unsigned long flags; if (!qp) { ehca_err(cq->device, "cq_num=%x qp_num=%x " "could not find qp -> ignore cqe", @@ -523,9 +546,9 @@ poll_cq_one_read_cqe: /* ignore this purged cqe */ goto poll_cq_one_read_cqe; } - spin_lock_irqsave(&qp->spinlock_s, spl_flags); + spin_lock_irqsave(&qp->spinlock_s, flags); purgeflag = qp->sqerr_purgeflag; - spin_unlock_irqrestore(&qp->spinlock_s, spl_flags); + spin_unlock_irqrestore(&qp->spinlock_s, flags); if (purgeflag) { ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x " @@ -545,7 +568,7 @@ poll_cq_one_read_cqe: } /* tracing cqe */ - if (ehca_debug_level) { + if (unlikely(ehca_debug_level)) { ehca_dbg(cq->device, "Received COMPLETION ehca_cq=%p cq_num=%x -----", my_cq, my_cq->cq_number); @@ -579,7 +602,11 @@ poll_cq_one_read_cqe: } else wc->status = IB_WC_SUCCESS; - wc->qp = NULL; + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + wc->qp = &my_qp->ib_qp; + read_unlock(&ehca_qp_idr_lock); + wc->byte_len = cqe->nr_bytes_transferred; wc->pkey_index = cqe->pkey_index; wc->slid = cqe->rlid; @@ -589,7 +616,7 @@ poll_cq_one_read_cqe: wc->imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; - if (wc->status != IB_WC_SUCCESS) + if (unlikely(wc->status != IB_WC_SUCCESS)) ehca_dbg(cq->device, "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe " "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx " @@ -610,7 +637,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) int nr; struct ib_wc *current_wc = wc; int ret = 0; - unsigned long spl_flags; + unsigned long flags; if (num_entries < 1) { ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " @@ -619,14 +646,14 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) goto poll_cq_exit0; } - spin_lock_irqsave(&my_cq->spinlock, spl_flags); + spin_lock_irqsave(&my_cq->spinlock, flags); for (nr = 0; nr < num_entries; nr++) { ret = ehca_poll_cq_one(cq, current_wc); if (ret) break; current_wc++; } /* eof for nr */ - spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + spin_unlock_irqrestore(&my_cq->spinlock, flags); if (ret == -EAGAIN || !ret) ret = nr; @@ -637,7 +664,6 @@ poll_cq_exit0: int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - unsigned long spl_flags; int ret = 0; switch (notify_flags & IB_CQ_SOLICITED_MASK) { @@ -652,6 +678,7 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) } if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + unsigned long spl_flags; spin_lock_irqsave(&my_cq->spinlock, spl_flags); ret = ipz_qeit_is_valid(&my_cq->ipz_queue); spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 973c4b5..03b185f 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -59,6 +59,7 @@ #include <linux/cpu.h> #include <linux/device.h> +#include <asm/atomic.h> #include <asm/abs_addr.h> #include <asm/ibmebus.h> #include <asm/io.h> diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index 73db920..3031b3b 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -253,16 +253,16 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */ u32 cur_pid = current->tgid; u32 ret; - unsigned long flags; struct ehca_cq *cq; struct ehca_qp *qp; struct ehca_pd *pd; + struct ib_uobject *uobject; switch (q_type) { case 1: /* CQ */ - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + read_lock(&ehca_cq_idr_lock); cq = idr_find(&ehca_cq_idr, idr_handle); - spin_unlock_irqrestore(&ehca_cq_idr_lock, flags); + read_unlock(&ehca_cq_idr_lock); /* make sure this mmap really belongs to the authorized user */ if (!cq) @@ -288,9 +288,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) break; case 2: /* QP */ - spin_lock_irqsave(&ehca_qp_idr_lock, flags); + read_lock(&ehca_qp_idr_lock); qp = idr_find(&ehca_qp_idr, idr_handle); - spin_unlock_irqrestore(&ehca_qp_idr_lock, flags); + read_unlock(&ehca_qp_idr_lock); /* make sure this mmap really belongs to the authorized user */ if (!qp) @@ -304,7 +304,8 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return -ENOMEM; } - if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context) + uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject; + if (!uobject || uobject->context != context) return -EINVAL; ret = ehca_mmap_qp(vma, qp, rsrc_type); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 5766ae3..4776a8b 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -5,6 +5,7 @@ * * Authors: Christoph Raisch <raisch@de.ibm.com> * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> * Gerd Bayer <gerd.bayer@de.ibm.com> * Waleri Fomin <fomin@de.ibm.com> * @@ -62,6 +63,12 @@ #define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) #define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) +#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63) +#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64) +#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63) + #define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) #define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) #define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) @@ -74,10 +81,7 @@ #define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) #define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) -/* direct access qp controls */ -#define DAQP_CTRL_ENABLE 0x01 -#define DAQP_CTRL_SEND_COMP 0x20 -#define DAQP_CTRL_RECV_COMP 0x40 +static DEFINE_SPINLOCK(hcall_lock); static u32 get_longbusy_msecs(int longbusy_rc) { @@ -155,7 +159,7 @@ static long ehca_plpar_hcall9(unsigned long opcode, { long ret; int i, sleep_msecs, lock_is_set = 0; - unsigned long flags; + unsigned long flags = 0; ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", @@ -284,53 +288,53 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, } u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp, struct ehca_alloc_qp_parms *parms) { u64 ret; - u64 allocate_controls; - u64 max_r10_reg; + u64 allocate_controls, max_r10_reg, r11, r12; u64 outs[PLPAR_HCALL9_BUFSIZE]; - u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1; - u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1; - int daqp_ctrl = parms->daqp_ctrl; allocate_controls = - EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, - (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0) + EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, - (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0) + !!(parms->ll_comp_flags & LLQP_RECV_COMP)) | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, - (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0) + !!(parms->ll_comp_flags & LLQP_SEND_COMP)) | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, parms->ud_av_l_key_ctl) | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); max_r10_reg = EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, - max_nr_send_wqes) + parms->max_send_wr + 1) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, - max_nr_receive_wqes) + parms->max_recv_wr + 1) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, parms->max_send_sge) | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, parms->max_recv_sge); + r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); + + if (parms->ext_type == EQPT_SRQ) + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit); + else + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn); + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ allocate_controls, /* r5 */ - qp->send_cq->ipz_cq_handle.handle, - qp->recv_cq->ipz_cq_handle.handle, - parms->ipz_eq_handle.handle, - ((u64)qp->token << 32) | parms->pd.value, - max_r10_reg, /* r10 */ - parms->ud_av_l_key_ctl, /* r11 */ - 0); - qp->ipz_qp_handle.handle = outs[0]; - qp->real_qp_num = (u32)outs[1]; + parms->send_cq_handle.handle, + parms->recv_cq_handle.handle, + parms->eq_handle.handle, + ((u64)parms->token << 32) | parms->pd.value, + max_r10_reg, r11, r12); + + parms->qp_handle.handle = outs[0]; + parms->real_qp_num = (u32)outs[1]; parms->act_nr_send_wqes = (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); parms->act_nr_recv_wqes = @@ -345,7 +349,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); if (ret == H_SUCCESS) - hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]); + hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]); if (ret == H_NOT_ENOUGH_RESOURCES) ehca_gen_err("Not enough resources. ret=%lx", ret); diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 2869f7d..60ce02b 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -78,7 +78,6 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, * initialize resources, create empty QPPTs (2 rings). */ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp, struct ehca_alloc_qp_parms *parms); u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index fad9136..dad6dea 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -163,6 +163,7 @@ struct hipz_qptemm { #define QPX_SQADDER EHCA_BMASK_IBM(48,63) #define QPX_RQADDER EHCA_BMASK_IBM(48,63) +#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3,3) #define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x) @@ -360,6 +361,24 @@ struct hipz_query_hca { u32 max_neq; } __attribute__ ((packed)); +#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0) +#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1) +#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2) +#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3) +#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4) +#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5) +#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6) +#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7) +#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8) +#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9) +#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10) +#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11) +#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12) +#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13) +#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) +#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) +#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) + /* query port response block */ struct hipz_query_port { u32 state; diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h index 57f141a..007f088 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h @@ -105,7 +105,6 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue); * step in struct ipz_queue, will wrap in ringbuffer * returns address (kv) of Queue Entry BEFORE increment * warning don't use in parallel with ipz_qpageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries */ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) { @@ -121,31 +120,24 @@ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) } /* + * return a bool indicating whether current Queue Entry is valid + */ +static inline int ipz_qeit_is_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); +} + +/* * return current Queue Entry, increment Queue Entry iterator by one * step in struct ipz_queue, will wrap in ringbuffer * returns address (kv) of Queue Entry BEFORE increment * returns 0 and does not increment, if wrong valid state * warning don't use in parallel with ipz_qpageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries */ static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) { - struct ehca_cqe *cqe = ipz_qeit_get(queue); - u32 cqe_flags = cqe->cqe_flags; - - if ((cqe_flags >> 7) != (queue->toggle_state & 1)) - return NULL; - - ipz_qeit_get_inc(queue); - return cqe; -} - -static inline int ipz_qeit_is_valid(struct ipz_queue *queue) -{ - struct ehca_cqe *cqe = ipz_qeit_get(queue); - u32 cqe_flags = cqe->cqe_flags; - - return cqe_flags >> 7 == (queue->toggle_state & 1); + return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL; } /* diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig index 90c1454..044da58 100644 --- a/drivers/infiniband/hw/ipath/Kconfig +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_IPATH tristate "QLogic InfiniPath Driver" - depends on (PCI_MSI || HT_IRQ) && 64BIT && INFINIBAND && NET + depends on (PCI_MSI || HT_IRQ) && 64BIT && NET ---help--- This is a driver for QLogic InfiniPath host channel adapters, including InfiniBand verbs support. This driver allows these diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 10c008f..b4b786d 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -189,8 +189,7 @@ typedef enum _ipath_ureg { #define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 #define IPATH_RUNTIME_RCVHDR_COPY 0x8 #define IPATH_RUNTIME_MASTER 0x10 -#define IPATH_RUNTIME_PBC_REWRITE 0x20 -#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40 +/* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */ /* * This structure is returned by ipath_userinit() immediately after @@ -432,8 +431,15 @@ struct ipath_user_info { #define IPATH_CMD_UNUSED_1 25 #define IPATH_CMD_UNUSED_2 26 #define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ +#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */ -#define IPATH_CMD_MAX 27 +#define IPATH_CMD_MAX 28 + +/* + * Poll types + */ +#define IPATH_POLL_TYPE_URGENT 0x01 +#define IPATH_POLL_TYPE_OVERFLOW 0x02 struct ipath_port_info { __u32 num_active; /* number of active units */ @@ -474,6 +480,8 @@ struct ipath_cmd { __u16 part_key; /* user address of __u32 bitmask of active slaves */ __u64 slave_mask_addr; + /* type of polling we want */ + __u16 poll_type; } cmd; }; @@ -502,13 +510,30 @@ struct __ipath_sendpkt { struct ipath_iovec sps_iov[4]; }; -/* Passed into diag data special file's ->write method. */ +/* + * diagnostics can send a packet by "writing" one of the following + * two structs to diag data special file + * The first is the legacy version for backward compatibility + */ struct ipath_diag_pkt { __u32 unit; __u64 data; __u32 len; }; +/* The second diag_pkt struct is the expanded version that allows + * more control over the packet, specifically, by allowing a custom + * pbc (+ extra) qword, so that special modes and deliberate + * changes to CRCs can be used. The elements were also re-ordered + * for better alignment and to avoid padding issues. + */ +struct ipath_diag_xpkt { + __u64 data; + __u64 pbc_wd; + __u32 unit; + __u32 len; +}; + /* * Data layout in I2C flash (for GUID, etc.) * All fields are little-endian binary unless otherwise stated diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 3e9241b..a6f04d2 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -90,6 +90,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) wc->queue[head].sl = entry->sl; wc->queue[head].dlid_path_bits = entry->dlid_path_bits; wc->queue[head].port_num = entry->port_num; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || @@ -139,7 +141,8 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) if (tail == wc->head) break; - + /* Make sure entry is read after head index is read. */ + smp_rmb(); qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table, wc->queue[tail].qp_num); entry->qp = &qp->ibqp; diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h index 42bfbdb..19c56e6 100644 --- a/drivers/infiniband/hw/ipath/ipath_debug.h +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 63e8368..a698f19 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -323,13 +323,14 @@ static ssize_t ipath_diagpkt_write(struct file *fp, { u32 __iomem *piobuf; u32 plen, clen, pbufn; - struct ipath_diag_pkt dp; + struct ipath_diag_pkt odp; + struct ipath_diag_xpkt dp; u32 *tmpbuf = NULL; struct ipath_devdata *dd; ssize_t ret = 0; u64 val; - if (count < sizeof(dp)) { + if (count != sizeof(dp)) { ret = -EINVAL; goto bail; } @@ -339,6 +340,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp, goto bail; } + /* + * Due to padding/alignment issues (lessened with new struct) + * the old and new structs are the same length. We need to + * disambiguate them, which we can do because odp.len has never + * been less than the total of LRH+BTH+DETH so far, while + * dp.unit (same offset) unit is unlikely to get that high. + * Similarly, dp.data, the pointer to user at the same offset + * as odp.unit, is almost certainly at least one (512byte)page + * "above" NULL. The if-block below can be omitted if compatibility + * between a new driver and older diagnostic code is unimportant. + * compatibility the other direction (new diags, old driver) is + * handled in the diagnostic code, with a warning. + */ + if (dp.unit >= 20 && dp.data < 512) { + /* very probable version mismatch. Fix it up */ + memcpy(&odp, &dp, sizeof(odp)); + /* We got a legacy dp, copy elements to dp */ + dp.unit = odp.unit; + dp.data = odp.data; + dp.len = odp.len; + dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */ + } + /* send count must be an exact number of dwords */ if (dp.len & 3) { ret = -EINVAL; @@ -371,9 +395,10 @@ static ssize_t ipath_diagpkt_write(struct file *fp, ret = -ENODEV; goto bail; } + /* Check link state, but not if we have custom PBC */ val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK; - if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM && - val != IPATH_IBSTATE_ACTIVE) { + if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT && + val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) { ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n", dd->ipath_unit, (unsigned long long) val); ret = -EINVAL; @@ -419,9 +444,13 @@ static ssize_t ipath_diagpkt_write(struct file *fp, ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n", dd->ipath_unit, plen - 1, pbufn); + if (dp.pbc_wd == 0) + /* Legacy operation, use computed pbc_wd */ + dp.pbc_wd = plen; + /* we have to flush after the PBC for correctness on some cpus * or WC buffer can be written out of order */ - writeq(plen, piobuf); + writeq(dp.pbc_wd, piobuf); ipath_flush_wc(); /* copy all by the trigger word, then flush, so it's written * to chip before trigger word, then write trigger word, then diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index e3a2232..9361f5a 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -104,6 +104,9 @@ static int __devinit ipath_init_one(struct pci_dev *, #define PCI_DEVICE_ID_INFINIPATH_HT 0xd #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10 +/* Number of seconds before our card status check... */ +#define STATUS_TIMEOUT 60 + static const struct pci_device_id ipath_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) }, @@ -119,6 +122,18 @@ static struct pci_driver ipath_driver = { .id_table = ipath_pci_tbl, }; +static void ipath_check_status(struct work_struct *work) +{ + struct ipath_devdata *dd = container_of(work, struct ipath_devdata, + status_work.work); + + /* + * If we don't have any interrupts, let the user know and + * don't bother checking again. + */ + if (dd->ipath_int_counter == 0) + dev_err(&dd->pcidev->dev, "No interrupts detected.\n"); +} static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, u32 *bar0, u32 *bar1) @@ -187,6 +202,8 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) dd->pcidev = pdev; pci_set_drvdata(pdev, dd); + INIT_DELAYED_WORK(&dd->status_work, ipath_check_status); + list_add(&dd->ipath_list, &ipath_dev_list); bail_unlock: @@ -270,7 +287,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, struct ipath_devdata *dd; unsigned long long addr; u32 bar0 = 0, bar1 = 0; - u8 rev; dd = ipath_alloc_devdata(pdev); if (IS_ERR(dd)) { @@ -432,13 +448,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, dd->ipath_deviceid = ent->device; /* save for later use */ dd->ipath_vendorid = ent->vendor; - ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev); - if (ret) { - ipath_dev_err(dd, "Failed to read PCI revision ID unit " - "%u: err %d\n", dd->ipath_unit, -ret); - goto bail_regions; /* shouldn't ever happen */ - } - dd->ipath_pcirev = rev; + dd->ipath_pcirev = pdev->revision; #if defined(__powerpc__) /* There isn't a generic way to specify writethrough mappings */ @@ -511,6 +521,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ipath_diag_add(dd); ipath_register_ib_device(dd); + /* Check that card status in STATUS_TIMEOUT seconds. */ + schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT); + goto bail; bail_irqsetup: @@ -638,6 +651,9 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) */ ipath_shutdown_device(dd); + cancel_delayed_work(&dd->status_work); + flush_scheduled_work(); + if (dd->verbs_dev) ipath_unregister_ib_device(dd->verbs_dev); @@ -706,9 +722,9 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, u64 sendctrl, sendorig; ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); - sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM; + sendorig = dd->ipath_sendctrl; for (i = first; i < last; i++) { - sendctrl = sendorig | + sendctrl = sendorig | INFINIPATH_S_DISARM | (i << INFINIPATH_S_DISARMPIOBUF_SHIFT); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, sendctrl); @@ -719,12 +735,12 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, * while we were looping; no critical bits that would require * locking. * - * Write a 0, and then the original value, reading scratch in + * disable PIOAVAILUPD, then re-enable, reading scratch in * between. This seems to avoid a chip timing race that causes * pioavail updates to memory to stop. */ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - 0); + sendorig & ~IPATH_S_PIOBUFAVAILUPD); sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); @@ -1021,14 +1037,10 @@ void ipath_kreceive(struct ipath_devdata *dd) goto bail; } - /* There is already a thread processing this queue. */ - if (test_and_set_bit(0, &dd->ipath_rcv_pending)) - goto bail; - l = dd->ipath_port0head; hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr); if (l == hdrqtail) - goto done; + goto bail; reloop: for (i = 0; l != hdrqtail; i++) { @@ -1163,10 +1175,6 @@ reloop: ipath_stats.sps_avgpkts_call = ipath_stats.sps_port0pkts / ++totcalls; -done: - clear_bit(0, &dd->ipath_rcv_pending); - smp_mb__after_clear_bit(); - bail:; } @@ -1596,6 +1604,35 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd) return ret; } + +/* + * Flush all sends that might be in the ready to send state, as well as any + * that are in the process of being sent. Used whenever we need to be + * sure the send side is idle. Cleans up all buffer state by canceling + * all pio buffers, and issuing an abort, which cleans up anything in the + * launch fifo. The cancel is superfluous on some chip versions, but + * it's safer to always do it. + * PIOAvail bits are updated by the chip as if normal send had happened. + */ +void ipath_cancel_sends(struct ipath_devdata *dd) +{ + ipath_dbg("Cancelling all in-progress send buffers\n"); + dd->ipath_lastcancel = jiffies+HZ/2; /* skip armlaunch errs a bit */ + /* + * the abort bit is auto-clearing. We read scratch to be sure + * that cancels and the abort have taken effect in the chip. + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + INFINIPATH_S_ABORT); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + ipath_disarm_piobufs(dd, 0, + (unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k)); + + /* and again, be sure all have hit the chip */ + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); +} + + static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) { static const char *what[4] = { @@ -1617,14 +1654,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); /* flush all queued sends when going to DOWN or INIT, to be sure that * they don't block MAD packets */ - if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); - ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf, - (unsigned)(dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k) - - dd->ipath_lastport_piobuf); - } + if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) + ipath_cancel_sends(dd); ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl | which); @@ -1846,6 +1877,87 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, ipath_write_kreg(dd, where, value); } +/* + * Following deal with the "obviously simple" task of overriding the state + * of the LEDS, which normally indicate link physical and logical status. + * The complications arise in dealing with different hardware mappings + * and the board-dependent routine being called from interrupts. + * and then there's the requirement to _flash_ them. + */ +#define LED_OVER_FREQ_SHIFT 8 +#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT) +/* Below is "non-zero" to force override, but both actual LEDs are off */ +#define LED_OVER_BOTH_OFF (8) + +void ipath_run_led_override(unsigned long opaque) +{ + struct ipath_devdata *dd = (struct ipath_devdata *)opaque; + int timeoff; + int pidx; + u64 lstate, ltstate, val; + + if (!(dd->ipath_flags & IPATH_INITTED)) + return; + + pidx = dd->ipath_led_override_phase++ & 1; + dd->ipath_led_override = dd->ipath_led_override_vals[pidx]; + timeoff = dd->ipath_led_override_timeoff; + + /* + * below potentially restores the LED values per current status, + * should also possibly setup the traffic-blink register, + * but leave that to per-chip functions. + */ + val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); + ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & + INFINIPATH_IBCS_LINKTRAININGSTATE_MASK; + lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) & + INFINIPATH_IBCS_LINKSTATE_MASK; + + dd->ipath_f_setextled(dd, lstate, ltstate); + mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); +} + +void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) +{ + int timeoff, freq; + + if (!(dd->ipath_flags & IPATH_INITTED)) + return; + + /* First check if we are blinking. If not, use 1HZ polling */ + timeoff = HZ; + freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; + + if (freq) { + /* For blink, set each phase from one nybble of val */ + dd->ipath_led_override_vals[0] = val & 0xF; + dd->ipath_led_override_vals[1] = (val >> 4) & 0xF; + timeoff = (HZ << 4)/freq; + } else { + /* Non-blink set both phases the same. */ + dd->ipath_led_override_vals[0] = val & 0xF; + dd->ipath_led_override_vals[1] = val & 0xF; + } + dd->ipath_led_override_timeoff = timeoff; + + /* + * If the timer has not already been started, do so. Use a "quick" + * timeout so the function will be called soon, to look at our request. + */ + if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) { + /* Need to start timer */ + init_timer(&dd->ipath_led_override_timer); + dd->ipath_led_override_timer.function = + ipath_run_led_override; + dd->ipath_led_override_timer.data = (unsigned long) dd; + dd->ipath_led_override_timer.expires = jiffies + 1; + add_timer(&dd->ipath_led_override_timer); + } else { + atomic_dec(&dd->ipath_led_override_timer_active); + } +} + /** * ipath_shutdown_device - shut down a device * @dd: the infinipath device @@ -1886,17 +1998,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd) */ udelay(5); - /* - * abort any armed or launched PIO buffers that didn't go. (self - * clearing). Will cause any packet currently being transmitted to - * go out with an EBP, and may also cause a short packet error on - * the receiver. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); - ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE << INFINIPATH_IBCC_LINKINITCMD_SHIFT); + ipath_cancel_sends(dd); /* disable IBC */ dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; @@ -1909,7 +2013,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd) * Turn the LEDs off explictly for the same reason. */ dd->ipath_f_quiet_serdes(dd); - dd->ipath_f_setextled(dd, 0, 0); if (dd->ipath_stats_timer_active) { del_timer_sync(&dd->ipath_stats_timer); @@ -1925,6 +2028,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd) ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); + + ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n"); + ipath_update_eeprom_log(dd); } /** @@ -2085,6 +2191,16 @@ int ipath_reset_device(int unit) goto bail; } + if (atomic_read(&dd->ipath_led_override_timer_active)) { + /* Need to stop LED timer, _then_ shut off LEDs */ + del_timer_sync(&dd->ipath_led_override_timer); + atomic_set(&dd->ipath_led_override_timer_active, 0); + } + + /* Shut off LEDs after we are sure timer is not running */ + dd->ipath_led_override = LED_OVER_BOTH_OFF; + dd->ipath_f_setextled(dd, 0, 0); + dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit); if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) { diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 030185f..6b91479 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -95,39 +95,37 @@ static int i2c_gpio_set(struct ipath_devdata *dd, enum i2c_type line, enum i2c_state new_line_state) { - u64 read_val, write_val, mask, *gpioval; + u64 out_mask, dir_mask, *gpioval; + unsigned long flags = 0; gpioval = &dd->ipath_gpio_out; - read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); - if (line == i2c_line_scl) - mask = dd->ipath_gpio_scl; - else - mask = dd->ipath_gpio_sda; - if (new_line_state == i2c_line_high) + if (line == i2c_line_scl) { + dir_mask = dd->ipath_gpio_scl; + out_mask = (1UL << dd->ipath_gpio_scl_num); + } else { + dir_mask = dd->ipath_gpio_sda; + out_mask = (1UL << dd->ipath_gpio_sda_num); + } + + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); + if (new_line_state == i2c_line_high) { /* tri-state the output rather than force high */ - write_val = read_val & ~mask; - else + dd->ipath_extctrl &= ~dir_mask; + } else { /* config line to be an output */ - write_val = read_val | mask; - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); + dd->ipath_extctrl |= dir_mask; + } + ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl); - /* set high and verify */ + /* set output as well (no real verify) */ if (new_line_state == i2c_line_high) - write_val = 0x1UL; + *gpioval |= out_mask; else - write_val = 0x0UL; + *gpioval &= ~out_mask; - if (line == i2c_line_scl) { - write_val <<= dd->ipath_gpio_scl_num; - *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num); - *gpioval |= write_val; - } else { - write_val <<= dd->ipath_gpio_sda_num; - *gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num); - *gpioval |= write_val; - } ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); return 0; } @@ -145,8 +143,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd, enum i2c_type line, enum i2c_state *curr_statep) { - u64 read_val, write_val, mask; + u64 read_val, mask; int ret; + unsigned long flags = 0; /* check args */ if (curr_statep == NULL) { @@ -154,15 +153,21 @@ static int i2c_gpio_get(struct ipath_devdata *dd, goto bail; } - read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); /* config line to be an input */ if (line == i2c_line_scl) mask = dd->ipath_gpio_scl; else mask = dd->ipath_gpio_sda; - write_val = read_val & ~mask; - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); + + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); + dd->ipath_extctrl &= ~mask; + ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl); + /* + * Below is very unlikely to reflect true input state if Output + * Enable actually changed. + */ read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); if (read_val & mask) *curr_statep = i2c_line_high; @@ -192,6 +197,7 @@ static void i2c_wait_for_writes(struct ipath_devdata *dd) static void scl_out(struct ipath_devdata *dd, u8 bit) { + udelay(1); i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low); i2c_wait_for_writes(dd); @@ -314,12 +320,18 @@ static int eeprom_reset(struct ipath_devdata *dd) int clock_cycles_left = 9; u64 *gpioval = &dd->ipath_gpio_out; int ret; + unsigned long flags; - eeprom_init = 1; + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); + /* Make sure shadows are consistent */ + dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); + ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg " "is %llx\n", (unsigned long long) *gpioval); + eeprom_init = 1; /* * This is to get the i2c into a known state, by first going low, * then tristate sda (and then tristate scl as first thing @@ -355,8 +367,8 @@ bail: * @len: number of bytes to receive */ -int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, - void *buffer, int len) +static int ipath_eeprom_internal_read(struct ipath_devdata *dd, + u8 eeprom_offset, void *buffer, int len) { /* compiler complains unless initialized */ u8 single_byte = 0; @@ -406,6 +418,7 @@ bail: return ret; } + /** * ipath_eeprom_write - writes data to the eeprom via I2C * @dd: the infinipath device @@ -413,8 +426,8 @@ bail: * @buffer: data to write * @len: number of bytes to write */ -int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, - const void *buffer, int len) +int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, + const void *buffer, int len) { u8 single_byte; int sub_len; @@ -488,6 +501,38 @@ bail: return ret; } +/* + * The public entry-points ipath_eeprom_read() and ipath_eeprom_write() + * are now just wrappers around the internal functions. + */ +int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, + void *buff, int len) +{ + int ret; + + ret = down_interruptible(&dd->ipath_eep_sem); + if (!ret) { + ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); + up(&dd->ipath_eep_sem); + } + + return ret; +} + +int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, + const void *buff, int len) +{ + int ret; + + ret = down_interruptible(&dd->ipath_eep_sem); + if (!ret) { + ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); + up(&dd->ipath_eep_sem); + } + + return ret; +} + static u8 flash_csum(struct ipath_flash *ifp, int adjust) { u8 *ip = (u8 *) ifp; @@ -515,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) void *buf; struct ipath_flash *ifp; __be64 guid; - int len; + int len, eep_stat; u8 csum, *bguid; int t = dd->ipath_unit; struct ipath_devdata *dd0 = ipath_lookup(0); @@ -559,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) goto bail; } - if (ipath_eeprom_read(dd, 0, buf, len)) { + down(&dd->ipath_eep_sem); + eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); + up(&dd->ipath_eep_sem); + + if (eep_stat) { ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); goto done; } @@ -634,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", (unsigned long long) be64_to_cpu(dd->ipath_guid)); + memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT); + /* + * Power-on (actually "active") hours are kept as little-endian value + * in EEPROM, but as seconds in a (possibly as small as 24-bit) + * atomic_t while running. + */ + atomic_set(&dd->ipath_active_time, 0); + dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8); + done: vfree(buf); bail:; } + +/** + * ipath_update_eeprom_log - copy active-time and error counters to eeprom + * @dd: the infinipath device + * + * Although the time is kept as seconds in the ipath_devdata struct, it is + * rounded to hours for re-write, as we have only 16 bits in EEPROM. + * First-cut code reads whole (expected) struct ipath_flash, modifies, + * re-writes. Future direction: read/write only what we need, assuming + * that the EEPROM had to have been "good enough" for driver init, and + * if not, we aren't making it worse. + * + */ + +int ipath_update_eeprom_log(struct ipath_devdata *dd) +{ + void *buf; + struct ipath_flash *ifp; + int len, hi_water; + uint32_t new_time, new_hrs; + u8 csum; + int ret, idx; + unsigned long flags; + + /* first, check if we actually need to do anything. */ + ret = 0; + for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { + if (dd->ipath_eep_st_new_errs[idx]) { + ret = 1; + break; + } + } + new_time = atomic_read(&dd->ipath_active_time); + + if (ret == 0 && new_time < 3600) + return 0; + + /* + * The quick-check above determined that there is something worthy + * of logging, so get current contents and do a more detailed idea. + */ + len = offsetof(struct ipath_flash, if_future); + buf = vmalloc(len); + ret = 1; + if (!buf) { + ipath_dev_err(dd, "Couldn't allocate memory to read %u " + "bytes from eeprom for logging\n", len); + goto bail; + } + + /* Grab semaphore and read current EEPROM. If we get an + * error, let go, but if not, keep it until we finish write. + */ + ret = down_interruptible(&dd->ipath_eep_sem); + if (ret) { + ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); + goto free_bail; + } + ret = ipath_eeprom_internal_read(dd, 0, buf, len); + if (ret) { + up(&dd->ipath_eep_sem); + ipath_dev_err(dd, "Unable read EEPROM for logging\n"); + goto free_bail; + } + ifp = (struct ipath_flash *)buf; + + csum = flash_csum(ifp, 0); + if (csum != ifp->if_csum) { + up(&dd->ipath_eep_sem); + ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", + csum, ifp->if_csum); + ret = 1; + goto free_bail; + } + hi_water = 0; + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { + int new_val = dd->ipath_eep_st_new_errs[idx]; + if (new_val) { + /* + * If we have seen any errors, add to EEPROM values + * We need to saturate at 0xFF (255) and we also + * would need to adjust the checksum if we were + * trying to minimize EEPROM traffic + * Note that we add to actual current count in EEPROM, + * in case it was altered while we were running. + */ + new_val += ifp->if_errcntp[idx]; + if (new_val > 0xFF) + new_val = 0xFF; + if (ifp->if_errcntp[idx] != new_val) { + ifp->if_errcntp[idx] = new_val; + hi_water = offsetof(struct ipath_flash, + if_errcntp) + idx; + } + /* + * update our shadow (used to minimize EEPROM + * traffic), to match what we are about to write. + */ + dd->ipath_eep_st_errs[idx] = new_val; + dd->ipath_eep_st_new_errs[idx] = 0; + } + } + /* + * now update active-time. We would like to round to the nearest hour + * but unless atomic_t are sure to be proper signed ints we cannot, + * because we need to account for what we "transfer" to EEPROM and + * if we log an hour at 31 minutes, then we would need to set + * active_time to -29 to accurately count the _next_ hour. + */ + if (new_time > 3600) { + new_hrs = new_time / 3600; + atomic_sub((new_hrs * 3600), &dd->ipath_active_time); + new_hrs += dd->ipath_eep_hrs; + if (new_hrs > 0xFFFF) + new_hrs = 0xFFFF; + dd->ipath_eep_hrs = new_hrs; + if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) { + ifp->if_powerhour[0] = new_hrs & 0xFF; + hi_water = offsetof(struct ipath_flash, if_powerhour); + } + if ((new_hrs >> 8) != ifp->if_powerhour[1]) { + ifp->if_powerhour[1] = new_hrs >> 8; + hi_water = offsetof(struct ipath_flash, if_powerhour) + + 1; + } + } + /* + * There is a tiny possibility that we could somehow fail to write + * the EEPROM after updating our shadows, but problems from holding + * the spinlock too long are a much bigger issue. + */ + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); + if (hi_water) { + /* we made some change to the data, uopdate cksum and write */ + csum = flash_csum(ifp, 1); + ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); + } + up(&dd->ipath_eep_sem); + if (ret) + ipath_dev_err(dd, "Failed updating EEPROM\n"); + +free_bail: + vfree(buf); +bail: + return ret; + +} + +/** + * ipath_inc_eeprom_err - increment one of the four error counters + * that are logged to EEPROM. + * @dd: the infinipath device + * @eidx: 0..3, the counter to increment + * @incr: how much to add + * + * Each counter is 8-bits, and saturates at 255 (0xFF). They + * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log() + * is called, but it can only be called in a context that allows sleep. + * This function can be called even at interrupt level. + */ + +void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr) +{ + uint new_val; + unsigned long flags; + + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + new_val = dd->ipath_eep_st_new_errs[eidx] + incr; + if (new_val > 255) + new_val = 255; + dd->ipath_eep_st_new_errs[eidx] = new_val; + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); + return; +} diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 1272aaf..33ab0d6 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -396,7 +396,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, "TID %u, vaddr %lx, physaddr %llx pgp %p\n", tid, vaddr, (unsigned long long) physaddr, pagep[i]); - dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr); + dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED, + physaddr); /* * don't check this tid in ipath_portshadow, since we * just filled it in; start with the next one. @@ -422,7 +423,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, if (dd->ipath_pageshadow[porttid + tid]) { ipath_cdbg(VERBOSE, "Freeing TID %u\n", tid); - dd->ipath_f_put_tid(dd, &tidbase[tid], 1, + dd->ipath_f_put_tid(dd, &tidbase[tid], + RCVHQ_RCV_TYPE_EXPECTED, dd->ipath_tidinvalid); pci_unmap_page(dd->pcidev, dd->ipath_physshadow[porttid + tid], @@ -538,7 +540,8 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, if (dd->ipath_pageshadow[porttid + tid]) { ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", pd->port_pid, tid); - dd->ipath_f_put_tid(dd, &tidbase[tid], 1, + dd->ipath_f_put_tid(dd, &tidbase[tid], + RCVHQ_RCV_TYPE_EXPECTED, dd->ipath_tidinvalid); pci_unmap_page(dd->pcidev, dd->ipath_physshadow[porttid + tid], @@ -921,7 +924,8 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) (u64 __iomem *) ((char __iomem *) dd->ipath_kregbase + - dd->ipath_rcvegrbase), 0, pa); + dd->ipath_rcvegrbase), + RCVHQ_RCV_TYPE_EAGER, pa); pa += egrsize; } cond_resched(); /* don't hog the cpu */ @@ -1337,68 +1341,133 @@ bail: return ret; } -static unsigned int ipath_poll(struct file *fp, - struct poll_table_struct *pt) +static unsigned int ipath_poll_urgent(struct ipath_portdata *pd, + struct file *fp, + struct poll_table_struct *pt) { - struct ipath_portdata *pd; - u32 head, tail; - int bit; unsigned pollflag = 0; struct ipath_devdata *dd; - pd = port_fp(fp); - if (!pd) - goto bail; dd = pd->port_dd; - bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT; - set_bit(bit, &dd->ipath_rcvctrl); + if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) { + pollflag |= POLLERR; + clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag); + } - /* - * Before blocking, make sure that head is still == tail, - * reading from the chip, so we can be sure the interrupt - * enable has made it to the chip. If not equal, disable - * interrupt again and return immediately. This avoids races, - * and the overhead of the chip read doesn't matter much at - * this point, since we are waiting for something anyway. - */ + if (test_bit(IPATH_PORT_WAITING_URG, &pd->int_flag)) { + pollflag |= POLLIN | POLLRDNORM; + clear_bit(IPATH_PORT_WAITING_URG, &pd->int_flag); + } - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); + if (!pollflag) { + set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag); + if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) + set_bit(IPATH_PORT_WAITING_OVERFLOW, + &pd->port_flag); + + poll_wait(fp, &pd->port_wait, pt); + } + + return pollflag; +} + +static unsigned int ipath_poll_next(struct ipath_portdata *pd, + struct file *fp, + struct poll_table_struct *pt) +{ + u32 head, tail; + unsigned pollflag = 0; + struct ipath_devdata *dd; + + dd = pd->port_dd; head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); - tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); + tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr; - if (tail == head) { + if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) { + pollflag |= POLLERR; + clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag); + } + + if (tail != head || + test_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag)) { + pollflag |= POLLIN | POLLRDNORM; + clear_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag); + } + + if (!pollflag) { set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); + if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) + set_bit(IPATH_PORT_WAITING_OVERFLOW, + &pd->port_flag); + + set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT, + &dd->ipath_rcvctrl); + + ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, + dd->ipath_rcvctrl); + if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ - (void)ipath_write_ureg(dd, ur_rcvhdrhead, - dd->ipath_rhdrhead_intr_off - | head, pd->port_port); - poll_wait(fp, &pd->port_wait, pt); + ipath_write_ureg(dd, ur_rcvhdrhead, + dd->ipath_rhdrhead_intr_off | head, + pd->port_port); - if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { - /* timed out, no packets received */ - clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - pd->port_rcvwait_to++; - } - else - pollflag = POLLIN | POLLRDNORM; - } - else { - /* it's already happened; don't do wait_event overhead */ - pollflag = POLLIN | POLLRDNORM; - pd->port_rcvnowait++; + poll_wait(fp, &pd->port_wait, pt); } - clear_bit(bit, &dd->ipath_rcvctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); + return pollflag; +} + +static unsigned int ipath_poll(struct file *fp, + struct poll_table_struct *pt) +{ + struct ipath_portdata *pd; + unsigned pollflag; + + pd = port_fp(fp); + if (!pd) + pollflag = 0; + else if (pd->poll_type & IPATH_POLL_TYPE_URGENT) + pollflag = ipath_poll_urgent(pd, fp, pt); + else + pollflag = ipath_poll_next(pd, fp, pt); -bail: return pollflag; } +static int ipath_supports_subports(int user_swmajor, int user_swminor) +{ + /* no subport implementation prior to software version 1.3 */ + return (user_swmajor > 1) || (user_swminor >= 3); +} + +static int ipath_compatible_subports(int user_swmajor, int user_swminor) +{ + /* this code is written long-hand for clarity */ + if (IPATH_USER_SWMAJOR != user_swmajor) { + /* no promise of compatibility if major mismatch */ + return 0; + } + if (IPATH_USER_SWMAJOR == 1) { + switch (IPATH_USER_SWMINOR) { + case 0: + case 1: + case 2: + /* no subport implementation so cannot be compatible */ + return 0; + case 3: + /* 3 is only compatible with itself */ + return user_swminor == 3; + default: + /* >= 4 are compatible (or are expected to be) */ + return user_swminor >= 4; + } + } + /* make no promises yet for future major versions */ + return 0; +} + static int init_subports(struct ipath_devdata *dd, struct ipath_portdata *pd, const struct ipath_user_info *uinfo) @@ -1408,20 +1477,32 @@ static int init_subports(struct ipath_devdata *dd, size_t size; /* - * If the user is requesting zero or one port, + * If the user is requesting zero subports, * skip the subport allocation. */ - if (uinfo->spu_subport_cnt <= 1) + if (uinfo->spu_subport_cnt <= 0) + goto bail; + + /* Self-consistency check for ipath_compatible_subports() */ + if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) && + !ipath_compatible_subports(IPATH_USER_SWMAJOR, + IPATH_USER_SWMINOR)) { + dev_info(&dd->pcidev->dev, + "Inconsistent ipath_compatible_subports()\n"); goto bail; + } - /* Old user binaries don't know about new subport implementation */ - if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) { + /* Check for subport compatibility */ + if (!ipath_compatible_subports(uinfo->spu_userversion >> 16, + uinfo->spu_userversion & 0xffff)) { dev_info(&dd->pcidev->dev, - "Mismatched user minor version (%d) and driver " - "minor version (%d) while port sharing. Ensure " + "Mismatched user version (%d.%d) and driver " + "version (%d.%d) while port sharing. Ensure " "that driver and library are from the same " "release.\n", + (int) (uinfo->spu_userversion >> 16), (int) (uinfo->spu_userversion & 0xffff), + IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR); goto bail; } @@ -1725,14 +1806,13 @@ static int ipath_open(struct inode *in, struct file *fp) return fp->private_data ? 0 : -ENOMEM; } - /* Get port early, so can set affinity prior to memory allocation */ static int ipath_assign_port(struct file *fp, const struct ipath_user_info *uinfo) { int ret; int i_minor; - unsigned swminor; + unsigned swmajor, swminor; /* Check to be sure we haven't already initialized this file */ if (port_fp(fp)) { @@ -1741,7 +1821,8 @@ static int ipath_assign_port(struct file *fp, } /* for now, if major version is different, bail */ - if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { + swmajor = uinfo->spu_userversion >> 16; + if (swmajor != IPATH_USER_SWMAJOR) { ipath_dbg("User major version %d not same as driver " "major %d\n", uinfo->spu_userversion >> 16, IPATH_USER_SWMAJOR); @@ -1756,7 +1837,8 @@ static int ipath_assign_port(struct file *fp, mutex_lock(&ipath_mutex); - if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt && + if (ipath_compatible_subports(swmajor, swminor) && + uinfo->spu_subport_cnt && (ret = find_shared_port(fp, uinfo))) { mutex_unlock(&ipath_mutex); if (ret > 0) @@ -2020,7 +2102,8 @@ static int ipath_port_info(struct ipath_portdata *pd, u16 subport, info.port = pd->port_port; info.subport = subport; /* Don't return new fields if old library opened the port. */ - if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) { + if (ipath_supports_subports(pd->userversion >> 16, + pd->userversion & 0xffff)) { /* Number of user ports available for this device. */ info.num_ports = pd->port_dd->ipath_cfgports - 1; info.num_subports = pd->port_subport_cnt; @@ -2123,6 +2206,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, src = NULL; dest = NULL; break; + case IPATH_CMD_POLL_TYPE: + copy = sizeof(cmd.cmd.poll_type); + dest = &cmd.cmd.poll_type; + src = &ucmd->cmd.poll_type; + break; default: ret = -EINVAL; goto bail; @@ -2195,6 +2283,9 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, case IPATH_CMD_PIOAVAILUPD: ret = ipath_force_pio_avail_update(pd->port_dd); break; + case IPATH_CMD_POLL_TYPE: + pd->poll_type = cmd.cmd.poll_type; + break; } if (ret >= 0) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index ebd5c7b..2e689b9 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -257,9 +257,14 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf, /* Notimpl InitType (actually, an SMA decision) */ /* VLHighLimit is 0 (only one VL) */ ; /* VLArbitrationHighCap is 0 (only one VL) */ + /* + * Note: the chips support a maximum MTU of 4096, but the driver + * hasn't implemented this feature yet, so set the maximum + * to 2048. + */ portinfo[10] = /* VLArbitrationLowCap is 0 (only one VL) */ /* InitTypeReply is SMA decision */ - (5 << 16) /* MTUCap 4096 */ + (4 << 16) /* MTUCap 2048 */ | (7 << 13) /* VLStallCount */ | (0x1f << 8) /* HOQLife */ | (1 << 4) diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 4171198..650745d 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -36,6 +36,7 @@ * HT chip. */ +#include <linux/vmalloc.h> #include <linux/pci.h> #include <linux/delay.h> #include <linux/htirq.h> @@ -439,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, u32 bits, ctrl; int isfatal = 0; char bitsmsg[64]; + int log_idx; hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); @@ -467,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, hwerrs &= dd->ipath_hwerrmask; + /* We log some errors to EEPROM, check if we have any of those. */ + for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) + if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) + ipath_inc_eeprom_err(dd, log_idx, 1); + /* * make sure we get this much out, unless told to be quiet, * it's a parity error we may recover from, @@ -502,9 +509,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, if (!hwerrs) { ipath_dbg("Clearing freezemode on ignored or " "recovered hardware error\n"); - ctrl &= ~INFINIPATH_C_FREEZEMODE; - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - ctrl); + ipath_clear_freeze(dd); } } @@ -672,10 +677,16 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, if (n) snprintf(name, namelen, "%s", n); + if (dd->ipath_boardrev != 6 && dd->ipath_boardrev != 7 && + dd->ipath_boardrev != 11) { + ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name); + ret = 1; + goto bail; + } if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || - dd->ipath_minrev > 3)) { + dd->ipath_minrev > 4)) { /* - * This version of the driver only supports Rev 3.2 and 3.3 + * This version of the driver only supports Rev 3.2 - 3.4 */ ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n", @@ -689,36 +700,11 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, * copies */ dd->ipath_flags |= IPATH_32BITCOUNTERS; + dd->ipath_flags |= IPATH_GPIO_INTR; if (dd->ipath_htspeed != 800) ipath_dev_err(dd, "Incorrectly configured for HT @ %uMHz\n", dd->ipath_htspeed); - if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 || - dd->ipath_boardrev == 6) - dd->ipath_flags |= IPATH_GPIO_INTR; - else - dd->ipath_flags |= IPATH_POLL_RX_INTR; - if (dd->ipath_boardrev == 8) { /* LS/X-1 */ - u64 val; - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); - if (val & INFINIPATH_EXTS_SERDESSEL) { - /* - * hardware disabled - * - * This means that the chip is hardware disabled, - * and will not be able to bring up the link, - * in any case. We special case this and abort - * early, to avoid later messages. We also set - * the DISABLED status bit - */ - ipath_dbg("Unit %u is hardware-disabled\n", - dd->ipath_unit); - *dd->ipath_statusp |= IPATH_STATUS_DISABLED; - /* this value is handled differently */ - ret = 2; - goto bail; - } - } ret = 0; bail: @@ -1058,12 +1044,24 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd, u64 lst, u64 ltst) { u64 extctl; + unsigned long flags = 0; /* the diags use the LED to indicate diag info, so we leave * the external LED alone when the diags are running */ if (ipath_diag_inuse) return; + /* Allow override of LED display for, e.g. Locating system in rack */ + if (dd->ipath_led_override) { + ltst = (dd->ipath_led_override & IPATH_LED_PHYS) + ? INFINIPATH_IBCS_LT_STATE_LINKUP + : INFINIPATH_IBCS_LT_STATE_DISABLED; + lst = (dd->ipath_led_override & IPATH_LED_LOG) + ? INFINIPATH_IBCS_L_STATE_ACTIVE + : INFINIPATH_IBCS_L_STATE_DOWN; + } + + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); /* * start by setting both LED control bits to off, then turn * on the appropriate bit(s). @@ -1092,6 +1090,7 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd, } dd->ipath_extctrl = extctl; ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); } static void ipath_init_ht_variables(struct ipath_devdata *dd) @@ -1157,6 +1156,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd) dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + + /* + * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. + * 2 is Some Misc, 3 is reserved for future. + */ + dd->ipath_eep_st_masks[0].hwerrs_to_log = + INFINIPATH_HWE_TXEMEMPARITYERR_MASK << + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT; + + dd->ipath_eep_st_masks[1].hwerrs_to_log = + INFINIPATH_HWE_RXEMEMPARITYERR_MASK << + INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; + + dd->ipath_eep_st_masks[2].errs_to_log = + INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; + } /** @@ -1372,7 +1387,7 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd) * ipath_pe_put_tid - write a TID in chip * @dd: the infinipath device * @tidptr: pointer to the expected TID (in chip) to udpate - * @tidtype: 0 for eager, 1 for expected + * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing * * This exists as a separate routine to allow for special locking etc. @@ -1393,7 +1408,7 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, "40 bits, using only 40!!!\n", pa); pa &= INFINIPATH_RT_ADDR_MASK; } - if (type == 0) + if (type == RCVHQ_RCV_TYPE_EAGER) pa |= dd->ipath_tidtemplate; else { /* in words (fixed, full page). */ @@ -1433,7 +1448,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port) port * dd->ipath_rcvtidcnt * sizeof(*tidbase)); for (i = 0; i < dd->ipath_rcvtidcnt; i++) - ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid); + ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + dd->ipath_tidinvalid); tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + dd->ipath_rcvegrbase + @@ -1441,7 +1457,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port) sizeof(*tidbase)); for (i = 0; i < dd->ipath_rcvegrcnt; i++) - ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid); + ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + dd->ipath_tidinvalid); } /** @@ -1528,11 +1545,6 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) writel(16, piobuf); piobuf += pioincr; } - /* - * self-clearing - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); ipath_get_eeprom_info(dd); if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && @@ -1543,8 +1555,10 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) * with 128, rather than 112. */ dd->ipath_flags |= IPATH_GPIO_INTR; - dd->ipath_flags &= ~IPATH_POLL_RX_INTR; - } + } else + ipath_dev_err(dd, "Unsupported InfiniPath serial " + "number %.16s!\n", dd->ipath_serial); + return 0; } @@ -1561,7 +1575,6 @@ static int ipath_ht_txe_recover(struct ipath_devdata *dd) } dev_info(&dd->pcidev->dev, "Recovering from TXE PIO parity error\n"); - ipath_disarm_senderrbufs(dd, 1); return 1; } diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 4e2e3df..9868ccd 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -296,13 +296,6 @@ static const struct ipath_cregs ipath_pe_cregs = { #define IPATH_GPIO_SCL (1ULL << \ (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) -/* - * Rev2 silicon allows suppressing check for ArmLaunch errors. - * this can speed up short packet sends on systems that do - * not guaranteee write-order. - */ -#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63) - /* 6120 specific hardware errors... */ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), @@ -347,6 +340,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, u32 bits, ctrl; int isfatal = 0; char bitsmsg[64]; + int log_idx; hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); if (!hwerrs) { @@ -374,6 +368,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, hwerrs &= dd->ipath_hwerrmask; + /* We log some errors to EEPROM, check if we have any of those. */ + for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) + if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) + ipath_inc_eeprom_err(dd, log_idx, 1); + /* * make sure we get this much out, unless told to be quiet, * or it's occurred within the last 5 seconds @@ -431,10 +430,12 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, *dd->ipath_statusp |= IPATH_STATUS_HWERROR; dd->ipath_flags &= ~IPATH_INITTED; } else { - ipath_dbg("Clearing freezemode on ignored hardware " - "error\n"); - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); + static u32 freeze_cnt; + + freeze_cnt++; + ipath_dbg("Clearing freezemode on ignored or recovered " + "hardware error (%u)\n", freeze_cnt); + ipath_clear_freeze(dd); } } @@ -680,17 +681,6 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) val |= dd->ipath_rx_pol_inv << INFINIPATH_XGXS_RX_POL_SHIFT; } - if (dd->ipath_minrev >= 2) { - /* Rev 2. can tolerate multiple writes to PBC, and - * allowing them can provide lower latency on some - * CPUs, but this feature is off by default, only - * turned on by setting D63 of XGXSconfig reg. - * May want to make this conditional more - * fine-grained in future. This is not exactly - * related to XGXS, but where the bit ended up. - */ - val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR; - } if (val != prev_val) ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); @@ -791,12 +781,24 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst, u64 ltst) { u64 extctl; + unsigned long flags = 0; /* the diags use the LED to indicate diag info, so we leave * the external LED alone when the diags are running */ if (ipath_diag_inuse) return; + /* Allow override of LED display for, e.g. Locating system in rack */ + if (dd->ipath_led_override) { + ltst = (dd->ipath_led_override & IPATH_LED_PHYS) + ? INFINIPATH_IBCS_LT_STATE_LINKUP + : INFINIPATH_IBCS_LT_STATE_DISABLED; + lst = (dd->ipath_led_override & IPATH_LED_LOG) + ? INFINIPATH_IBCS_L_STATE_ACTIVE + : INFINIPATH_IBCS_L_STATE_DOWN; + } + + spin_lock_irqsave(&dd->ipath_gpio_lock, flags); extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON | INFINIPATH_EXTC_LED2PRIPORT_ON); @@ -806,6 +808,7 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst, extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON; dd->ipath_extctrl = extctl; ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); + spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); } /** @@ -955,6 +958,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd) dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; + + /* + * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. + * 2 is Some Misc, 3 is reserved for future. + */ + dd->ipath_eep_st_masks[0].hwerrs_to_log = + INFINIPATH_HWE_TXEMEMPARITYERR_MASK << + INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT; + + /* Ignore errors in PIO/PBC on systems with unordered write-combining */ + if (ipath_unordered_wc()) + dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY; + + dd->ipath_eep_st_masks[1].hwerrs_to_log = + INFINIPATH_HWE_RXEMEMPARITYERR_MASK << + INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; + + dd->ipath_eep_st_masks[2].errs_to_log = + INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; + + } /* setup the MSI stuff again after a reset. I'd like to just call @@ -1082,7 +1106,7 @@ bail: * ipath_pe_put_tid - write a TID in chip * @dd: the infinipath device * @tidptr: pointer to the expected TID (in chip) to udpate - * @tidtype: 0 for eager, 1 for expected + * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing * * This exists as a separate routine to allow for special locking etc. @@ -1108,7 +1132,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, "BUG: Physical page address 0x%lx " "has bits set in 31-29\n", pa); - if (type == 0) + if (type == RCVHQ_RCV_TYPE_EAGER) pa |= dd->ipath_tidtemplate; else /* for now, always full 4KB page */ pa |= 2 << 29; @@ -1132,7 +1156,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher * @dd: the infinipath device * @tidptr: pointer to the expected TID (in chip) to udpate - * @tidtype: 0 for eager, 1 for expected + * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing * * This exists as a separate routine to allow for selection of the @@ -1157,7 +1181,7 @@ static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr, "BUG: Physical page address 0x%lx " "has bits set in 31-29\n", pa); - if (type == 0) + if (type == RCVHQ_RCV_TYPE_EAGER) pa |= dd->ipath_tidtemplate; else /* for now, always full 4KB page */ pa |= 2 << 29; @@ -1196,7 +1220,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port) port * dd->ipath_rcvtidcnt * sizeof(*tidbase)); for (i = 0; i < dd->ipath_rcvtidcnt; i++) - ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv); + ipath_pe_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + tidinv); tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + @@ -1204,7 +1229,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port) port * dd->ipath_rcvegrcnt * sizeof(*tidbase)); for (i = 0; i < dd->ipath_rcvegrcnt; i++) - ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv); + ipath_pe_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + tidinv); } /** @@ -1311,13 +1337,6 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) dd = pd->port_dd; - if (dd != NULL && dd->ipath_minrev >= 2) { - ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n"); - kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE; - ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n"); - kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN; - } - done: kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; return 0; @@ -1354,7 +1373,6 @@ static int ipath_pe_txe_recover(struct ipath_devdata *dd) dev_info(&dd->pcidev->dev, "Recovering from TXE PIO parity error\n"); } - ipath_disarm_senderrbufs(dd, 1); return 1; } diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 7045ba6..49951d5 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -133,7 +133,8 @@ static int create_port0_egr(struct ipath_devdata *dd) dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE); dd->ipath_f_put_tid(dd, e + (u64 __iomem *) ((char __iomem *) dd->ipath_kregbase + - dd->ipath_rcvegrbase), 0, + dd->ipath_rcvegrbase), + RCVHQ_RCV_TYPE_EAGER, dd->ipath_port0_skbinfo[e].phys); } @@ -310,7 +311,12 @@ static int init_chip_first(struct ipath_devdata *dd, val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize); dd->ipath_piosize2k = val & ~0U; dd->ipath_piosize4k = val >> 32; - dd->ipath_ibmtu = 4096; /* default to largest legal MTU */ + /* + * Note: the chips support a maximum MTU of 4096, but the driver + * hasn't implemented this feature yet, so set the initial value + * to 2048. + */ + dd->ipath_ibmtu = 2048; val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt); dd->ipath_piobcnt2k = val & ~0U; dd->ipath_piobcnt4k = val >> 32; @@ -340,6 +346,10 @@ static int init_chip_first(struct ipath_devdata *dd, spin_lock_init(&dd->ipath_tid_lock); + spin_lock_init(&dd->ipath_gpio_lock); + spin_lock_init(&dd->ipath_eep_st_lock); + sema_init(&dd->ipath_eep_sem, 1); + done: *pdp = pd; return ret; @@ -646,7 +656,7 @@ static int init_housekeeping(struct ipath_devdata *dd, ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn); snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion), - "Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, " + "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, " "SW Compat %u\n", IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn, (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) & @@ -727,7 +737,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; if (ipath_kpiobufs == 0) { /* not set by user (this is default) */ - if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32) + if (piobufs > 144) kpiobufs = 32; else kpiobufs = 16; @@ -767,6 +777,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) piobufs, dd->ipath_pbufsport, uports); dd->ipath_f_early_init(dd); + /* + * cancel any possible active sends from early driver load. + * Follows early_init because some chips have to initialize + * PIO buffers in early_init to avoid false parity errors. + */ + ipath_cancel_sends(dd); /* early_init sets rcvhdrentsize and rcvhdrsize, so this must be * done after early_init */ diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index a90d3b5..47aa434 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -93,7 +93,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { int i; - if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) { + if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) && + dd->ipath_lastcancel > jiffies) { __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, "SendbufErrs %lx %lx", sbuf[0], sbuf[1]); @@ -108,7 +109,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) ipath_clrpiobuf(dd, i); ipath_disarm_piobufs(dd, i, 1); } - dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ + /* ignore armlaunch errs for a bit */ + dd->ipath_lastcancel = jiffies+3; } } @@ -131,6 +133,17 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) INFINIPATH_E_INVALIDADDR) /* + * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore + * errors not related to freeze and cancelling buffers. Can't ignore + * armlaunch because could get more while still cleaning up, and need + * to cancel those as they happen. + */ +#define E_SPKT_ERRS_IGNORE \ + (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ + INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \ + INFINIPATH_E_SPKTLEN) + +/* * these are errors that can occur when the link changes state while * a packet is being sent or received. This doesn't cover things * like EBP or VCRC that can be the result of a sending having the @@ -290,12 +303,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, * Flush all queued sends when link went to DOWN or INIT, * to be sure that they don't block SMA and other MAD packets */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, - INFINIPATH_S_ABORT); - ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf, - (unsigned)(dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k) - - dd->ipath_lastport_piobuf); + ipath_cancel_sends(dd); } else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM || lstate == IPATH_IBSTATE_ACTIVE) { @@ -505,6 +513,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) int i, iserr = 0; int chkerrpkts = 0, noprint = 0; unsigned supp_msgs; + int log_idx; supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); @@ -518,6 +527,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) if (errs & INFINIPATH_E_HARDWARE) { /* reuse same msg buf */ dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); + } else { + u64 mask; + for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) { + mask = dd->ipath_eep_st_masks[log_idx].errs_to_log; + if (errs & mask) + ipath_inc_eeprom_err(dd, log_idx, 1); + } } if (!noprint && (errs & ~dd->ipath_e_bitsextant)) @@ -675,6 +691,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) chkerrpkts = 1; dd->ipath_lastrcvhdrqtails[i] = tl; pd->port_hdrqfull++; + if (test_bit(IPATH_PORT_WAITING_OVERFLOW, + &pd->port_flag)) { + clear_bit( + IPATH_PORT_WAITING_OVERFLOW, + &pd->port_flag); + set_bit( + IPATH_PORT_WAITING_OVERFLOW, + &pd->int_flag); + wake_up_interruptible( + &pd->port_wait); + } } } } @@ -744,6 +771,72 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) return chkerrpkts; } + +/* + * try to cleanup as much as possible for anything that might have gone + * wrong while in freeze mode, such as pio buffers being written by user + * processes (causing armlaunch), send errors due to going into freeze mode, + * etc., and try to avoid causing extra interrupts while doing so. + * Forcibly update the in-memory pioavail register copies after cleanup + * because the chip won't do it for anything changing while in freeze mode + * (we don't want to wait for the next pio buffer state change). + * Make sure that we don't lose any important interrupts by using the chip + * feature that says that writing 0 to a bit in *clear that is set in + * *status will cause an interrupt to be generated again (if allowed by + * the *mask value). + */ +void ipath_clear_freeze(struct ipath_devdata *dd) +{ + int i, im; + __le64 val; + + /* disable error interrupts, to avoid confusion */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); + + /* + * clear all sends, because they have may been + * completed by usercode while in freeze mode, and + * therefore would not be sent, and eventually + * might cause the process to run out of bufs + */ + ipath_cancel_sends(dd); + ipath_write_kreg(dd, dd->ipath_kregs->kr_control, + dd->ipath_control); + + /* ensure pio avail updates continue */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl & ~IPATH_S_PIOBUFAVAILUPD); + ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, + dd->ipath_sendctrl); + + /* + * We just enabled pioavailupdate, so dma copy is almost certainly + * not yet right, so read the registers directly. Similar to init + */ + for (i = 0; i < dd->ipath_pioavregs; i++) { + /* deal with 6110 chip bug */ + im = i > 3 ? ((i&1) ? i-1 : i+1) : i; + val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64))); + dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i] + = le64_to_cpu(val); + } + + /* + * force new interrupt if any hwerr, error or interrupt bits are + * still set, and clear "safe" send packet errors related to freeze + * and cancelling sends. Re-enable error interrupts before possible + * force of re-interrupt on pending interrupts. + */ + ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); + ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, + E_SPKT_ERRS_IGNORE); + ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, + ~dd->ipath_maskederrs); + ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); +} + + /* this is separate to allow for better optimization of ipath_intr() */ static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) @@ -872,14 +965,25 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) dd->ipath_i_rcvurg_mask); for (i = 1; i < dd->ipath_cfgports; i++) { struct ipath_portdata *pd = dd->ipath_pd[i]; - if (portr & (1 << i) && pd && pd->port_cnt && - test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { - clear_bit(IPATH_PORT_WAITING_RCV, - &pd->port_flag); - clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, - &dd->ipath_rcvctrl); - wake_up_interruptible(&pd->port_wait); - rcvdint = 1; + if (portr & (1 << i) && pd && pd->port_cnt) { + if (test_bit(IPATH_PORT_WAITING_RCV, + &pd->port_flag)) { + clear_bit(IPATH_PORT_WAITING_RCV, + &pd->port_flag); + set_bit(IPATH_PORT_WAITING_RCV, + &pd->int_flag); + clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, + &dd->ipath_rcvctrl); + wake_up_interruptible(&pd->port_wait); + rcvdint = 1; + } else if (test_bit(IPATH_PORT_WAITING_URG, + &pd->port_flag)) { + clear_bit(IPATH_PORT_WAITING_URG, + &pd->port_flag); + set_bit(IPATH_PORT_WAITING_URG, + &pd->int_flag); + wake_up_interruptible(&pd->port_wait); + } } } if (rcvdint) { @@ -905,6 +1009,9 @@ irqreturn_t ipath_intr(int irq, void *data) ipath_stats.sps_ints++; + if (dd->ipath_int_counter != (u32) -1) + dd->ipath_int_counter++; + if (!(dd->ipath_flags & IPATH_PRESENT)) { /* * This return value is not great, but we do not want the diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 12194f3..3105005 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -1,7 +1,7 @@ #ifndef _IPATH_KERNEL_H #define _IPATH_KERNEL_H /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -57,6 +57,24 @@ extern struct infinipath_stats ipath_stats; #define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ +/* + * First-cut critierion for "device is active" is + * two thousand dwords combined Tx, Rx traffic per + * 5-second interval. SMA packets are 64 dwords, + * and occur "a few per second", presumably each way. + */ +#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000) +/* + * Struct used to indicate which errors are logged in each of the + * error-counters that are logged to EEPROM. A counter is incremented + * _once_ (saturating at 255) for each event with any bits set in + * the error or hwerror register masks below. + */ +#define IPATH_EEP_LOG_CNT (4) +struct ipath_eep_log_mask { + u64 errs_to_log; + u64 hwerrs_to_log; +}; struct ipath_portdata { void **port_rcvegrbuf; @@ -109,6 +127,8 @@ struct ipath_portdata { u32 port_tidcursor; /* next expected TID to check */ unsigned long port_flag; + /* what happened */ + unsigned long int_flag; /* WAIT_RCV that timed out, no interrupt */ u32 port_rcvwait_to; /* WAIT_PIO that timed out, no interrupt */ @@ -137,6 +157,8 @@ struct ipath_portdata { u32 userversion; /* Bitmask of active slaves */ u32 active_slaves; + /* Type of packets or conditions we want to poll for */ + u16 poll_type; }; struct sk_buff; @@ -275,6 +297,8 @@ struct ipath_devdata { u32 ipath_lastport_piobuf; /* is a stats timer active */ u32 ipath_stats_timer_active; + /* number of interrupts for this device -- saturates... */ + u32 ipath_int_counter; /* dwords sent read from counter */ u32 ipath_lastsword; /* dwords received read from counter */ @@ -369,9 +393,6 @@ struct ipath_devdata { struct class_device *diag_class_dev; /* timer used to prevent stats overflow, error throttling, etc. */ struct timer_list ipath_stats_timer; - /* check for stale messages in rcv queue */ - /* only allow one intr at a time. */ - unsigned long ipath_rcv_pending; void *ipath_dummy_hdrq; /* used after port close */ dma_addr_t ipath_dummy_hdrq_phys; @@ -399,6 +420,8 @@ struct ipath_devdata { u64 ipath_gpio_out; /* shadow the gpio mask register */ u64 ipath_gpio_mask; + /* shadow the gpio output enable, etc... */ + u64 ipath_extctrl; /* kr_revision shadow */ u64 ipath_revision; /* @@ -473,8 +496,6 @@ struct ipath_devdata { u32 ipath_cregbase; /* shadow the control register contents */ u32 ipath_control; - /* shadow the gpio output contents */ - u32 ipath_extctrl; /* PCI revision register (HTC rev on FPGA) */ u32 ipath_pcirev; @@ -552,6 +573,9 @@ struct ipath_devdata { u32 ipath_overrun_thresh_errs; u32 ipath_lli_errs; + /* status check work */ + struct delayed_work status_work; + /* * Not all devices managed by a driver instance are the same * type, so these fields must be per-device. @@ -575,6 +599,37 @@ struct ipath_devdata { u16 ipath_gpio_scl_num; u64 ipath_gpio_sda; u64 ipath_gpio_scl; + + /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */ + spinlock_t ipath_gpio_lock; + + /* used to override LED behavior */ + u8 ipath_led_override; /* Substituted for normal value, if non-zero */ + u16 ipath_led_override_timeoff; /* delta to next timer event */ + u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */ + u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */ + atomic_t ipath_led_override_timer_active; + /* Used to flash LEDs in override mode */ + struct timer_list ipath_led_override_timer; + + /* Support (including locks) for EEPROM logging of errors and time */ + /* control access to actual counters, timer */ + spinlock_t ipath_eep_st_lock; + /* control high-level access to EEPROM */ + struct semaphore ipath_eep_sem; + /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */ + uint64_t ipath_traffic_wds; + /* active time is kept in seconds, but logged in hours */ + atomic_t ipath_active_time; + /* Below are nominal shadow of EEPROM, new since last EEPROM update */ + uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT]; + uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT]; + uint16_t ipath_eep_hrs; + /* + * masks for which bits of errs, hwerrs that cause + * each of the counters to increment. + */ + struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT]; }; /* Private data for file operations */ @@ -592,6 +647,7 @@ int ipath_enable_wc(struct ipath_devdata *dd); void ipath_disable_wc(struct ipath_devdata *dd); int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); void ipath_shutdown_device(struct ipath_devdata *); +void ipath_clear_freeze(struct ipath_devdata *); struct file_operations; int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, @@ -627,6 +683,7 @@ int ipath_unordered_wc(void); void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first, unsigned cnt); +void ipath_cancel_sends(struct ipath_devdata *); int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *); void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *); @@ -685,7 +742,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); * are 64bit */ #define IPATH_32BITCOUNTERS 0x20000 /* can miss port0 rx interrupts */ -#define IPATH_POLL_RX_INTR 0x40000 #define IPATH_DISABLED 0x80000 /* administratively disabled */ /* Use GPIO interrupts for new counters */ #define IPATH_GPIO_ERRINTRS 0x100000 @@ -704,6 +760,10 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); #define IPATH_PORT_WAITING_PIO 3 /* master has not finished initializing */ #define IPATH_PORT_MASTER_UNINIT 4 + /* waiting for an urgent packet to arrive */ +#define IPATH_PORT_WAITING_URG 5 + /* waiting for a header overflow */ +#define IPATH_PORT_WAITING_OVERFLOW 6 /* free up any allocated data at closes */ void ipath_free_data(struct ipath_portdata *dd); @@ -713,10 +773,21 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); void ipath_init_iba6120_funcs(struct ipath_devdata *); void ipath_init_iba6110_funcs(struct ipath_devdata *); void ipath_get_eeprom_info(struct ipath_devdata *); +int ipath_update_eeprom_log(struct ipath_devdata *dd); +void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); void ipath_disarm_senderrbufs(struct ipath_devdata *, int); /* + * Set LED override, only the two LSBs have "public" meaning, but + * any non-zero value substitutes them for the Link and LinkTrain + * LED states. + */ +#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */ +#define IPATH_LED_LOG 2 /* Logical (link) YELLOW LED */ +void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val); + +/* * number of words used for protocol header if not set by ipath_userinit(); */ #define IPATH_DFLT_RCVHDRSIZE 9 diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index dd487c1..85a4aef 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c index 05a1d2b..82616b7 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.c +++ b/drivers/infiniband/hw/ipath/ipath_layer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h index 3854a4e..415709c 100644 --- a/drivers/infiniband/hw/ipath/ipath_layer.h +++ b/drivers/infiniband/hw/ipath/ipath_layer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 25908b0..d61c030 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -103,7 +103,7 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp, /* This is already in network order */ nip->sys_guid = to_idev(ibdev)->sys_image_guid; nip->node_guid = dd->ipath_guid; - nip->port_guid = nip->sys_guid; + nip->port_guid = dd->ipath_guid; nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd)); nip->device_id = cpu_to_be16(dd->ipath_deviceid); majrev = dd->ipath_majrev; @@ -292,7 +292,12 @@ static int recv_subn_get_portinfo(struct ib_smp *smp, /* pip->vl_arb_high_cap; // only one VL */ /* pip->vl_arb_low_cap; // only one VL */ /* InitTypeReply = 0 */ - pip->inittypereply_mtucap = IB_MTU_4096; + /* + * Note: the chips support a maximum MTU of 4096, but the driver + * hasn't implemented this feature yet, so set the maximum value + * to 2048. + */ + pip->inittypereply_mtucap = IB_MTU_2048; // HCAs ignore VLStallCount and HOQLife /* pip->vlstallcnt_hoqlife; */ pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */ diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c index 937bc33..fa830e2 100644 --- a/drivers/infiniband/hw/ipath/ipath_mmap.c +++ b/drivers/infiniband/hw/ipath/ipath_mmap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index bdeef8d..e442470 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index bfef08e..1324b35 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -336,7 +336,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->qkey = 0; qp->qp_access_flags = 0; qp->s_busy = 0; - qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR; + qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_psn = 0; qp->r_psn = 0; @@ -507,16 +507,13 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->port_num > ibqp->device->phys_port_cnt) goto inval; + /* + * Note: the chips support a maximum MTU of 4096, but the driver + * hasn't implemented this feature yet, so don't allow Path MTU + * values greater than 2048. + */ if (attr_mask & IB_QP_PATH_MTU) - if (attr->path_mtu > IB_MTU_4096) - goto inval; - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - if (attr->max_dest_rd_atomic > 1) - goto inval; - - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) - if (attr->max_rd_atomic > 1) + if (attr->path_mtu > IB_MTU_2048) goto inval; if (attr_mask & IB_QP_PATH_MIG_STATE) diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 1915771..46744ea 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -125,8 +125,10 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, if (len > pmtu) { len = pmtu; qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); - } else + } else { qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); + e->sent = 1; + } ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; qp->s_ack_rdma_psn = e->psn; @@ -143,6 +145,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, cpu_to_be32(e->atomic_data); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = e->psn; + e->sent = 1; } bth0 = qp->s_ack_state << 24; break; @@ -158,6 +161,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1; } bth0 = qp->s_ack_state << 24; bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; @@ -188,7 +192,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp, } qp->s_hdrwords = hwords; qp->s_cur_size = len; - *bth0p = bth0; + *bth0p = bth0 | (1 << 22); /* Set M bit */ *bth2p = bth2; return 1; @@ -240,7 +244,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; - bth0 = 0; + bth0 = 1 << 22; /* Set M bit */ /* Send a request. */ wqe = get_swqe_ptr(qp, qp->s_cur); @@ -604,7 +608,7 @@ static void send_rc_ack(struct ipath_qp *qp) } /* read pkey_index w/o lock (its atomic) */ bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) | - OP(ACKNOWLEDGE) << 24; + (OP(ACKNOWLEDGE) << 24) | (1 << 22); if (qp->r_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | (qp->r_nak_state << @@ -806,13 +810,15 @@ static inline void update_last_psn(struct ipath_qp *qp, u32 psn) * Called at interrupt level with the QP s_lock held and interrupts disabled. * Returns 1 if OK, 0 if current operation should be aborted (NAK). */ -static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) +static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, + u64 val) { struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; struct ipath_swqe *wqe; int ret = 0; u32 ack_psn; + int diff; /* * Remove the QP from the timeout queue (or RNR timeout queue). @@ -840,7 +846,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) * The MSN might be for a later WQE than the PSN indicates so * only complete WQEs that the PSN finishes. */ - while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) { + while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) { + /* + * RDMA_READ_RESPONSE_ONLY is a special case since + * we want to generate completion events for everything + * before the RDMA read, copy the data, then generate + * the completion for the read. + */ + if (wqe->wr.opcode == IB_WR_RDMA_READ && + opcode == OP(RDMA_READ_RESPONSE_ONLY) && + diff == 0) { + ret = 1; + goto bail; + } /* * If this request is a RDMA read or atomic, and the ACK is * for a later operation, this ACK NAKs the RDMA read or @@ -851,12 +869,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) * is sent but before the response is received. */ if ((wqe->wr.opcode == IB_WR_RDMA_READ && - (opcode != OP(RDMA_READ_RESPONSE_LAST) || - ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || + (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) || ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && - (opcode != OP(ATOMIC_ACKNOWLEDGE) || - ipath_cmp24(wqe->psn, psn) != 0))) { + (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { /* * The last valid PSN seen is the previous * request's. @@ -870,6 +886,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) */ goto bail; } + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + *(u64 *) wqe->sg_list[0].vaddr = val; if (qp->s_num_rd_atomic && (wqe->wr.opcode == IB_WR_RDMA_READ || wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || @@ -1079,6 +1098,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, int diff; u32 pad; u32 aeth; + u64 val; spin_lock_irqsave(&qp->s_lock, flags); @@ -1118,8 +1138,6 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, data += sizeof(__be32); } if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - u64 val; - if (!header_in_data) { __be32 *p = ohdr->u.at.atomic_ack_eth; @@ -1127,12 +1145,13 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, be32_to_cpu(p[1]); } else val = be64_to_cpu(((__be64 *) data)[0]); - *(u64 *) wqe->sg_list[0].vaddr = val; - } - if (!do_rc_ack(qp, aeth, psn, opcode) || + } else + val = 0; + if (!do_rc_ack(qp, aeth, psn, opcode, val) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; hdrsize += 4; + wqe = get_swqe_ptr(qp, qp->s_last); if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; /* @@ -1176,13 +1195,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, goto bail; case OP(RDMA_READ_RESPONSE_ONLY): - if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { - dev->n_rdma_seq++; - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + if (!header_in_data) + aeth = be32_to_cpu(ohdr->u.aeth); + else + aeth = be32_to_cpu(((__be32 *) data)[0]); + if (!do_rc_ack(qp, aeth, psn, opcode, 0)) goto ack_done; - } - if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) - goto ack_op_err; /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; /* @@ -1197,6 +1215,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, * have to be careful to copy the data to the right * location. */ + wqe = get_swqe_ptr(qp, qp->s_last); qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, wqe, psn, pmtu); goto read_last; @@ -1230,7 +1249,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, data += sizeof(__be32); } ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); - (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST)); + (void) do_rc_ack(qp, aeth, psn, + OP(RDMA_READ_RESPONSE_LAST), 0); goto ack_done; } @@ -1344,8 +1364,11 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, e = NULL; break; } - if (ipath_cmp24(psn, e->psn) >= 0) + if (ipath_cmp24(psn, e->psn) >= 0) { + if (prev == qp->s_tail_ack_queue) + old_req = 0; break; + } } switch (opcode) { case OP(RDMA_READ_REQUEST): { @@ -1460,6 +1483,22 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) spin_unlock_irqrestore(&qp->s_lock, flags); } +static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) +{ + unsigned long flags; + unsigned next; + + next = n + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + if (n == qp->s_tail_ack_queue) { + qp->s_tail_ack_queue = next; + qp->s_ack_state = OP(ACKNOWLEDGE); + } + spin_unlock_irqrestore(&qp->s_lock, flags); +} + /** * ipath_rc_rcv - process an incoming RC packet * @dev: the device this packet came in on @@ -1672,6 +1711,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, case OP(RDMA_WRITE_FIRST): case OP(RDMA_WRITE_ONLY): case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_WRITE))) + goto nack_inv; /* consume RWQE */ /* RETH comes after BTH */ if (!header_in_data) @@ -1701,9 +1743,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, qp->r_sge.sge.length = 0; qp->r_sge.sge.sge_length = 0; } - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_WRITE))) - goto nack_acc; if (opcode == OP(RDMA_WRITE_FIRST)) goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) @@ -1717,13 +1756,17 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, u32 len; u8 next; - if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) - goto nack_acc; + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_READ))) + goto nack_inv; next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - if (unlikely(next == qp->s_tail_ack_queue)) - goto nack_inv; + if (unlikely(next == qp->s_tail_ack_queue)) { + if (!qp->s_ack_queue[next].sent) + goto nack_inv; + ipath_update_ack_queue(qp, next); + } e = &qp->s_ack_queue[qp->r_head_ack_queue]; /* RETH comes after BTH */ if (!header_in_data) @@ -1758,6 +1801,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, e->rdma_sge.sge.sge_length = 0; } e->opcode = opcode; + e->sent = 0; e->psn = psn; /* * We need to increment the MSN here instead of when we @@ -1789,12 +1833,15 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc; + goto nack_inv; next = qp->r_head_ack_queue + 1; if (next > IPATH_MAX_RDMA_ATOMIC) next = 0; - if (unlikely(next == qp->s_tail_ack_queue)) - goto nack_inv; + if (unlikely(next == qp->s_tail_ack_queue)) { + if (!qp->s_ack_queue[next].sent) + goto nack_inv; + ipath_update_ack_queue(qp, next); + } if (!header_in_data) ateth = &ohdr->u.atomic_eth; else @@ -1819,6 +1866,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, be64_to_cpu(ateth->compare_data), sdata); e->opcode = opcode; + e->sent = 0; e->psn = psn & IPATH_PSN_MASK; qp->r_msn++; qp->r_psn++; diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index c182bcd..708eba3 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index d9c2a9b..8525674 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -194,6 +194,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) ret = 0; goto bail; } + /* Make sure entry is read after head index is read. */ + smp_rmb(); wqe = get_rwqe_ptr(rq, tail); if (++tail >= rq->size) tail = 0; @@ -267,7 +269,7 @@ again: spin_lock_irqsave(&sqp->s_lock, flags); if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || - qp->s_rnr_timeout) { + sqp->s_rnr_timeout) { spin_unlock_irqrestore(&sqp->s_lock, flags); goto done; } @@ -319,12 +321,22 @@ again: break; case IB_WR_RDMA_WRITE_WITH_IMM: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_WRITE))) { + wc.status = IB_WC_REM_INV_REQ_ERR; + goto err; + } wc.wc_flags = IB_WC_WITH_IMM; wc.imm_data = wqe->wr.imm_data; if (!ipath_get_rwqe(qp, 1)) goto rnr_nak; /* FALLTHROUGH */ case IB_WR_RDMA_WRITE: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_WRITE))) { + wc.status = IB_WC_REM_INV_REQ_ERR; + goto err; + } if (wqe->length == 0) break; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, @@ -354,8 +366,10 @@ again: case IB_WR_RDMA_READ: if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) - goto acc_err; + IB_ACCESS_REMOTE_READ))) { + wc.status = IB_WC_REM_INV_REQ_ERR; + goto err; + } if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, @@ -369,8 +383,10 @@ again: case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC))) - goto acc_err; + IB_ACCESS_REMOTE_ATOMIC))) { + wc.status = IB_WC_REM_INV_REQ_ERR; + goto err; + } if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), wqe->wr.wr.atomic.remote_addr, wqe->wr.wr.atomic.rkey, @@ -396,6 +412,8 @@ again: if (len > sge->length) len = sge->length; + if (len > sge->sge_length) + len = sge->sge_length; BUG_ON(len == 0); ipath_copy_sge(&qp->r_sge, sge->vaddr, len); sge->vaddr += len; @@ -503,11 +521,9 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) * could be called. If we are still in the tasklet function, * tasklet_hi_schedule() will not call us until the next time * tasklet_hi_schedule() is called. - * We clear the tasklet flag now since we are committing to return - * from the tasklet function. + * We leave the busy flag set so that another post send doesn't + * try to put the same QP on the piowait list again. */ - clear_bit(IPATH_S_BUSY, &qp->s_busy); - tasklet_unlock(&qp->s_task); want_buffer(dev->dd); dev->n_piowait++; } diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 03acae6..40c36ec 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -80,6 +80,8 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe->num_sge = wr->num_sge; for (i = 0; i < wr->num_sge; i++) wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); wq->head = next; spin_unlock_irqrestore(&srq->rq.lock, flags); } diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index d8b5e4c..73ed17d 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -55,6 +55,7 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) u64 val64; unsigned long t0, t1; u64 ret; + unsigned long flags; t0 = jiffies; /* If fast increment counters are only 32 bits, snapshot them, @@ -91,12 +92,18 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) if (creg == dd->ipath_cregs->cr_wordsendcnt) { if (val != dd->ipath_lastsword) { dd->ipath_sword += val - dd->ipath_lastsword; + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + dd->ipath_traffic_wds += val - dd->ipath_lastsword; + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); dd->ipath_lastsword = val; } val64 = dd->ipath_sword; } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { if (val != dd->ipath_lastrword) { dd->ipath_rword += val - dd->ipath_lastrword; + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + dd->ipath_traffic_wds += val - dd->ipath_lastrword; + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); dd->ipath_lastrword = val; } val64 = dd->ipath_rword; @@ -200,6 +207,7 @@ void ipath_get_faststats(unsigned long opaque) struct ipath_devdata *dd = (struct ipath_devdata *) opaque; u32 val; static unsigned cnt; + unsigned long flags; /* * don't access the chip while running diags, or memory diags can @@ -210,9 +218,20 @@ void ipath_get_faststats(unsigned long opaque) /* but re-arm the timer, for diags case; won't hurt other */ goto done; + /* + * We now try to maintain a "active timer", based on traffic + * exceeding a threshold, so we need to check the word-counts + * even if they are 64-bit. + */ + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); + ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); + spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); + if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD) + atomic_add(5, &dd->ipath_active_time); /* S/B #define */ + dd->ipath_traffic_wds = 0; + spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); + if (dd->ipath_flags & IPATH_32BITCOUNTERS) { - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); - ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); } diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index 4dc398d..16238cd 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -596,6 +596,43 @@ bail: return ret; } +static ssize_t store_led_override(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int ret; + u16 val; + + ret = ipath_parse_ushort(buf, &val); + if (ret > 0) + ipath_set_led_override(dd, val); + else + ipath_dev_err(dd, "attempt to set invalid LED override\n"); + return ret; +} + +static ssize_t show_logged_errs(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ipath_devdata *dd = dev_get_drvdata(dev); + int idx, count; + + /* force consistency with actual EEPROM */ + if (ipath_update_eeprom_log(dd) != 0) + return -ENXIO; + + count = 0; + for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { + count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c", + dd->ipath_eep_st_errs[idx], + idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' '); + } + + return count; +} static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); @@ -625,6 +662,8 @@ static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); +static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); +static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); static struct attribute *dev_attributes[] = { &dev_attr_guid.attr, @@ -641,6 +680,8 @@ static struct attribute *dev_attributes[] = { &dev_attr_unit.attr, &dev_attr_enabled.attr, &dev_attr_rx_pol_inv.attr, + &dev_attr_led_override.attr, + &dev_attr_logged_errors.attr, NULL }; diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 1c2b03c..8380fbc 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -58,7 +58,6 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe, wc->port_num = 0; ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0); } - wqe = get_swqe_ptr(qp, qp->s_last); } /** @@ -87,7 +86,7 @@ int ipath_make_uc_req(struct ipath_qp *qp, /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; - bth0 = 0; + bth0 = 1 << 22; /* Set M bit */ /* Get the next send request. */ wqe = get_swqe_ptr(qp, qp->s_last); @@ -97,8 +96,10 @@ int ipath_make_uc_req(struct ipath_qp *qp, * Signal the completion of the last send * (if there is one). */ - if (qp->s_last != qp->s_tail) + if (qp->s_last != qp->s_tail) { complete_last_send(qp, wqe, &wc); + wqe = get_swqe_ptr(qp, qp->s_last); + } /* Check if send work queue is empty. */ if (qp->s_tail == qp->s_head) diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index a518f7c..f9a3338 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -176,6 +176,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, dev->n_pkt_drops++; goto bail_sge; } + /* Make sure entry is read after head index is read. */ + smp_rmb(); wqe = get_rwqe_ptr(rq, tail); if (++tail >= rq->size) tail = 0; @@ -231,6 +233,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, if (len > length) len = length; + if (len > sge->sge_length) + len = sge->sge_length; BUG_ON(len == 0); ipath_copy_sge(&rsge, sge->vaddr, len); sge->vaddr += len; diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 8536aeb..27034d3 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index bb70845..65f7181 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -164,9 +164,11 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) while (length) { u32 len = sge->length; - BUG_ON(len == 0); if (len > length) len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); memcpy(sge->vaddr, data, len); sge->vaddr += len; sge->length -= len; @@ -202,9 +204,11 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) while (length) { u32 len = sge->length; - BUG_ON(len == 0); if (len > length) len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); sge->vaddr += len; sge->length -= len; sge->sge_length -= len; @@ -323,6 +327,8 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, wqe->num_sge = wr->num_sge; for (i = 0; i < wr->num_sge; i++) wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); wq->head = next; spin_unlock_irqrestore(&qp->r_rq.lock, flags); } @@ -948,6 +954,7 @@ int ipath_ib_piobufavail(struct ipath_ibdev *dev) qp = list_entry(dev->piowait.next, struct ipath_qp, piowait); list_del_init(&qp->piowait); + clear_bit(IPATH_S_BUSY, &qp->s_busy); tasklet_hi_schedule(&qp->s_task); } spin_unlock_irqrestore(&dev->pending_lock, flags); @@ -981,6 +988,8 @@ static int ipath_query_device(struct ib_device *ibdev, props->max_ah = ib_ipath_max_ahs; props->max_cqe = ib_ipath_max_cqes; props->max_mr = dev->lk_table.max; + props->max_fmr = dev->lk_table.max; + props->max_map_per_fmr = 32767; props->max_pd = ib_ipath_max_pds; props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; props->max_qp_init_rd_atom = 255; @@ -1051,7 +1060,12 @@ static int ipath_query_port(struct ib_device *ibdev, props->max_vl_num = 1; /* VLCap = VL0 */ props->init_type_reply = 0; - props->max_mtu = IB_MTU_4096; + /* + * Note: the chips support a maximum MTU of 4096, but the driver + * hasn't implemented this feature yet, so set the maximum value + * to 2048. + */ + props->max_mtu = IB_MTU_2048; switch (dev->dd->ipath_ibmtu) { case 4096: mtu = IB_MTU_4096; @@ -1361,13 +1375,6 @@ static void __verbs_timer(unsigned long arg) { struct ipath_devdata *dd = (struct ipath_devdata *) arg; - /* - * If port 0 receive packet interrupts are not available, or - * can be missed, poll the receive queue - */ - if (dd->ipath_flags & IPATH_POLL_RX_INTR) - ipath_kreceive(dd); - /* Handle verbs layer timeouts. */ ipath_ib_timer(dd->verbs_dev); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 088b837..f3d1f2c 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -321,6 +321,7 @@ struct ipath_sge_state { */ struct ipath_ack_entry { u8 opcode; + u8 sent; u32 psn; union { struct ipath_sge_state rdma_sge; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index dd691cf..9e5abf9 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c index 0095bb7..1d7bd82 100644 --- a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c +++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c index 04696e6..3428acb 100644 --- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c +++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -63,12 +63,29 @@ int ipath_enable_wc(struct ipath_devdata *dd) * of 2 address matching the length (which has to be a power of 2). * For rev1, that means the base address, for rev2, it will be just * the PIO buffers themselves. + * For chips with two sets of buffers, the calculations are + * somewhat more complicated; we need to sum, and the piobufbase + * register has both offsets, 2K in low 32 bits, 4K in high 32 bits. + * The buffers are still packed, so a single range covers both. */ - pioaddr = addr + dd->ipath_piobufbase; - piolen = (dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k) * - ALIGN(dd->ipath_piobcnt2k + - dd->ipath_piobcnt4k, dd->ipath_palign); + if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */ + unsigned long pio2kbase, pio4kbase; + pio2kbase = dd->ipath_piobufbase & 0xffffffffUL; + pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL; + if (pio2kbase < pio4kbase) { /* all, for now */ + pioaddr = addr + pio2kbase; + piolen = pio4kbase - pio2kbase + + dd->ipath_piobcnt4k * dd->ipath_4kalign; + } else { + pioaddr = addr + pio4kbase; + piolen = pio2kbase - pio4kbase + + dd->ipath_piobcnt2k * dd->ipath_palign; + } + } else { /* single buffer size (2K, currently) */ + pioaddr = addr + dd->ipath_piobufbase; + piolen = dd->ipath_piobcnt2k * dd->ipath_palign + + dd->ipath_piobcnt4k * dd->ipath_4kalign; + } for (bits = 0; !(piolen & (1ULL << bits)); bits++) /* do nothing */ ; diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig index b8912cd..4175a4b 100644 --- a/drivers/infiniband/hw/mlx4/Kconfig +++ b/drivers/infiniband/hw/mlx4/Kconfig @@ -1,6 +1,5 @@ config MLX4_INFINIBAND tristate "Mellanox ConnectX HCA support" - depends on INFINIBAND select MLX4_CORE ---help--- This driver provides low-level InfiniBand support for diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index b2a290c..660b27a 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -354,8 +354,8 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, if (is_send) { wq = &(*cur_qp)->sq; wqe_ctr = be16_to_cpu(cqe->wqe_index); - wq->tail += wqe_ctr - (u16) wq->tail; - wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)]; + wq->tail += (u16) (wqe_ctr - (u16) wq->tail); + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++wq->tail; } else if ((*cur_qp)->ibqp.srq) { srq = to_msrq((*cur_qp)->ibqp.srq); @@ -364,7 +364,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, mlx4_ib_free_srq_wqe(srq, wqe_ctr); } else { wq = &(*cur_qp)->rq; - wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)]; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++wq->tail; } @@ -478,7 +478,8 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) { u32 prod_index; int nfreed = 0; - struct mlx4_cqe *cqe; + struct mlx4_cqe *cqe, *dest; + u8 owner_bit; /* * First we need to find the current producer index, so we @@ -501,9 +502,13 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); ++nfreed; - } else if (nfreed) - memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe), - cqe, sizeof *cqe); + } else if (nfreed) { + dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); + owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK; + memcpy(dest, cqe, sizeof *cqe); + dest->owner_sr_opcode = owner_bit | + (dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); + } } if (nfreed) { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 402f3a2..dde8fe9 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -120,12 +120,12 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; - props->max_srq_wr = dev->dev->caps.max_srq_wqes; + props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; - props->max_pkeys = dev->dev->caps.pkey_table_len; + props->max_pkeys = dev->dev->caps.pkey_table_len[1]; props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * @@ -168,9 +168,9 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); - props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len; - props->max_msg_sz = 0x80000000; - props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len; + props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; + props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; + props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -280,8 +280,14 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, return PTR_ERR(mailbox); memset(mailbox->buf, 0, 256); - *(u8 *) mailbox->buf = !!reset_qkey_viols << 6; - ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask); + + if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { + *(u8 *) mailbox->buf = !!reset_qkey_viols << 6; + ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask); + } else { + ((u8 *) mailbox->buf)[3] = !!reset_qkey_viols; + ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); + } err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); @@ -517,11 +523,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); ibdev->ib_dev.query_device = mlx4_ib_query_device; @@ -540,10 +548,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; ibdev->ib_dev.create_srq = mlx4_ib_create_srq; ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; + ibdev->ib_dev.query_srq = mlx4_ib_query_srq; ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; ibdev->ib_dev.create_qp = mlx4_ib_create_qp; ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; + ibdev->ib_dev.query_qp = mlx4_ib_query_qp; ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; ibdev->ib_dev.post_send = mlx4_ib_post_send; ibdev->ib_dev.post_recv = mlx4_ib_post_recv; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 93dac71..705ff2f 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -35,6 +35,7 @@ #include <linux/compiler.h> #include <linux/list.h> +#include <linux/mutex.h> #include <rdma/ib_verbs.h> #include <rdma/ib_umem.h> @@ -95,7 +96,8 @@ struct mlx4_ib_mr { struct mlx4_ib_wq { u64 *wrid; spinlock_t lock; - int max; + int wqe_cnt; + int max_post; int max_gs; int offset; int wqe_shift; @@ -113,6 +115,7 @@ struct mlx4_ib_qp { u32 doorbell_qpn; __be32 sq_signal_bits; + int sq_spare_wqes; struct mlx4_ib_wq sq; struct ib_umem *umem; @@ -123,6 +126,7 @@ struct mlx4_ib_qp { u8 alt_port; u8 atomic_rd_en; u8 resp_depth; + u8 sq_no_prefetch; u8 state; }; @@ -252,6 +256,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int mlx4_ib_destroy_srq(struct ib_srq *srq); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, @@ -263,6 +268,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, int mlx4_ib_destroy_qp(struct ib_qp *qp); int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); +int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index dc137de..4004218 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -109,6 +109,20 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); } +/* + * Stamp a SQ WQE so that it is invalid if prefetched by marking the + * first four bytes of every 64 byte chunk with 0xffffffff, except for + * the very first chunk of the WQE. + */ +static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n) +{ + u32 *wqe = get_send_wqe(qp, n); + int i; + + for (i = 16; i < 1 << (qp->sq.wqe_shift - 2); i += 16) + wqe[i] = 0xffffffff; +} + static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) { struct ib_event event; @@ -178,6 +192,8 @@ static int send_wqe_overhead(enum ib_qp_type type) case IB_QPT_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + ALIGN(MLX4_IB_UD_HEADER_SIZE + + DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, + MLX4_INLINE_ALIGN) * sizeof (struct mlx4_wqe_inline_seg), sizeof (struct mlx4_wqe_data_seg)) + ALIGN(4 + @@ -189,20 +205,30 @@ static int send_wqe_overhead(enum ib_qp_type type) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - struct mlx4_ib_qp *qp) + int is_user, int has_srq, struct mlx4_ib_qp *qp) { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes || cap->max_recv_sge > dev->dev->caps.max_rq_sg) return -EINVAL; - qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0; + if (has_srq) { + /* QPs attached to an SRQ should have no RQ */ + if (cap->max_recv_wr) + return -EINVAL; + + qp->rq.wqe_cnt = qp->rq.max_gs = 0; + } else { + /* HW requires >= 1 RQ entry with >= 1 gather entry */ + if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) + return -EINVAL; - qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge * - sizeof (struct mlx4_wqe_data_seg))); - qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg); + qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr)); + qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); + qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); + } - cap->max_recv_wr = qp->rq.max; + cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; cap->max_recv_sge = qp->rq.max_gs; return 0; @@ -226,8 +252,6 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) return -EINVAL; - qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 1; - qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg), cap->max_inline_data + @@ -236,20 +260,27 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) / sizeof (struct mlx4_wqe_data_seg); - qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) + - (qp->sq.max << qp->sq.wqe_shift); + /* + * We need to leave 2 KB + 1 WQE of headroom in the SQ to + * allow HW to prefetch. + */ + qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1; + qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes); + + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); if (qp->rq.wqe_shift > qp->sq.wqe_shift) { qp->rq.offset = 0; - qp->sq.offset = qp->rq.max << qp->rq.wqe_shift; + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; } else { - qp->rq.offset = qp->sq.max << qp->sq.wqe_shift; + qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift; qp->sq.offset = 0; } - cap->max_send_wr = qp->sq.max; - cap->max_send_sge = qp->sq.max_gs; - cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) - - sizeof (struct mlx4_wqe_inline_seg); + cap->max_send_wr = qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes; + cap->max_send_sge = qp->sq.max_gs; + /* We don't support inline sends for kernel QPs (yet) */ + cap->max_inline_data = 0; return 0; } @@ -257,11 +288,11 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, static int set_user_sq_size(struct mlx4_ib_qp *qp, struct mlx4_ib_create_qp *ucmd) { - qp->sq.max = 1 << ucmd->log_sq_bb_count; + qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; qp->sq.wqe_shift = ucmd->log_sq_stride; - qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) + - (qp->sq.max << qp->sq.wqe_shift); + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); return 0; } @@ -285,7 +316,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->sq.head = 0; qp->sq.tail = 0; - err = set_rq_size(dev, &init_attr->cap, qp); + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); if (err) goto err; @@ -297,6 +328,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; } + qp->sq_no_prefetch = ucmd.sq_no_prefetch; + err = set_user_sq_size(qp, &ucmd); if (err) goto err; @@ -324,6 +357,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_mtt; } } else { + qp->sq_no_prefetch = 0; + err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); if (err) goto err; @@ -350,16 +385,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; - qp->sq.wrid = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL); - qp->rq.wrid = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL); + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; } - - /* We don't support inline sends for kernel QPs (yet) */ - init_attr->cap.max_inline_data = 0; } err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp); @@ -573,24 +605,6 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) return 0; } -static void init_port(struct mlx4_ib_dev *dev, int port) -{ - struct mlx4_init_port_param param; - int err; - - memset(¶m, 0, sizeof param); - - param.port_width_cap = dev->dev->caps.port_width_cap; - param.vl_cap = dev->dev->caps.vl_cap; - param.mtu = ib_mtu_enum_to_int(dev->dev->caps.mtu_cap); - param.max_gid = dev->dev->caps.gid_table_len; - param.max_pkey = dev->dev->caps.pkey_table_len; - - err = mlx4_INIT_PORT(dev->dev, ¶m, port); - if (err) - printk(KERN_WARNING "INIT_PORT failed, return code %d.\n", err); -} - static int to_mlx4_st(enum ib_qp_type type) { switch (type) { @@ -664,9 +678,9 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, path->counter_index = 0xff; if (ah->ah_flags & IB_AH_GRH) { - if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len) { + if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) { printk(KERN_ERR "sgid_index (%u) too large. max is %d\n", - ah->grh.sgid_index, dev->dev->caps.gid_table_len - 1); + ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1); return -1; } @@ -733,14 +747,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->mtu_msgmax = (attr->path_mtu << 5) | 31; } - if (qp->rq.max) - context->rq_size_stride = ilog2(qp->rq.max) << 3; + if (qp->rq.wqe_cnt) + context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; context->rq_size_stride |= qp->rq.wqe_shift - 4; - if (qp->sq.max) - context->sq_size_stride = ilog2(qp->sq.max) << 3; + if (qp->sq.wqe_cnt) + context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; context->sq_size_stride |= qp->sq.wqe_shift - 4; + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->sq_size_stride |= !!qp->sq_no_prefetch << 7; + if (qp->ibqp.uobject) context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); else @@ -762,11 +779,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; } - if (attr_mask & IB_QP_RNR_RETRY) { - context->params1 |= cpu_to_be32(attr->rnr_retry << 13); - optpar |= MLX4_QP_OPTPAR_RNR_RETRY; - } - if (attr_mask & IB_QP_AV) { if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) { @@ -784,13 +796,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_ALT_PATH) { - if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len) - return -EINVAL; - if (attr->alt_port_num == 0 || attr->alt_port_num > dev->dev->caps.num_ports) return -EINVAL; + if (attr->alt_pkey_index >= + dev->dev->caps.pkey_table_len[attr->alt_port_num]) + return -EINVAL; + if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path, attr->alt_port_num)) return -EINVAL; @@ -802,6 +815,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + + if (attr_mask & IB_QP_RNR_RETRY) { + context->params1 |= cpu_to_be32(attr->rnr_retry << 13); + optpar |= MLX4_QP_OPTPAR_RNR_RETRY; + } + if (attr_mask & IB_QP_RETRY_CNT) { context->params1 |= cpu_to_be32(attr->retry_cnt << 16); optpar |= MLX4_QP_OPTPAR_RETRY_COUNT; @@ -873,16 +892,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, /* * Before passing a kernel QP to the HW, make sure that the - * ownership bits of the send queue are set so that the - * hardware doesn't start processing stale work requests. + * ownership bits of the send queue are set and the SQ + * headroom is stamped so that the hardware doesn't start + * processing stale work requests. */ if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { struct mlx4_wqe_ctrl_seg *ctrl; int i; - for (i = 0; i < qp->sq.max; ++i) { + for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); + + stamp_send_wqe(qp, i); } } @@ -912,7 +934,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, */ if (is_qp0(dev, qp)) { if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR) - init_port(dev, qp->port); + if (mlx4_INIT_PORT(dev->dev, qp->port)) + printk(KERN_WARNING "INIT_PORT failed for port %d\n", + qp->port); if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) @@ -975,16 +999,17 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) goto out; - if ((attr_mask & IB_QP_PKEY_INDEX) && - attr->pkey_index >= dev->dev->caps.pkey_table_len) { - goto out; - } - if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { goto out; } + if (attr_mask & IB_QP_PKEY_INDEX) { + int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) + goto out; + } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { goto out; @@ -1026,6 +1051,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, u16 pkey; int send_size; int header_size; + int spc; int i; send_size = 0; @@ -1101,10 +1127,43 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, printk("\n"); } - inl->byte_count = cpu_to_be32(1 << 31 | header_size); - memcpy(inl + 1, sqp->header_buf, header_size); + /* + * Inline data segments may not cross a 64 byte boundary. If + * our UD header is bigger than the space available up to the + * next 64 byte boundary in the WQE, use two inline data + * segments to hold the UD header. + */ + spc = MLX4_INLINE_ALIGN - + ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); + if (header_size <= spc) { + inl->byte_count = cpu_to_be32(1 << 31 | header_size); + memcpy(inl + 1, sqp->header_buf, header_size); + i = 1; + } else { + inl->byte_count = cpu_to_be32(1 << 31 | spc); + memcpy(inl + 1, sqp->header_buf, spc); - return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + inl = (void *) (inl + 1) + spc; + memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); + /* + * Need a barrier here to make sure all the data is + * visible before the byte_count field is set. + * Otherwise the HCA prefetcher could grab the 64-byte + * chunk with this inline segment and get a valid (!= + * 0xffffffff) byte count but stale data, and end up + * generating a packet with bad headers. + * + * The first inline segment's byte_count field doesn't + * need a barrier, because it comes after a + * control/MLX segment and therefore is at an offset + * of 16 mod 64. + */ + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); + i = 2; + } + + return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); } static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) @@ -1113,7 +1172,7 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq struct mlx4_ib_cq *cq; cur = wq->head - wq->tail; - if (likely(cur + nreq < wq->max)) + if (likely(cur + nreq < wq->max_post)) return 0; cq = to_mcq(ib_cq); @@ -1121,7 +1180,7 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq cur = wq->head - wq->tail; spin_unlock(&cq->lock); - return cur + nreq >= wq->max; + return cur + nreq >= wq->max_post; } int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, @@ -1154,8 +1213,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1)); - qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id; + ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id; ctrl->srcrb_flags = (wr->send_flags & IB_SEND_SIGNALED ? @@ -1290,7 +1349,16 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | - (ind & qp->sq.max ? cpu_to_be32(1 << 31) : 0); + (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); + + /* + * We can improve latency by not stamping the last + * send queue WQE until after ringing the doorbell, so + * only stamp here if there are still more WQEs to post. + */ + if (wr->next) + stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) & + (qp->sq.wqe_cnt - 1)); ++ind; } @@ -1313,6 +1381,9 @@ out: * and reach the HCA out of order. */ mmiowb(); + + stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) & + (qp->sq.wqe_cnt - 1)); } spin_unlock_irqrestore(&qp->rq.lock, flags); @@ -1333,7 +1404,7 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); - ind = qp->rq.head & (qp->rq.max - 1); + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) { @@ -1364,7 +1435,7 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, qp->rq.wrid[ind] = wr->wr_id; - ind = (ind + 1) & (qp->rq.max - 1); + ind = (ind + 1) & (qp->rq.wqe_cnt - 1); } out: @@ -1384,3 +1455,140 @@ out: return err; } + +static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) +{ + switch (mlx4_state) { + case MLX4_QP_STATE_RST: return IB_QPS_RESET; + case MLX4_QP_STATE_INIT: return IB_QPS_INIT; + case MLX4_QP_STATE_RTR: return IB_QPS_RTR; + case MLX4_QP_STATE_RTS: return IB_QPS_RTS; + case MLX4_QP_STATE_SQ_DRAINING: + case MLX4_QP_STATE_SQD: return IB_QPS_SQD; + case MLX4_QP_STATE_SQER: return IB_QPS_SQE; + case MLX4_QP_STATE_ERR: return IB_QPS_ERR; + default: return -1; + } +} + +static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state) +{ + switch (mlx4_mig_state) { + case MLX4_QP_PM_ARMED: return IB_MIG_ARMED; + case MLX4_QP_PM_REARM: return IB_MIG_REARM; + case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED; + default: return -1; + } +} + +static int to_ib_qp_access_flags(int mlx4_flags) +{ + int ib_flags = 0; + + if (mlx4_flags & MLX4_QP_BIT_RRE) + ib_flags |= IB_ACCESS_REMOTE_READ; + if (mlx4_flags & MLX4_QP_BIT_RWE) + ib_flags |= IB_ACCESS_REMOTE_WRITE; + if (mlx4_flags & MLX4_QP_BIT_RAE) + ib_flags |= IB_ACCESS_REMOTE_ATOMIC; + + return ib_flags; +} + +static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, + struct mlx4_qp_path *path) +{ + memset(ib_ah_attr, 0, sizeof *path); + ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; + + if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) + return; + + ib_ah_attr->dlid = be16_to_cpu(path->rlid); + ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; + ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; + ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; + ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; + if (ib_ah_attr->ah_flags) { + ib_ah_attr->grh.sgid_index = path->mgid_index; + ib_ah_attr->grh.hop_limit = path->hop_limit; + ib_ah_attr->grh.traffic_class = + (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; + ib_ah_attr->grh.flow_label = + be32_to_cpu(path->tclass_flowlabel) & 0xffffff; + memcpy(ib_ah_attr->grh.dgid.raw, + path->rgid, sizeof ib_ah_attr->grh.dgid.raw); + } +} + +int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(ibqp->device); + struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_qp_context context; + int mlx4_state; + int err; + + if (qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + + err = mlx4_qp_query(dev->dev, &qp->mqp, &context); + if (err) + return -EINVAL; + + mlx4_state = be32_to_cpu(context.flags) >> 28; + + qp_attr->qp_state = to_ib_qp_state(mlx4_state); + qp_attr->path_mtu = context.mtu_msgmax >> 5; + qp_attr->path_mig_state = + to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3); + qp_attr->qkey = be32_to_cpu(context.qkey); + qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff; + qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff; + qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff; + qp_attr->qp_access_flags = + to_ib_qp_access_flags(be32_to_cpu(context.params2)); + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); + qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; + } + + qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; + qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; + + /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ + qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; + + qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7); + + qp_attr->max_dest_rd_atomic = + 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7); + qp_attr->min_rnr_timer = + (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f; + qp_attr->timeout = context.pri_path.ackto >> 3; + qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7; + qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7; + qp_attr->alt_timeout = context.alt_path.ackto >> 3; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + if (!ibqp->uobject) { + qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) - + send_wqe_overhead(qp->ibqp.qp_type) - + sizeof (struct mlx4_wqe_inline_seg); + qp_init_attr->cap = qp_attr->cap; + } + + return 0; +} + diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 12fac1c..408748f 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -240,6 +240,24 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return 0; } +int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx4_ib_srq *srq = to_msrq(ibsrq); + int ret; + int limit_watermark; + + ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark); + if (ret) + return ret; + + srq_attr->srq_limit = be16_to_cpu(limit_watermark); + srq_attr->max_wr = srq->msrq.max - 1; + srq_attr->max_sge = srq->msrq.max_gs; + + return 0; +} + int mlx4_ib_destroy_srq(struct ib_srq *srq) { struct mlx4_ib_dev *dev = to_mdev(srq->device); diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h index 88c72d5..e2d11be 100644 --- a/drivers/infiniband/hw/mlx4/user.h +++ b/drivers/infiniband/hw/mlx4/user.h @@ -39,7 +39,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define MLX4_IB_UVERBS_ABI_VERSION 2 +#define MLX4_IB_UVERBS_ABI_VERSION 3 /* * Make sure that all structs defined in this file remain laid out so @@ -87,9 +87,10 @@ struct mlx4_ib_create_srq_resp { struct mlx4_ib_create_qp { __u64 buf_addr; __u64 db_addr; - __u8 log_sq_bb_count; - __u8 log_sq_stride; - __u8 reserved[6]; + __u8 log_sq_bb_count; + __u8 log_sq_stride; + __u8 sq_no_prefetch; + __u8 reserved[5]; }; #endif /* MLX4_IB_USER_H */ diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig index 9aa5a44..03efc07 100644 --- a/drivers/infiniband/hw/mthca/Kconfig +++ b/drivers/infiniband/hw/mthca/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_MTHCA tristate "Mellanox HCA support" - depends on PCI && INFINIBAND + depends on PCI ---help--- This is a low-level driver for Mellanox InfiniHost host channel adapters (HCAs), including the MT23108 PCI-X HCA diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index f930e55..a763067 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -255,7 +255,7 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, dma_list[i] = t; pci_unmap_addr_set(&buf->page_list[i], mapping, t); - memset(buf->page_list[i].buf, 0, PAGE_SIZE); + clear_page(buf->page_list[i].buf); } } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 3810252..f40558d 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -772,7 +772,7 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) MTHCA_GET(dev->fw_ver, outbox, QUERY_FW_VER_OFFSET); /* - * FW subminor version is at more signifant bits than minor + * FW subminor version is at more significant bits than minor * version, so swap here. */ dev->fw_ver = (dev->fw_ver & 0xffff00000000ull) | diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 8ec9fa1..8592b26 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -522,7 +522,7 @@ static int mthca_create_eq(struct mthca_dev *dev, dma_list[i] = t; pci_unmap_addr_set(&eq->page_list[i], mapping, t); - memset(eq->page_list[i].buf, 0, PAGE_SIZE); + clear_page(eq->page_list[i].buf); } for (i = 0; i < eq->nent; ++i) diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index af78ccc..1f76bad 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_IPOIB tristate "IP-over-InfiniBand" - depends on INFINIBAND && NETDEVICES && INET && (IPV6 || IPV6=n) + depends on NETDEVICES && INET && (IPV6 || IPV6=n) ---help--- Support for the IP-over-InfiniBand protocol (IPoIB). This transports IP packets over InfiniBand so you can use your IB diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 076a0bb..08b4676 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -56,13 +56,6 @@ MODULE_PARM_DESC(cm_data_debug_level, #define IPOIB_CM_RX_DELAY (3 * 256 * HZ) #define IPOIB_CM_RX_UPDATE_MASK (0x3) -struct ipoib_cm_id { - struct ib_cm_id *id; - int flags; - u32 remote_qpn; - u32 remote_mtu; -}; - static struct ib_qp_attr ipoib_cm_err_attr = { .qp_state = IB_QPS_ERR }; @@ -155,8 +148,8 @@ partial_error: ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); - for (; i >= 0; --i) - ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); + for (; i > 0; --i) + ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); return NULL; @@ -288,7 +281,6 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, rep.private_data_len = sizeof data; rep.flow_control = 0; rep.rnr_retry_count = req->rnr_retry_count; - rep.target_ack_delay = 20; /* FIXME */ rep.srq = 1; rep.qp_num = qp->qp_num; rep.starting_psn = psn; @@ -309,6 +301,11 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even return -ENOMEM; p->dev = dev; p->id = cm_id; + cm_id->context = p; + p->state = IPOIB_CM_RX_LIVE; + p->jiffies = jiffies; + INIT_LIST_HEAD(&p->list); + p->qp = ipoib_cm_create_rx_qp(dev, p); if (IS_ERR(p->qp)) { ret = PTR_ERR(p->qp); @@ -320,24 +317,24 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even if (ret) goto err_modify; + spin_lock_irq(&priv->lock); + queue_delayed_work(ipoib_workqueue, + &priv->cm.stale_task, IPOIB_CM_RX_DELAY); + /* Add this entry to passive ids list head, but do not re-add it + * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */ + p->jiffies = jiffies; + if (p->state == IPOIB_CM_RX_LIVE) + list_move(&p->list, &priv->cm.passive_ids); + spin_unlock_irq(&priv->lock); + ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn); if (ret) { ipoib_warn(priv, "failed to send REP: %d\n", ret); - goto err_rep; + if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE)) + ipoib_warn(priv, "unable to move qp to error state\n"); } - - cm_id->context = p; - p->jiffies = jiffies; - p->state = IPOIB_CM_RX_LIVE; - spin_lock_irq(&priv->lock); - if (list_empty(&priv->cm.passive_ids)) - queue_delayed_work(ipoib_workqueue, - &priv->cm.stale_task, IPOIB_CM_RX_DELAY); - list_add(&p->list, &priv->cm.passive_ids); - spin_unlock_irq(&priv->lock); return 0; -err_rep: err_modify: ib_destroy_qp(p->qp); err_qp: @@ -754,9 +751,9 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even p->mtu = be32_to_cpu(data->mtu); - if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) { - ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n", - p->mtu, priv->dev->mtu); + if (p->mtu <= IPOIB_ENCAP_LEN) { + ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n", + p->mtu, IPOIB_ENCAP_LEN); return -EINVAL; } @@ -1150,7 +1147,6 @@ static void ipoib_cm_skb_reap(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.skb_task); - struct net_device *dev = priv->dev; struct sk_buff *skb; unsigned mtu = priv->mcast_mtu; @@ -1164,7 +1160,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev); #endif dev_kfree_skb_any(skb); spin_lock_irq(&priv->tx_lock); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 8404f05b..1094488 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -197,6 +197,13 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } /* + * Drop packets that this interface sent, ie multicast packets + * that the HCA has replicated. + */ + if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) + goto repost; + + /* * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. */ @@ -213,24 +220,18 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb_put(skb, wc->byte_len); skb_pull(skb, IB_GRH_BYTES); - if (wc->slid != priv->local_lid || - wc->src_qp != priv->qp->qp_num) { - skb->protocol = ((struct ipoib_header *) skb->data)->proto; - skb_reset_mac_header(skb); - skb_pull(skb, IPOIB_ENCAP_LEN); + skb->protocol = ((struct ipoib_header *) skb->data)->proto; + skb_reset_mac_header(skb); + skb_pull(skb, IPOIB_ENCAP_LEN); - dev->last_rx = jiffies; - ++priv->stats.rx_packets; - priv->stats.rx_bytes += skb->len; + dev->last_rx = jiffies; + ++priv->stats.rx_packets; + priv->stats.rx_bytes += skb->len; - skb->dev = dev; - /* XXX get correct PACKET_ type here */ - skb->pkt_type = PACKET_HOST; - netif_receive_skb(skb); - } else { - ipoib_dbg_data(priv, "dropping loopback packet\n"); - dev_kfree_skb_any(skb); - } + skb->dev = dev; + /* XXX get correct PACKET_ type here */ + skb->pkt_type = PACKET_HOST; + netif_receive_skb(skb); repost: if (unlikely(ipoib_ib_post_receive(dev, wr_id))) diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig index aecbb90..fe604c8 100644 --- a/drivers/infiniband/ulp/iser/Kconfig +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_ISER tristate "iSCSI Extensions for RDMA (iSER)" - depends on INFINIBAND && SCSI && INET + depends on SCSI && INET select SCSI_ISCSI_ATTRS ---help--- Support for the iSCSI Extensions for RDMA (iSER) Protocol diff --git a/drivers/infiniband/ulp/srp/Kconfig b/drivers/infiniband/ulp/srp/Kconfig index 8fe3be4..3432dce 100644 --- a/drivers/infiniband/ulp/srp/Kconfig +++ b/drivers/infiniband/ulp/srp/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_SRP tristate "InfiniBand SCSI RDMA Protocol" - depends on INFINIBAND && SCSI + depends on SCSI ---help--- Support for the SCSI RDMA Protocol over InfiniBand. This allows you to access storage devices that speak SRP over |