diff options
47 files changed, 2735 insertions, 1811 deletions
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 5ac86f5..0c3c695 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -37,58 +37,41 @@ * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $ */ -#include <linux/dma-mapping.h> - -#include <asm/bug.h> +#include "agent.h" +#include "smi.h" -#include <rdma/ib_smi.h> +#define SPFX "ib_agent: " -#include "smi.h" -#include "agent_priv.h" -#include "mad_priv.h" -#include "agent.h" +struct ib_agent_port_private { + struct list_head port_list; + struct ib_mad_agent *agent[2]; +}; -spinlock_t ib_agent_port_list_lock; +static DEFINE_SPINLOCK(ib_agent_port_list_lock); static LIST_HEAD(ib_agent_port_list); -/* - * Caller must hold ib_agent_port_list_lock - */ -static inline struct ib_agent_port_private * -__ib_get_agent_port(struct ib_device *device, int port_num, - struct ib_mad_agent *mad_agent) +static struct ib_agent_port_private * +__ib_get_agent_port(struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; - BUG_ON(!(!!device ^ !!mad_agent)); /* Exactly one MUST be (!NULL) */ - - if (device) { - list_for_each_entry(entry, &ib_agent_port_list, port_list) { - if (entry->smp_agent->device == device && - entry->port_num == port_num) - return entry; - } - } else { - list_for_each_entry(entry, &ib_agent_port_list, port_list) { - if ((entry->smp_agent == mad_agent) || - (entry->perf_mgmt_agent == mad_agent)) - return entry; - } + list_for_each_entry(entry, &ib_agent_port_list, port_list) { + if (entry->agent[0]->device == device && + entry->agent[0]->port_num == port_num) + return entry; } return NULL; } -static inline struct ib_agent_port_private * -ib_get_agent_port(struct ib_device *device, int port_num, - struct ib_mad_agent *mad_agent) +static struct ib_agent_port_private * +ib_get_agent_port(struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; unsigned long flags; spin_lock_irqsave(&ib_agent_port_list_lock, flags); - entry = __ib_get_agent_port(device, port_num, mad_agent); + entry = __ib_get_agent_port(device, port_num); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - return entry; } @@ -100,192 +83,76 @@ int smi_check_local_dr_smp(struct ib_smp *smp, if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return 1; - port_priv = ib_get_agent_port(device, port_num, NULL); + + port_priv = ib_get_agent_port(device, port_num); if (!port_priv) { printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d " - "not open\n", - device->name, port_num); + "not open\n", device->name, port_num); return 1; } - return smi_check_local_smp(port_priv->smp_agent, smp); + return smi_check_local_smp(port_priv->agent[0], smp); } -static int agent_mad_send(struct ib_mad_agent *mad_agent, - struct ib_agent_port_private *port_priv, - struct ib_mad_private *mad_priv, - struct ib_grh *grh, - struct ib_wc *wc) +int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, + struct ib_wc *wc, struct ib_device *device, + int port_num, int qpn) { - struct ib_agent_send_wr *agent_send_wr; - struct ib_sge gather_list; - struct ib_send_wr send_wr; - struct ib_send_wr *bad_send_wr; - struct ib_ah_attr ah_attr; - unsigned long flags; - int ret = 1; - - agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL); - if (!agent_send_wr) - goto out; - agent_send_wr->mad = mad_priv; - - gather_list.addr = dma_map_single(mad_agent->device->dma_device, - &mad_priv->mad, - sizeof(mad_priv->mad), - DMA_TO_DEVICE); - gather_list.length = sizeof(mad_priv->mad); - gather_list.lkey = mad_agent->mr->lkey; - - send_wr.next = NULL; - send_wr.opcode = IB_WR_SEND; - send_wr.sg_list = &gather_list; - send_wr.num_sge = 1; - send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */ - send_wr.wr.ud.timeout_ms = 0; - send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; + struct ib_agent_port_private *port_priv; + struct ib_mad_agent *agent; + struct ib_mad_send_buf *send_buf; + struct ib_ah *ah; + int ret; - ah_attr.dlid = wc->slid; - ah_attr.port_num = mad_agent->port_num; - ah_attr.src_path_bits = wc->dlid_path_bits; - ah_attr.sl = wc->sl; - ah_attr.static_rate = 0; - ah_attr.ah_flags = 0; /* No GRH */ - if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { - if (wc->wc_flags & IB_WC_GRH) { - ah_attr.ah_flags = IB_AH_GRH; - /* Should sgid be looked up ? */ - ah_attr.grh.sgid_index = 0; - ah_attr.grh.hop_limit = grh->hop_limit; - ah_attr.grh.flow_label = be32_to_cpu( - grh->version_tclass_flow) & 0xfffff; - ah_attr.grh.traffic_class = (be32_to_cpu( - grh->version_tclass_flow) >> 20) & 0xff; - memcpy(ah_attr.grh.dgid.raw, - grh->sgid.raw, - sizeof(ah_attr.grh.dgid)); - } + port_priv = ib_get_agent_port(device, port_num); + if (!port_priv) { + printk(KERN_ERR SPFX "Unable to find port agent\n"); + return -ENODEV; } - agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr); - if (IS_ERR(agent_send_wr->ah)) { - printk(KERN_ERR SPFX "No memory for address handle\n"); - kfree(agent_send_wr); - goto out; + agent = port_priv->agent[qpn]; + ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + printk(KERN_ERR SPFX "ib_create_ah_from_wc error:%d\n", ret); + return ret; } - send_wr.wr.ud.ah = agent_send_wr->ah; - if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { - send_wr.wr.ud.pkey_index = wc->pkey_index; - send_wr.wr.ud.remote_qkey = IB_QP1_QKEY; - } else { /* for SMPs */ - send_wr.wr.ud.pkey_index = 0; - send_wr.wr.ud.remote_qkey = 0; + send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0, + IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, + GFP_KERNEL); + if (IS_ERR(send_buf)) { + ret = PTR_ERR(send_buf); + printk(KERN_ERR SPFX "ib_create_send_mad error:%d\n", ret); + goto err1; } - send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr; - send_wr.wr_id = (unsigned long)agent_send_wr; - pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr); - - /* Send */ - spin_lock_irqsave(&port_priv->send_list_lock, flags); - if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) { - spin_unlock_irqrestore(&port_priv->send_list_lock, flags); - dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(agent_send_wr, mapping), - sizeof(mad_priv->mad), - DMA_TO_DEVICE); - ib_destroy_ah(agent_send_wr->ah); - kfree(agent_send_wr); - } else { - list_add_tail(&agent_send_wr->send_list, - &port_priv->send_posted_list); - spin_unlock_irqrestore(&port_priv->send_list_lock, flags); - ret = 0; + memcpy(send_buf->mad, mad, sizeof *mad); + send_buf->ah = ah; + if ((ret = ib_post_send_mad(send_buf, NULL))) { + printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret); + goto err2; } - -out: + return 0; +err2: + ib_free_send_mad(send_buf); +err1: + ib_destroy_ah(ah); return ret; } -int agent_send(struct ib_mad_private *mad, - struct ib_grh *grh, - struct ib_wc *wc, - struct ib_device *device, - int port_num) -{ - struct ib_agent_port_private *port_priv; - struct ib_mad_agent *mad_agent; - - port_priv = ib_get_agent_port(device, port_num, NULL); - if (!port_priv) { - printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n", - device->name, port_num); - return 1; - } - - /* Get mad agent based on mgmt_class in MAD */ - switch (mad->mad.mad.mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: - case IB_MGMT_CLASS_SUBN_LID_ROUTED: - mad_agent = port_priv->smp_agent; - break; - case IB_MGMT_CLASS_PERF_MGMT: - mad_agent = port_priv->perf_mgmt_agent; - break; - default: - return 1; - } - - return agent_mad_send(mad_agent, port_priv, mad, grh, wc); -} - static void agent_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_agent_port_private *port_priv; - struct ib_agent_send_wr *agent_send_wr; - unsigned long flags; - - /* Find matching MAD agent */ - port_priv = ib_get_agent_port(NULL, 0, mad_agent); - if (!port_priv) { - printk(KERN_ERR SPFX "agent_send_handler: no matching MAD " - "agent %p\n", mad_agent); - return; - } - - agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id; - spin_lock_irqsave(&port_priv->send_list_lock, flags); - /* Remove completed send from posted send MAD list */ - list_del(&agent_send_wr->send_list); - spin_unlock_irqrestore(&port_priv->send_list_lock, flags); - - dma_unmap_single(mad_agent->device->dma_device, - pci_unmap_addr(agent_send_wr, mapping), - sizeof(agent_send_wr->mad->mad), - DMA_TO_DEVICE); - - ib_destroy_ah(agent_send_wr->ah); - - /* Release allocated memory */ - kmem_cache_free(ib_mad_cache, agent_send_wr->mad); - kfree(agent_send_wr); + ib_destroy_ah(mad_send_wc->send_buf->ah); + ib_free_send_mad(mad_send_wc->send_buf); } int ib_agent_port_open(struct ib_device *device, int port_num) { - int ret; struct ib_agent_port_private *port_priv; unsigned long flags; - - /* First, check if port already open for SMI */ - port_priv = ib_get_agent_port(device, port_num, NULL); - if (port_priv) { - printk(KERN_DEBUG SPFX "%s port %d already open\n", - device->name, port_num); - return 0; - } + int ret; /* Create new device info */ port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL); @@ -294,32 +161,25 @@ int ib_agent_port_open(struct ib_device *device, int port_num) ret = -ENOMEM; goto error1; } - memset(port_priv, 0, sizeof *port_priv); - port_priv->port_num = port_num; - spin_lock_init(&port_priv->send_list_lock); - INIT_LIST_HEAD(&port_priv->send_posted_list); - /* Obtain send only MAD agent for SM class (SMI QP) */ - port_priv->smp_agent = ib_register_mad_agent(device, port_num, - IB_QPT_SMI, - NULL, 0, + /* Obtain send only MAD agent for SMI QP */ + port_priv->agent[0] = ib_register_mad_agent(device, port_num, + IB_QPT_SMI, NULL, 0, &agent_send_handler, - NULL, NULL); - - if (IS_ERR(port_priv->smp_agent)) { - ret = PTR_ERR(port_priv->smp_agent); + NULL, NULL); + if (IS_ERR(port_priv->agent[0])) { + ret = PTR_ERR(port_priv->agent[0]); goto error2; } - /* Obtain send only MAD agent for PerfMgmt class (GSI QP) */ - port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num, - IB_QPT_GSI, - NULL, 0, - &agent_send_handler, - NULL, NULL); - if (IS_ERR(port_priv->perf_mgmt_agent)) { - ret = PTR_ERR(port_priv->perf_mgmt_agent); + /* Obtain send only MAD agent for GSI QP */ + port_priv->agent[1] = ib_register_mad_agent(device, port_num, + IB_QPT_GSI, NULL, 0, + &agent_send_handler, + NULL, NULL); + if (IS_ERR(port_priv->agent[1])) { + ret = PTR_ERR(port_priv->agent[1]); goto error3; } @@ -330,7 +190,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num) return 0; error3: - ib_unregister_mad_agent(port_priv->smp_agent); + ib_unregister_mad_agent(port_priv->agent[0]); error2: kfree(port_priv); error1: @@ -343,7 +203,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num) unsigned long flags; spin_lock_irqsave(&ib_agent_port_list_lock, flags); - port_priv = __ib_get_agent_port(device, port_num, NULL); + port_priv = __ib_get_agent_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); printk(KERN_ERR SPFX "Port %d not found\n", port_num); @@ -352,9 +212,8 @@ int ib_agent_port_close(struct ib_device *device, int port_num) list_del(&port_priv->port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - ib_unregister_mad_agent(port_priv->perf_mgmt_agent); - ib_unregister_mad_agent(port_priv->smp_agent); + ib_unregister_mad_agent(port_priv->agent[1]); + ib_unregister_mad_agent(port_priv->agent[0]); kfree(port_priv); - return 0; } diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h index d942684..c5f3cfe 100644 --- a/drivers/infiniband/core/agent.h +++ b/drivers/infiniband/core/agent.h @@ -39,17 +39,14 @@ #ifndef __AGENT_H_ #define __AGENT_H_ -extern spinlock_t ib_agent_port_list_lock; +#include <rdma/ib_mad.h> -extern int ib_agent_port_open(struct ib_device *device, - int port_num); +extern int ib_agent_port_open(struct ib_device *device, int port_num); extern int ib_agent_port_close(struct ib_device *device, int port_num); -extern int agent_send(struct ib_mad_private *mad, - struct ib_grh *grh, - struct ib_wc *wc, - struct ib_device *device, - int port_num); +extern int agent_send_response(struct ib_mad *mad, struct ib_grh *grh, + struct ib_wc *wc, struct ib_device *device, + int port_num, int qpn); #endif /* __AGENT_H_ */ diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h deleted file mode 100644 index 2ec6d7f..0000000 --- a/drivers/infiniband/core/agent_priv.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. - * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $ - */ - -#ifndef __IB_AGENT_PRIV_H__ -#define __IB_AGENT_PRIV_H__ - -#include <linux/pci.h> - -#define SPFX "ib_agent: " - -struct ib_agent_send_wr { - struct list_head send_list; - struct ib_ah *ah; - struct ib_mad_private *mad; - DECLARE_PCI_UNMAP_ADDR(mapping) -}; - -struct ib_agent_port_private { - struct list_head port_list; - struct list_head send_posted_list; - spinlock_t send_list_lock; - int port_num; - struct ib_mad_agent *smp_agent; /* SM class */ - struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */ -}; - -#endif /* __IB_AGENT_PRIV_H__ */ diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 54db6d4..580c3a2 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -135,6 +135,7 @@ struct cm_id_private { __be64 tid; __be32 local_qpn; __be32 remote_qpn; + enum ib_qp_type qp_type; __be32 sq_psn; __be32 rq_psn; int timeout_ms; @@ -175,8 +176,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, - ah, 0, sizeof(struct ib_mad_hdr), - sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); if (IS_ERR(m)) { ib_destroy_ah(ah); @@ -184,7 +184,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, } /* Timeout set by caller if response is expected. */ - m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries; + m->ah = ah; + m->retries = cm_id_priv->max_cm_retries; atomic_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; @@ -205,20 +206,20 @@ static int cm_alloc_response_msg(struct cm_port *port, return PTR_ERR(ah); m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, - ah, 0, sizeof(struct ib_mad_hdr), - sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr), + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); } + m->ah = ah; *msg = m; return 0; } static void cm_free_msg(struct ib_mad_send_buf *msg) { - ib_destroy_ah(msg->send_wr.wr.ud.ah); + ib_destroy_ah(msg->ah); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); @@ -366,9 +367,15 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); if ((cur_cm_id_priv->id.service_mask & service_id) == - (service_mask & cur_cm_id_priv->id.service_id)) - return cm_id_priv; - if (service_id < cur_cm_id_priv->id.service_id) + (service_mask & cur_cm_id_priv->id.service_id) && + (cm_id_priv->id.device == cur_cm_id_priv->id.device)) + return cur_cm_id_priv; + + if (cm_id_priv->id.device < cur_cm_id_priv->id.device) + link = &(*link)->rb_left; + else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) + link = &(*link)->rb_right; + else if (service_id < cur_cm_id_priv->id.service_id) link = &(*link)->rb_left; else link = &(*link)->rb_right; @@ -378,7 +385,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) return NULL; } -static struct cm_id_private * cm_find_listen(__be64 service_id) +static struct cm_id_private * cm_find_listen(struct ib_device *device, + __be64 service_id) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; @@ -386,9 +394,15 @@ static struct cm_id_private * cm_find_listen(__be64 service_id) while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); if ((cm_id_priv->id.service_mask & service_id) == - (cm_id_priv->id.service_mask & cm_id_priv->id.service_id)) + cm_id_priv->id.service_id && + (cm_id_priv->id.device == device)) return cm_id_priv; - if (service_id < cm_id_priv->id.service_id) + + if (device < cm_id_priv->id.device) + node = node->rb_left; + else if (device > cm_id_priv->id.device) + node = node->rb_right; + else if (service_id < cm_id_priv->id.service_id) node = node->rb_left; else node = node->rb_right; @@ -523,7 +537,8 @@ static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); } -struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, +struct ib_cm_id *ib_create_cm_id(struct ib_device *device, + ib_cm_handler cm_handler, void *context) { struct cm_id_private *cm_id_priv; @@ -535,6 +550,7 @@ struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, memset(cm_id_priv, 0, sizeof *cm_id_priv); cm_id_priv->id.state = IB_CM_IDLE; + cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.remote_cm_qpn = 1; @@ -662,8 +678,7 @@ retest: break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irqrestore(&cm_id_priv->lock, flags); break; case IB_CM_SIDR_REQ_RCVD: @@ -674,8 +689,7 @@ retest: case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* Fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -692,8 +706,7 @@ retest: ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); break; @@ -867,7 +880,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { struct cm_id_private *cm_id_priv; - struct ib_send_wr *bad_send_wr; struct cm_req_msg *req_msg; unsigned long flags; int ret; @@ -911,6 +923,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->retry_count = param->retry_count; cm_id_priv->path_mtu = param->primary_path->mtu; + cm_id_priv->qp_type = param->qp_type; ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); if (ret) @@ -919,7 +932,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); @@ -928,8 +941,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_req_get_primary_local_ack_timeout(req_msg); spin_lock_irqsave(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &cm_id_priv->msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(cm_id_priv->msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); goto error2; @@ -952,7 +964,6 @@ static int cm_issue_rej(struct cm_port *port, void *ari, u8 ari_length) { struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; struct cm_rej_msg *rej_msg, *rcv_msg; int ret; @@ -975,7 +986,7 @@ static int cm_issue_rej(struct cm_port *port, memcpy(rej_msg->ari, ari, ari_length); } - ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); @@ -1047,7 +1058,6 @@ static void cm_format_req_event(struct cm_work *work, req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.req_rcvd; param->listen_id = listen_id; - param->device = cm_id_priv->av.port->mad_agent->device; param->port = cm_id_priv->av.port->port_num; param->primary_path = &work->path[0]; if (req_msg->alt_local_lid) @@ -1156,7 +1166,6 @@ static void cm_dup_req_handler(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1185,8 +1194,7 @@ static void cm_dup_req_handler(struct cm_work *work, } spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, - &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto free; return; @@ -1226,7 +1234,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, } /* Find matching listen request. */ - listen_cm_id_priv = cm_find_listen(req_msg->service_id); + listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, + req_msg->service_id); if (!listen_cm_id_priv) { spin_unlock_irqrestore(&cm.lock, flags); cm_issue_rej(work->port, work->mad_recv_wc, @@ -1254,7 +1263,7 @@ static int cm_req_handler(struct cm_work *work) req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; - cm_id = ib_create_cm_id(NULL, NULL); + cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); @@ -1305,6 +1314,7 @@ static int cm_req_handler(struct cm_work *work) cm_req_get_primary_local_ack_timeout(req_msg); cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); + cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); cm_process_work(cm_id_priv, work); @@ -1349,7 +1359,6 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; struct cm_rep_msg *rep_msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1371,11 +1380,10 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1413,7 +1421,6 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; void *data; int ret; @@ -1440,8 +1447,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1486,7 +1492,6 @@ static void cm_dup_rep_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1514,8 +1519,7 @@ static void cm_dup_rep_handler(struct cm_work *work) goto unlock; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, - &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto free; goto deref; @@ -1583,8 +1587,7 @@ static int cm_rep_handler(struct cm_work *work) /* todo: handle peer_to_peer */ - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1618,8 +1621,7 @@ static int cm_establish_handler(struct cm_work *work) goto out; } - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1658,8 +1660,7 @@ static int cm_rtu_handler(struct cm_work *work) } cm_id_priv->id.state = IB_CM_ESTABLISHED; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1696,7 +1697,6 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1718,11 +1718,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id, cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -1756,7 +1755,6 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; void *data; int ret; @@ -1786,8 +1784,7 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id, cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr, - &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -1804,7 +1801,6 @@ static int cm_dreq_handler(struct cm_work *work) struct cm_id_private *cm_id_priv; struct cm_dreq_msg *dreq_msg; struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1823,8 +1819,7 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); break; case IB_CM_ESTABLISHED: case IB_CM_MRA_REP_RCVD: @@ -1838,8 +1833,7 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv->private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr)) + if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; default: @@ -1886,8 +1880,7 @@ static int cm_drep_handler(struct cm_work *work) } cm_enter_timewait(cm_id_priv); - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -1912,7 +1905,6 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -1956,8 +1948,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id, if (ret) goto out; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); @@ -2033,8 +2024,7 @@ static int cm_rej_handler(struct cm_work *work) case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -2044,8 +2034,7 @@ static int cm_rej_handler(struct cm_work *work) cm_reset_to_idle(cm_id_priv); break; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: @@ -2080,7 +2069,6 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; void *data; unsigned long flags; int ret; @@ -2104,8 +2092,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REQ, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; cm_id->state = IB_CM_MRA_REQ_SENT; @@ -2118,8 +2105,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REP, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; cm_id->state = IB_CM_MRA_REP_SENT; @@ -2132,8 +2118,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_OTHER, service_timeout, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; cm_id->lap_state = IB_CM_MRA_LAP_SENT; @@ -2195,14 +2180,14 @@ static int cm_mra_handler(struct cm_work *work) case IB_CM_REQ_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || ib_modify_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || ib_modify_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; @@ -2210,7 +2195,7 @@ static int cm_mra_handler(struct cm_work *work) if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; @@ -2273,7 +2258,6 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2294,11 +2278,10 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, alternate_path, private_data, private_data_len); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2342,7 +2325,6 @@ static int cm_lap_handler(struct cm_work *work) struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2376,8 +2358,7 @@ static int cm_lap_handler(struct cm_work *work) cm_id_priv->private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - if (ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr)) + if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; default: @@ -2433,7 +2414,6 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2456,8 +2436,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id, cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, info, info_length, private_data, private_data_len); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2496,8 +2475,7 @@ static int cm_apr_handler(struct cm_work *work) goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_id_priv->msg = NULL; ret = atomic_inc_and_test(&cm_id_priv->work_count); @@ -2572,7 +2550,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2595,13 +2572,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, param); - msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms; + msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state == IB_CM_IDLE) - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); else ret = -EINVAL; @@ -2629,7 +2605,6 @@ static void cm_format_sidr_req_event(struct cm_work *work, param = &work->cm_event.param.sidr_req_rcvd; param->pkey = __be16_to_cpu(sidr_req_msg->pkey); param->listen_id = listen_id; - param->device = work->port->mad_agent->device; param->port = work->port->port_num; work->cm_event.private_data = &sidr_req_msg->private_data; } @@ -2642,7 +2617,7 @@ static int cm_sidr_req_handler(struct cm_work *work) struct ib_wc *wc; unsigned long flags; - cm_id = ib_create_cm_id(NULL, NULL); + cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -2666,7 +2641,8 @@ static int cm_sidr_req_handler(struct cm_work *work) spin_unlock_irqrestore(&cm.lock, flags); goto out; /* Duplicate message. */ } - cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id); + cur_cm_id_priv = cm_find_listen(cm_id->device, + sidr_req_msg->service_id); if (!cur_cm_id_priv) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irqrestore(&cm.lock, flags); @@ -2715,7 +2691,6 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; unsigned long flags; int ret; @@ -2737,8 +2712,7 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); - ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, - &msg->send_wr, &bad_send_wr); + ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); @@ -2791,8 +2765,7 @@ static int cm_sidr_rep_handler(struct cm_work *work) goto out; } cm_id_priv->id.state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - (unsigned long) cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_format_sidr_rep_event(work); @@ -2860,9 +2833,7 @@ discard: static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_mad_send_buf *msg; - - msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id; + struct ib_mad_send_buf *msg = mad_send_wc->send_buf; switch (mad_send_wc->status) { case IB_WC_SUCCESS: @@ -3064,10 +3035,10 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; - qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) - qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; @@ -3097,14 +3068,18 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | - IB_QP_DEST_QPN | IB_QP_RQ_PSN | - IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; + IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); - qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources; - qp_attr->min_rnr_timer = 0; + if (cm_id_priv->qp_type == IB_QPT_RC) { + *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER; + qp_attr->max_dest_rd_atomic = + cm_id_priv->responder_resources; + qp_attr->min_rnr_timer = 0; + } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; @@ -3133,14 +3108,17 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: - *qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY | IB_QP_SQ_PSN | - IB_QP_MAX_QP_RD_ATOMIC; - qp_attr->timeout = cm_id_priv->local_ack_timeout; - qp_attr->retry_cnt = cm_id_priv->retry_count; - qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); - qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + if (cm_id_priv->qp_type == IB_QPT_RC) { + *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC; + qp_attr->timeout = cm_id_priv->local_ack_timeout; + qp_attr->retry_cnt = cm_id_priv->retry_count; + qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; + qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; + } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; qp_attr->path_mig_state = IB_MIG_REARM; @@ -3323,6 +3301,7 @@ static void __exit ib_cm_cleanup(void) flush_workqueue(cm.wq); destroy_workqueue(cm.wq); ib_unregister_client(&cm_client); + idr_destroy(&cm.local_id_table); } module_init(ib_cm_init); diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 813ab70..4d3aee9 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -186,6 +186,7 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, req_msg->offset40 = cpu_to_be32((be32_to_cpu( req_msg->offset40) & 0xFFFFFFF9) | 0x2); + break; default: req_msg->offset40 = cpu_to_be32(be32_to_cpu( req_msg->offset40) & diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d3cf84e..5a6e449 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -514,6 +514,12 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { + if (device->node_type == IB_NODE_SWITCH) { + if (port_num) + return -EINVAL; + } else if (port_num < 1 || port_num > device->phys_port_cnt) + return -EINVAL; + return device->query_port(device, port_num, port_attr); } EXPORT_SYMBOL(ib_query_port); @@ -583,6 +589,12 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { + if (device->node_type == IB_NODE_SWITCH) { + if (port_num) + return -EINVAL; + } else if (port_num < 1 || port_num > device->phys_port_cnt) + return -EINVAL; + return device->modify_port(device, port_num, port_modify_mask, port_modify); } diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a14ca87..88f9f8c 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -579,7 +579,7 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list) } static void snoop_send(struct ib_mad_qp_info *qp_info, - struct ib_send_wr *send_wr, + struct ib_mad_send_buf *send_buf, struct ib_mad_send_wc *mad_send_wc, int mad_snoop_flags) { @@ -597,7 +597,7 @@ static void snoop_send(struct ib_mad_qp_info *qp_info, atomic_inc(&mad_snoop_priv->refcount); spin_unlock_irqrestore(&qp_info->snoop_lock, flags); mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, - send_wr, mad_send_wc); + send_buf, mad_send_wc); if (atomic_dec_and_test(&mad_snoop_priv->refcount)) wake_up(&mad_snoop_priv->wait); spin_lock_irqsave(&qp_info->snoop_lock, flags); @@ -654,10 +654,10 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, * Return < 0 if error */ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, - struct ib_smp *smp, - struct ib_send_wr *send_wr) + struct ib_mad_send_wr_private *mad_send_wr) { int ret; + struct ib_smp *smp = mad_send_wr->send_buf.mad; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; @@ -666,6 +666,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_device *device = mad_agent_priv->agent.device; u8 port_num = mad_agent_priv->agent.port_num; struct ib_wc mad_wc; + struct ib_send_wr *send_wr = &mad_send_wr->send_wr; if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) { ret = -EINVAL; @@ -745,13 +746,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, goto out; } - local->send_wr = *send_wr; - local->send_wr.sg_list = local->sg_list; - memcpy(local->sg_list, send_wr->sg_list, - sizeof *send_wr->sg_list * send_wr->num_sge); - local->send_wr.next = NULL; - local->tid = send_wr->wr.ud.mad_hdr->tid; - local->wr_id = send_wr->wr_id; + local->mad_send_wr = mad_send_wr; /* Reference MAD agent until send side of local completion handled */ atomic_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ @@ -781,17 +776,17 @@ static int get_buf_length(int hdr_len, int data_len) struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, - struct ib_ah *ah, int rmpp_active, + int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask) { struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_send_buf *send_buf; + struct ib_mad_send_wr_private *mad_send_wr; int buf_size; void *buf; - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, agent); + mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, + agent); buf_size = get_buf_length(hdr_len, data_len); if ((!mad_agent->rmpp_version && @@ -799,45 +794,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, (!rmpp_active && buf_size > sizeof(struct ib_mad))) return ERR_PTR(-EINVAL); - buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask); + buf = kmalloc(sizeof *mad_send_wr + buf_size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); - memset(buf, 0, sizeof *send_buf + buf_size); - - send_buf = buf + buf_size; - send_buf->mad = buf; - - send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device, - buf, buf_size, DMA_TO_DEVICE); - pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr); - send_buf->sge.length = buf_size; - send_buf->sge.lkey = mad_agent->mr->lkey; - - send_buf->send_wr.wr_id = (unsigned long) send_buf; - send_buf->send_wr.sg_list = &send_buf->sge; - send_buf->send_wr.num_sge = 1; - send_buf->send_wr.opcode = IB_WR_SEND; - send_buf->send_wr.send_flags = IB_SEND_SIGNALED; - send_buf->send_wr.wr.ud.ah = ah; - send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr; - send_buf->send_wr.wr.ud.remote_qpn = remote_qpn; - send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - send_buf->send_wr.wr.ud.pkey_index = pkey_index; + memset(buf, 0, sizeof *mad_send_wr + buf_size); + + mad_send_wr = buf + buf_size; + mad_send_wr->send_buf.mad = buf; + + mad_send_wr->mad_agent_priv = mad_agent_priv; + mad_send_wr->sg_list[0].length = buf_size; + mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey; + + mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; + mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; + mad_send_wr->send_wr.num_sge = 1; + mad_send_wr->send_wr.opcode = IB_WR_SEND; + mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; + mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; + mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; + mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; if (rmpp_active) { - struct ib_rmpp_mad *rmpp_mad; - rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad; + struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad; rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len - - offsetof(struct ib_rmpp_mad, data) + data_len); + IB_MGMT_RMPP_HDR + data_len); rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version; rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); } - send_buf->mad_agent = mad_agent; + mad_send_wr->send_buf.mad_agent = mad_agent; atomic_inc(&mad_agent_priv->refcount); - return send_buf; + return &mad_send_wr->send_buf; } EXPORT_SYMBOL(ib_create_send_mad); @@ -847,10 +837,6 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf) mad_agent_priv = container_of(send_buf->mad_agent, struct ib_mad_agent_private, agent); - - dma_unmap_single(send_buf->mad_agent->device->dma_device, - pci_unmap_addr(send_buf, mapping), - send_buf->sge.length, DMA_TO_DEVICE); kfree(send_buf->mad); if (atomic_dec_and_test(&mad_agent_priv->refcount)) @@ -861,8 +847,10 @@ EXPORT_SYMBOL(ib_free_send_mad); int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; - struct ib_send_wr *bad_send_wr; struct list_head *list; + struct ib_send_wr *bad_send_wr; + struct ib_mad_agent *mad_agent; + struct ib_sge *sge; unsigned long flags; int ret; @@ -871,10 +859,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; + mad_agent = mad_send_wr->send_buf.mad_agent; + sge = mad_send_wr->sg_list; + sge->addr = dma_map_single(mad_agent->device->dma_device, + mad_send_wr->send_buf.mad, sge->length, + DMA_TO_DEVICE); + pci_unmap_addr_set(mad_send_wr, mapping, sge->addr); + spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { - ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp, - &mad_send_wr->send_wr, &bad_send_wr); + ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr, + &bad_send_wr); list = &qp_info->send_queue.list; } else { ret = 0; @@ -886,6 +881,11 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) list_add_tail(&mad_send_wr->mad_list.list, list); } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); + if (ret) + dma_unmap_single(mad_agent->device->dma_device, + pci_unmap_addr(mad_send_wr, mapping), + sge->length, DMA_TO_DEVICE); + return ret; } @@ -893,45 +893,28 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client */ -int ib_post_send_mad(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr) +int ib_post_send_mad(struct ib_mad_send_buf *send_buf, + struct ib_mad_send_buf **bad_send_buf) { - int ret = -EINVAL; struct ib_mad_agent_private *mad_agent_priv; - - /* Validate supplied parameters */ - if (!bad_send_wr) - goto error1; - - if (!mad_agent || !send_wr) - goto error2; - - if (!mad_agent->send_handler) - goto error2; - - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, - agent); + struct ib_mad_send_buf *next_send_buf; + struct ib_mad_send_wr_private *mad_send_wr; + unsigned long flags; + int ret = -EINVAL; /* Walk list of send WRs and post each on send list */ - while (send_wr) { - unsigned long flags; - struct ib_send_wr *next_send_wr; - struct ib_mad_send_wr_private *mad_send_wr; - struct ib_smp *smp; - - /* Validate more parameters */ - if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG) - goto error2; + for (; send_buf; send_buf = next_send_buf) { - if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler) - goto error2; - - if (!send_wr->wr.ud.mad_hdr) { - printk(KERN_ERR PFX "MAD header must be supplied " - "in WR %p\n", send_wr); - goto error2; + mad_send_wr = container_of(send_buf, + struct ib_mad_send_wr_private, + send_buf); + mad_agent_priv = mad_send_wr->mad_agent_priv; + + if (!send_buf->mad_agent->send_handler || + (send_buf->timeout_ms && + !send_buf->mad_agent->recv_handler)) { + ret = -EINVAL; + goto error; } /* @@ -939,40 +922,24 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, * current one completes, and the user modifies the work * request associated with the completion */ - next_send_wr = (struct ib_send_wr *)send_wr->next; + next_send_buf = send_buf->next; + mad_send_wr->send_wr.wr.ud.ah = send_buf->ah; - smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr; - if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - ret = handle_outgoing_dr_smp(mad_agent_priv, smp, - send_wr); + if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + ret = handle_outgoing_dr_smp(mad_agent_priv, + mad_send_wr); if (ret < 0) /* error */ - goto error2; + goto error; else if (ret == 1) /* locally consumed */ - goto next; + continue; } - /* Allocate MAD send WR tracking structure */ - mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC); - if (!mad_send_wr) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_send_wr_private\n"); - ret = -ENOMEM; - goto error2; - } - memset(mad_send_wr, 0, sizeof *mad_send_wr); - - mad_send_wr->send_wr = *send_wr; - mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - memcpy(mad_send_wr->sg_list, send_wr->sg_list, - sizeof *send_wr->sg_list * send_wr->num_sge); - mad_send_wr->wr_id = send_wr->wr_id; - mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid; - mad_send_wr->mad_agent_priv = mad_agent_priv; + mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; /* Timeout will be updated after send completes */ - mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr. - ud.timeout_ms); - mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; - /* One reference for each work request to QP + response */ + mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); + mad_send_wr->retries = send_buf->retries; + /* Reference for work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -995,16 +962,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent, list_del(&mad_send_wr->agent_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); atomic_dec(&mad_agent_priv->refcount); - goto error2; + goto error; } -next: - send_wr = next_send_wr; } return 0; - -error2: - *bad_send_wr = send_wr; -error1: +error: + if (bad_send_buf) + *bad_send_buf = send_buf; return ret; } EXPORT_SYMBOL(ib_post_send_mad); @@ -1447,8 +1411,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv, * of MAD. */ hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32; - list_for_each_entry(entry, &port_priv->agent_list, - agent_list) { + list_for_each_entry(entry, &port_priv->agent_list, agent_list) { if (entry->agent.hi_tid == hi_tid) { mad_agent = entry; break; @@ -1571,8 +1534,7 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid) */ list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, - mad_send_wr->send_wr.wr.ud.mad_hdr) && + if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && mad_send_wr->tid == tid && mad_send_wr->timeout) { /* Verify request has not been canceled */ return (mad_send_wr->status == IB_WC_SUCCESS) ? @@ -1628,14 +1590,14 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ - mad_recv_wc->wc->wr_id = mad_send_wr->wr_id; + mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, mad_recv_wc); atomic_dec(&mad_agent_priv->refcount); mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; - mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, @@ -1728,11 +1690,11 @@ local: if (ret & IB_MAD_RESULT_CONSUMED) goto out; if (ret & IB_MAD_RESULT_REPLY) { - /* Send response */ - if (!agent_send(response, &recv->grh, wc, - port_priv->device, - port_priv->port_num)) - response = NULL; + agent_send_response(&response->mad.mad, + &recv->grh, wc, + port_priv->device, + port_priv->port_num, + qp_info->qp->qp_num); goto out; } } @@ -1866,15 +1828,15 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, if (mad_send_wr->status != IB_WC_SUCCESS ) mad_send_wc->status = mad_send_wr->status; - if (ret != IB_RMPP_RESULT_INTERNAL) + if (ret == IB_RMPP_RESULT_INTERNAL) + ib_rmpp_send_handler(mad_send_wc); + else mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, mad_send_wc); /* Release reference on agent taken when sending */ if (atomic_dec_and_test(&mad_agent_priv->refcount)) wake_up(&mad_agent_priv->wait); - - kfree(mad_send_wr); return; done: spin_unlock_irqrestore(&mad_agent_priv->lock, flags); @@ -1888,6 +1850,7 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info; struct ib_mad_queue *send_queue; struct ib_send_wr *bad_send_wr; + struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; @@ -1898,6 +1861,9 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, qp_info = send_queue->qp_info; retry: + dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device, + pci_unmap_addr(mad_send_wr, mapping), + mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); queued_send_wr = NULL; spin_lock_irqsave(&send_queue->lock, flags); list_del(&mad_list->list); @@ -1914,17 +1880,17 @@ retry: } spin_unlock_irqrestore(&send_queue->lock, flags); - /* Restore client wr_id in WC and complete send */ - wc->wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + mad_send_wc.status = wc->status; + mad_send_wc.vendor_err = wc->vendor_err; if (atomic_read(&qp_info->snoop_count)) - snoop_send(qp_info, &mad_send_wr->send_wr, - (struct ib_mad_send_wc *)wc, + snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); - ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc); + ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, - &bad_send_wr); + &bad_send_wr); if (ret) { printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); mad_send_wr = queued_send_wr; @@ -2066,38 +2032,37 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &cancel_list, agent_list) { - mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + list_del(&mad_send_wr->agent_list); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - - list_del(&mad_send_wr->agent_list); - kfree(mad_send_wr); atomic_dec(&mad_agent_priv->refcount); } } static struct ib_mad_send_wr_private* -find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id) +find_send_wr(struct ib_mad_agent_private *mad_agent_priv, + struct ib_mad_send_buf *send_buf) { struct ib_mad_send_wr_private *mad_send_wr; list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, agent_list) { - if (mad_send_wr->wr_id == wr_id) + if (&mad_send_wr->send_buf == send_buf) return mad_send_wr; } list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { - if (is_data_mad(mad_agent_priv, - mad_send_wr->send_wr.wr.ud.mad_hdr) && - mad_send_wr->wr_id == wr_id) + if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && + &mad_send_wr->send_buf == send_buf) return mad_send_wr; } return NULL; } -int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) +int ib_modify_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; @@ -2107,7 +2072,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); - mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id); + mad_send_wr = find_send_wr(mad_agent_priv, send_buf); if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return -EINVAL; @@ -2119,7 +2084,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } - mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms; + mad_send_wr->send_buf.timeout_ms = timeout_ms; if (active) mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); else @@ -2130,9 +2095,10 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms) } EXPORT_SYMBOL(ib_modify_mad); -void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id) +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf) { - ib_modify_mad(mad_agent, wr_id, 0); + ib_modify_mad(mad_agent, send_buf, 0); } EXPORT_SYMBOL(ib_cancel_mad); @@ -2166,10 +2132,9 @@ static void local_completions(void *data) * Defined behavior is to complete response * before request */ - build_smp_wc(local->wr_id, + build_smp_wc((unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), - 0 /* pkey index */, - recv_mad_agent->agent.port_num, &wc); + 0, recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; local->mad_priv->header.recv_wc.mad_len = @@ -2196,11 +2161,11 @@ local_send_completion: /* Complete send */ mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; - mad_send_wc.wr_id = local->wr_id; + mad_send_wc.send_buf = &local->mad_send_wr->send_buf; if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) - snoop_send(mad_agent_priv->qp_info, &local->send_wr, - &mad_send_wc, - IB_MAD_SNOOP_SEND_COMPLETIONS); + snoop_send(mad_agent_priv->qp_info, + &local->mad_send_wr->send_buf, + &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); @@ -2221,8 +2186,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) if (!mad_send_wr->retries--) return -ETIMEDOUT; - mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr. - wr.ud.timeout_ms); + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { ret = ib_retry_rmpp(mad_send_wr); @@ -2285,11 +2249,10 @@ static void timeout_sends(void *data) mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; else mad_send_wc.status = mad_send_wr->status; - mad_send_wc.wr_id = mad_send_wr->wr_id; + mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); - kfree(mad_send_wr); atomic_dec(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); } @@ -2683,40 +2646,47 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) static void ib_mad_init_device(struct ib_device *device) { - int num_ports, cur_port, i; + int start, end, i; if (device->node_type == IB_NODE_SWITCH) { - num_ports = 1; - cur_port = 0; + start = 0; + end = 0; } else { - num_ports = device->phys_port_cnt; - cur_port = 1; + start = 1; + end = device->phys_port_cnt; } - for (i = 0; i < num_ports; i++, cur_port++) { - if (ib_mad_port_open(device, cur_port)) { + + for (i = start; i <= end; i++) { + if (ib_mad_port_open(device, i)) { printk(KERN_ERR PFX "Couldn't open %s port %d\n", - device->name, cur_port); - goto error_device_open; + device->name, i); + goto error; } - if (ib_agent_port_open(device, cur_port)) { + if (ib_agent_port_open(device, i)) { printk(KERN_ERR PFX "Couldn't open %s port %d " "for agents\n", - device->name, cur_port); - goto error_device_open; + device->name, i); + goto error_agent; } } return; -error_device_open: - while (i > 0) { - cur_port--; - if (ib_agent_port_close(device, cur_port)) +error_agent: + if (ib_mad_port_close(device, i)) + printk(KERN_ERR PFX "Couldn't close %s port %d\n", + device->name, i); + +error: + i--; + + while (i >= start) { + if (ib_agent_port_close(device, i)) printk(KERN_ERR PFX "Couldn't close %s port %d " "for agents\n", - device->name, cur_port); - if (ib_mad_port_close(device, cur_port)) + device->name, i); + if (ib_mad_port_close(device, i)) printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, cur_port); + device->name, i); i--; } } @@ -2754,7 +2724,6 @@ static int __init ib_mad_init_module(void) int ret; spin_lock_init(&ib_mad_port_list_lock); - spin_lock_init(&ib_agent_port_list_lock); ib_mad_cache = kmem_cache_create("ib_mad", sizeof(struct ib_mad_private), diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index f1ba794..570f786 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -118,9 +118,10 @@ struct ib_mad_send_wr_private { struct ib_mad_list_head mad_list; struct list_head agent_list; struct ib_mad_agent_private *mad_agent_priv; + struct ib_mad_send_buf send_buf; + DECLARE_PCI_UNMAP_ADDR(mapping) struct ib_send_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; - u64 wr_id; /* client WR ID */ __be64 tid; unsigned long timeout; int retries; @@ -141,10 +142,7 @@ struct ib_mad_local_private { struct list_head completion_list; struct ib_mad_private *mad_priv; struct ib_mad_agent_private *recv_mad_agent; - struct ib_send_wr send_wr; - struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; - u64 wr_id; /* client WR ID */ - __be64 tid; + struct ib_mad_send_wr_private *mad_send_wr; }; struct ib_mad_mgmt_method_table { diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index e23836d..3249e1d 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -103,12 +103,12 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) static int data_offset(u8 mgmt_class) { if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) - return offsetof(struct ib_sa_mad, data); + return IB_MGMT_SA_HDR; else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) - return offsetof(struct ib_vendor_mad, data); + return IB_MGMT_VENDOR_HDR; else - return offsetof(struct ib_rmpp_mad, data); + return IB_MGMT_RMPP_HDR; } static void format_ack(struct ib_rmpp_mad *ack, @@ -135,55 +135,52 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, struct ib_mad_recv_wc *recv_wc) { struct ib_mad_send_buf *msg; - struct ib_send_wr *bad_send_wr; - int hdr_len, ret; + int ret; - hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, rmpp_recv->ah, 1, - hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len, - GFP_KERNEL); + recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR, + IB_MGMT_RMPP_DATA, GFP_KERNEL); if (!msg) return; - format_ack((struct ib_rmpp_mad *) msg->mad, - (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); - ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr, - &bad_send_wr); + format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, + rmpp_recv); + msg->ah = rmpp_recv->ah; + ret = ib_post_send_mad(msg, NULL); if (ret) ib_free_send_mad(msg); } -static int alloc_response_msg(struct ib_mad_agent *agent, - struct ib_mad_recv_wc *recv_wc, - struct ib_mad_send_buf **msg) +static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, + struct ib_mad_recv_wc *recv_wc) { - struct ib_mad_send_buf *m; + struct ib_mad_send_buf *msg; struct ib_ah *ah; - int hdr_len; ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, recv_wc->recv_buf.grh, agent->port_num); if (IS_ERR(ah)) - return PTR_ERR(ah); - - hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr); - m = ib_create_send_mad(agent, recv_wc->wc->src_qp, - recv_wc->wc->pkey_index, ah, 1, hdr_len, - sizeof(struct ib_rmpp_mad) - hdr_len, - GFP_KERNEL); - if (IS_ERR(m)) { + return (void *) ah; + + msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, + recv_wc->wc->pkey_index, 1, + IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, + GFP_KERNEL); + if (IS_ERR(msg)) ib_destroy_ah(ah); - return PTR_ERR(m); - } - *msg = m; - return 0; + else + msg->ah = ah; + + return msg; } -static void free_msg(struct ib_mad_send_buf *msg) +void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { - ib_destroy_ah(msg->send_wr.wr.ud.ah); - ib_free_send_mad(msg); + struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad; + + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_ACK) + ib_destroy_ah(mad_send_wc->send_buf->ah); + ib_free_send_mad(mad_send_wc->send_buf); } static void nack_recv(struct ib_mad_agent_private *agent, @@ -191,14 +188,13 @@ static void nack_recv(struct ib_mad_agent_private *agent, { struct ib_mad_send_buf *msg; struct ib_rmpp_mad *rmpp_mad; - struct ib_send_wr *bad_send_wr; int ret; - ret = alloc_response_msg(&agent->agent, recv_wc, &msg); - if (ret) + msg = alloc_response_msg(&agent->agent, recv_wc); + if (IS_ERR(msg)) return; - rmpp_mad = (struct ib_rmpp_mad *) msg->mad; + rmpp_mad = msg->mad; memcpy(rmpp_mad, recv_wc->recv_buf.mad, data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class)); @@ -210,9 +206,11 @@ static void nack_recv(struct ib_mad_agent_private *agent, rmpp_mad->rmpp_hdr.seg_num = 0; rmpp_mad->rmpp_hdr.paylen_newwin = 0; - ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr); - if (ret) - free_msg(msg); + ret = ib_post_send_mad(msg, NULL); + if (ret) { + ib_destroy_ah(msg->ah); + ib_free_send_mad(msg); + } } static void recv_timeout_handler(void *data) @@ -585,7 +583,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) int timeout; u32 paylen; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num); @@ -612,7 +610,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) } /* 2 seconds for an ACK until we can find the packet lifetime */ - timeout = mad_send_wr->send_wr.wr.ud.timeout_ms; + timeout = mad_send_wr->send_buf.timeout_ms; if (!timeout || timeout > 2000) mad_send_wr->timeout = msecs_to_jiffies(2000); mad_send_wr->seg_num++; @@ -640,7 +638,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid, wc.status = IB_WC_REM_ABORT_ERR; wc.vendor_err = rmpp_status; - wc.wr_id = mad_send_wr->wr_id; + wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &wc); return; out: @@ -694,12 +692,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (seg_num > mad_send_wr->last_ack) { mad_send_wr->last_ack = seg_num; - mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries; + mad_send_wr->retries = mad_send_wr->send_buf.retries; } mad_send_wr->newwin = newwin; if (mad_send_wr->last_ack == mad_send_wr->total_seg) { /* If no response is expected, the ACK completes the send */ - if (!mad_send_wr->send_wr.wr.ud.timeout_ms) { + if (!mad_send_wr->send_buf.timeout_ms) { struct ib_mad_send_wc wc; ib_mark_mad_done(mad_send_wr); @@ -707,13 +705,13 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, wc.status = IB_WC_SUCCESS; wc.vendor_err = 0; - wc.wr_id = mad_send_wr->wr_id; + wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &wc); return; } if (mad_send_wr->refcount == 1) - ib_reset_mad_timeout(mad_send_wr, mad_send_wr-> - send_wr.wr.ud.timeout_ms); + ib_reset_mad_timeout(mad_send_wr, + mad_send_wr->send_buf.timeout_ms); } else if (mad_send_wr->refcount == 1 && mad_send_wr->seg_num < mad_send_wr->newwin && mad_send_wr->seg_num <= mad_send_wr->total_seg) { @@ -842,7 +840,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) struct ib_rmpp_mad *rmpp_mad; int i, total_len, ret; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; @@ -863,7 +861,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) / (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset); - mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) - + mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); /* We need to wait for the final ACK even if there isn't a response */ @@ -878,23 +876,15 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc) { struct ib_rmpp_mad *rmpp_mad; - struct ib_mad_send_buf *msg; int ret; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ - if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { - msg = (struct ib_mad_send_buf *) (unsigned long) - mad_send_wc->wr_id; - if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK) - ib_free_send_mad(msg); - else - free_msg(msg); + if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ - } if (mad_send_wc->status != IB_WC_SUCCESS || mad_send_wr->status != IB_WC_SUCCESS) @@ -905,7 +895,7 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, if (mad_send_wr->last_ack == mad_send_wr->total_seg) { mad_send_wr->timeout = - msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms); + msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); return IB_RMPP_RESULT_PROCESSED; /* Send done */ } @@ -926,7 +916,7 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) struct ib_rmpp_mad *rmpp_mad; int ret; - rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; + rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h index c4924df..f0616fd 100644 --- a/drivers/infiniband/core/mad_rmpp.h +++ b/drivers/infiniband/core/mad_rmpp.h @@ -51,6 +51,8 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); +void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc); + void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent); int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 2626182..89ce9dc 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -73,11 +73,10 @@ struct ib_sa_device { struct ib_sa_query { void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); void (*release)(struct ib_sa_query *); - struct ib_sa_port *port; - struct ib_sa_mad *mad; - struct ib_sa_sm_ah *sm_ah; - DECLARE_PCI_UNMAP_ADDR(mapping) - int id; + struct ib_sa_port *port; + struct ib_mad_send_buf *mad_buf; + struct ib_sa_sm_ah *sm_ah; + int id; }; struct ib_sa_service_query { @@ -426,6 +425,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) { unsigned long flags; struct ib_mad_agent *agent; + struct ib_mad_send_buf *mad_buf; spin_lock_irqsave(&idr_lock, flags); if (idr_find(&query_idr, id) != query) { @@ -433,9 +433,10 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) return; } agent = query->port->agent; + mad_buf = query->mad_buf; spin_unlock_irqrestore(&idr_lock, flags); - ib_cancel_mad(agent, id); + ib_cancel_mad(agent, mad_buf); } EXPORT_SYMBOL(ib_sa_cancel_query); @@ -457,71 +458,46 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms) { - struct ib_sa_port *port = query->port; unsigned long flags; - int ret; - struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr = { - .opcode = IB_WR_SEND, - .sg_list = &gather_list, - .num_sge = 1, - .send_flags = IB_SEND_SIGNALED, - .wr = { - .ud = { - .mad_hdr = &query->mad->mad_hdr, - .remote_qpn = 1, - .remote_qkey = IB_QP1_QKEY, - .timeout_ms = timeout_ms, - } - } - }; + int ret, id; retry: if (!idr_pre_get(&query_idr, GFP_ATOMIC)) return -ENOMEM; spin_lock_irqsave(&idr_lock, flags); - ret = idr_get_new(&query_idr, query, &query->id); + ret = idr_get_new(&query_idr, query, &id); spin_unlock_irqrestore(&idr_lock, flags); if (ret == -EAGAIN) goto retry; if (ret) return ret; - wr.wr_id = query->id; + query->mad_buf->timeout_ms = timeout_ms; + query->mad_buf->context[0] = query; + query->id = id; - spin_lock_irqsave(&port->ah_lock, flags); - kref_get(&port->sm_ah->ref); - query->sm_ah = port->sm_ah; - wr.wr.ud.ah = port->sm_ah->ah; - spin_unlock_irqrestore(&port->ah_lock, flags); + spin_lock_irqsave(&query->port->ah_lock, flags); + kref_get(&query->port->sm_ah->ref); + query->sm_ah = query->port->sm_ah; + spin_unlock_irqrestore(&query->port->ah_lock, flags); - gather_list.addr = dma_map_single(port->agent->device->dma_device, - query->mad, - sizeof (struct ib_sa_mad), - DMA_TO_DEVICE); - gather_list.length = sizeof (struct ib_sa_mad); - gather_list.lkey = port->agent->mr->lkey; - pci_unmap_addr_set(query, mapping, gather_list.addr); + query->mad_buf->ah = query->sm_ah->ah; - ret = ib_post_send_mad(port->agent, &wr, &bad_wr); + ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { - dma_unmap_single(port->agent->device->dma_device, - pci_unmap_addr(query, mapping), - sizeof (struct ib_sa_mad), - DMA_TO_DEVICE); - kref_put(&query->sm_ah->ref, free_sm_ah); spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, query->id); + idr_remove(&query_idr, id); spin_unlock_irqrestore(&idr_lock, flags); + + kref_put(&query->sm_ah->ref, free_sm_ah); } /* * It's not safe to dereference query any more, because the * send may already have completed and freed the query in - * another context. So use wr.wr_id, which has a copy of the - * query's id. + * another context. */ - return ret ? ret : wr.wr_id; + return ret ? ret : id; } static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, @@ -543,7 +519,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) { - kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); } @@ -583,43 +558,58 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, { struct ib_sa_path_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); - if (!query->sa_query.mad) { - kfree(query); - return -ENOMEM; + + query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, + 0, IB_MGMT_SA_HDR, + IB_MGMT_SA_DATA, gfp_mask); + if (!query->sa_query.mad_buf) { + ret = -ENOMEM; + goto err1; } query->callback = callback; query->context = context; - init_mad(query->sa_query.mad, agent); + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); - query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; - query->sa_query.release = ib_sa_path_rec_release; - query->sa_query.port = port; - query->sa_query.mad->mad_hdr.method = IB_MGMT_METHOD_GET; - query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); - query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; + query->sa_query.release = ib_sa_path_rec_release; + query->sa_query.port = port; + mad->mad_hdr.method = IB_MGMT_METHOD_GET; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); + mad->sa_hdr.comp_mask = comp_mask; - ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), - rec, query->sa_query.mad->data); + ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) { - *sa_query = NULL; - kfree(query->sa_query.mad); - kfree(query); - } + if (ret < 0) + goto err2; + + return ret; +err2: + *sa_query = NULL; + ib_free_send_mad(query->sa_query.mad_buf); + +err1: + kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_path_rec_get); @@ -643,7 +633,6 @@ static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) { - kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_service_query, sa_query)); } @@ -685,10 +674,17 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, { struct ib_sa_service_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + if (method != IB_MGMT_METHOD_GET && method != IB_MGMT_METHOD_SET && method != IB_SA_METHOD_DELETE) @@ -697,37 +693,45 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); - if (!query->sa_query.mad) { - kfree(query); - return -ENOMEM; + + query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, + 0, IB_MGMT_SA_HDR, + IB_MGMT_SA_DATA, gfp_mask); + if (!query->sa_query.mad_buf) { + ret = -ENOMEM; + goto err1; } query->callback = callback; query->context = context; - init_mad(query->sa_query.mad, agent); + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); - query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; - query->sa_query.release = ib_sa_service_rec_release; - query->sa_query.port = port; - query->sa_query.mad->mad_hdr.method = method; - query->sa_query.mad->mad_hdr.attr_id = - cpu_to_be16(IB_SA_ATTR_SERVICE_REC); - query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; + query->sa_query.release = ib_sa_service_rec_release; + query->sa_query.port = port; + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); + mad->sa_hdr.comp_mask = comp_mask; ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), - rec, query->sa_query.mad->data); + rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) { - *sa_query = NULL; - kfree(query->sa_query.mad); - kfree(query); - } + if (ret < 0) + goto err2; + + return ret; +err2: + *sa_query = NULL; + ib_free_send_mad(query->sa_query.mad_buf); + +err1: + kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_service_rec_query); @@ -751,7 +755,6 @@ static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) { - kfree(sa_query->mad); kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } @@ -768,60 +771,69 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, { struct ib_sa_mcmember_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); - struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port]; - struct ib_mad_agent *agent = port->agent; + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; int ret; + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + query = kmalloc(sizeof *query, gfp_mask); if (!query) return -ENOMEM; - query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask); - if (!query->sa_query.mad) { - kfree(query); - return -ENOMEM; + + query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, + 0, IB_MGMT_SA_HDR, + IB_MGMT_SA_DATA, gfp_mask); + if (!query->sa_query.mad_buf) { + ret = -ENOMEM; + goto err1; } query->callback = callback; query->context = context; - init_mad(query->sa_query.mad, agent); + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); - query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; - query->sa_query.release = ib_sa_mcmember_rec_release; - query->sa_query.port = port; - query->sa_query.mad->mad_hdr.method = method; - query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); - query->sa_query.mad->sa_hdr.comp_mask = comp_mask; + query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; + query->sa_query.release = ib_sa_mcmember_rec_release; + query->sa_query.port = port; + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); + mad->sa_hdr.comp_mask = comp_mask; ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), - rec, query->sa_query.mad->data); + rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms); - if (ret < 0) { - *sa_query = NULL; - kfree(query->sa_query.mad); - kfree(query); - } + if (ret < 0) + goto err2; return ret; + +err2: + *sa_query = NULL; + ib_free_send_mad(query->sa_query.mad_buf); + +err1: + kfree(query); + return ret; } EXPORT_SYMBOL(ib_sa_mcmember_rec_query); static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { - struct ib_sa_query *query; + struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; unsigned long flags; - spin_lock_irqsave(&idr_lock, flags); - query = idr_find(&query_idr, mad_send_wc->wr_id); - spin_unlock_irqrestore(&idr_lock, flags); - - if (!query) - return; - if (query->callback) switch (mad_send_wc->status) { case IB_WC_SUCCESS: @@ -838,30 +850,25 @@ static void send_handler(struct ib_mad_agent *agent, break; } - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(query, mapping), - sizeof (struct ib_sa_mad), - DMA_TO_DEVICE); - kref_put(&query->sm_ah->ref, free_sm_ah); - - query->release(query); - spin_lock_irqsave(&idr_lock, flags); - idr_remove(&query_idr, mad_send_wc->wr_id); + idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); + + ib_free_send_mad(mad_send_wc->send_buf); + kref_put(&query->sm_ah->ref, free_sm_ah); + query->release(query); } static void recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_sa_query *query; - unsigned long flags; + struct ib_mad_send_buf *mad_buf; - spin_lock_irqsave(&idr_lock, flags); - query = idr_find(&query_idr, mad_recv_wc->wc->wr_id); - spin_unlock_irqrestore(&idr_lock, flags); + mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id; + query = mad_buf->context[0]; - if (query && query->callback) { + if (query->callback) { if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? @@ -975,6 +982,7 @@ static int __init ib_sa_init(void) static void __exit ib_sa_cleanup(void) { ib_unregister_client(&sa_client); + idr_destroy(&query_idr); } module_init(ib_sa_init); diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index db25503..2b3c401 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -39,6 +39,8 @@ #ifndef __SMI_H_ #define __SMI_H_ +#include <rdma/ib_smi.h> + int smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, int port_num, diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 211ba32..7ce7a6c 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -65,6 +65,11 @@ struct port_table_attribute { int index; }; +static inline int ibdev_is_alive(const struct ib_device *dev) +{ + return dev->reg_state == IB_DEV_REGISTERED; +} + static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -74,6 +79,8 @@ static ssize_t port_attr_show(struct kobject *kobj, if (!port_attr->show) return -EIO; + if (!ibdev_is_alive(p->ibdev)) + return -ENODEV; return port_attr->show(p, port_attr, buf); } @@ -581,6 +588,9 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf) { struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); + if (!ibdev_is_alive(dev)) + return -ENODEV; + switch (dev->node_type) { case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type); case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); @@ -595,6 +605,9 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf) struct ib_device_attr attr; ssize_t ret; + if (!ibdev_is_alive(dev)) + return -ENODEV; + ret = ib_query_device(dev, &attr); if (ret) return ret; @@ -612,6 +625,9 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf) struct ib_device_attr attr; ssize_t ret; + if (!ibdev_is_alive(dev)) + return -ENODEV; + ret = ib_query_device(dev, &attr); if (ret) return ret; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 021b8f1..2847756 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -41,37 +41,81 @@ #include <linux/file.h> #include <linux/mount.h> #include <linux/cdev.h> +#include <linux/idr.h> #include <asm/uaccess.h> -#include "ucm.h" +#include <rdma/ib_cm.h> +#include <rdma/ib_user_cm.h> MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); MODULE_LICENSE("Dual BSD/GPL"); -static int ucm_debug_level; +struct ib_ucm_device { + int devnum; + struct cdev dev; + struct class_device class_dev; + struct ib_device *ib_dev; +}; + +struct ib_ucm_file { + struct semaphore mutex; + struct file *filp; + struct ib_ucm_device *device; + + struct list_head ctxs; + struct list_head events; + wait_queue_head_t poll_wait; +}; + +struct ib_ucm_context { + int id; + wait_queue_head_t wait; + atomic_t ref; + int events_reported; + + struct ib_ucm_file *file; + struct ib_cm_id *cm_id; + __u64 uid; + + struct list_head events; /* list of pending events. */ + struct list_head file_list; /* member in file ctx list */ +}; + +struct ib_ucm_event { + struct ib_ucm_context *ctx; + struct list_head file_list; /* member in file event list */ + struct list_head ctx_list; /* member in ctx event list */ -module_param_named(debug_level, ucm_debug_level, int, 0644); -MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); + struct ib_cm_id *cm_id; + struct ib_ucm_event_resp resp; + void *data; + void *info; + int data_len; + int info_len; +}; enum { IB_UCM_MAJOR = 231, - IB_UCM_MINOR = 255 + IB_UCM_BASE_MINOR = 224, + IB_UCM_MAX_DEVICES = 32 }; -#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR) +#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) -#define PFX "UCM: " +static void ib_ucm_add_one(struct ib_device *device); +static void ib_ucm_remove_one(struct ib_device *device); -#define ucm_dbg(format, arg...) \ - do { \ - if (ucm_debug_level > 0) \ - printk(KERN_DEBUG PFX format, ## arg); \ - } while (0) +static struct ib_client ucm_client = { + .name = "ucm", + .add = ib_ucm_add_one, + .remove = ib_ucm_remove_one +}; -static struct semaphore ctx_id_mutex; -static struct idr ctx_id_table; +static DECLARE_MUTEX(ctx_id_mutex); +static DEFINE_IDR(ctx_id_table); +static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { @@ -152,17 +196,13 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) goto error; list_add_tail(&ctx->file_list, &file->ctxs); - ucm_dbg("Allocated CM ID <%d>\n", ctx->id); return ctx; error: kfree(ctx); return NULL; } -/* - * Event portion of the API, handle CM events - * and allow event polling. - */ + static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath, struct ib_sa_path_rec *kpath) { @@ -209,6 +249,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, ureq->retry_count = kreq->retry_count; ureq->rnr_retry_count = kreq->rnr_retry_count; ureq->srq = kreq->srq; + ureq->port = kreq->port; ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path); ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path); @@ -295,6 +336,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt, case IB_CM_SIDR_REQ_RECEIVED: uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; + uvt->resp.u.sidr_req_resp.port = + evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; case IB_CM_SIDR_REP_RECEIVED: @@ -387,9 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - /* - * wait - */ + down(&file->mutex); while (list_empty(&file->events)) { @@ -471,7 +512,6 @@ done: return result; } - static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) @@ -494,29 +534,27 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx); + ctx->cm_id = ib_create_cm_id(file->device->ib_dev, + ib_ucm_event_handler, ctx); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); - goto err; + goto err1; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { result = -EFAULT; - goto err; + goto err2; } - return 0; -err: +err2: + ib_destroy_cm_id(ctx->cm_id); +err1: down(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); up(&ctx_id_mutex); - - if (!IS_ERR(ctx->cm_id)) - ib_destroy_cm_id(ctx->cm_id); - kfree(ctx); return result; } @@ -1184,9 +1222,6 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; - ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n", - hdr.cmd, hdr.in, hdr.out, len); - if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; @@ -1231,8 +1266,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp) filp->private_data = file; file->filp = filp; - - ucm_dbg("Created struct\n"); + file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev); return 0; } @@ -1263,7 +1297,17 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } -static struct file_operations ib_ucm_fops = { +static void ib_ucm_release_class_dev(struct class_device *class_dev) +{ + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + cdev_del(&dev->dev); + clear_bit(dev->devnum, dev_map); + kfree(dev); +} + +static struct file_operations ucm_fops = { .owner = THIS_MODULE, .open = ib_ucm_open, .release = ib_ucm_close, @@ -1271,55 +1315,142 @@ static struct file_operations ib_ucm_fops = { .poll = ib_ucm_poll, }; +static struct class ucm_class = { + .name = "infiniband_cm", + .release = ib_ucm_release_class_dev +}; -static struct class *ib_ucm_class; -static struct cdev ib_ucm_cdev; +static ssize_t show_dev(struct class_device *class_dev, char *buf) +{ + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + return print_dev_t(buf, dev->dev.dev); +} +static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); -static int __init ib_ucm_init(void) +static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { - int result; + struct ib_ucm_device *dev; + + dev = container_of(class_dev, struct ib_ucm_device, class_dev); + return sprintf(buf, "%s\n", dev->ib_dev->name); +} +static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); - result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm"); - if (result) { - ucm_dbg("Error <%d> registering dev\n", result); - goto err_chr; - } +static void ib_ucm_add_one(struct ib_device *device) +{ + struct ib_ucm_device *ucm_dev; + + if (!device->alloc_ucontext) + return; + + ucm_dev = kmalloc(sizeof *ucm_dev, GFP_KERNEL); + if (!ucm_dev) + return; - cdev_init(&ib_ucm_cdev, &ib_ucm_fops); + memset(ucm_dev, 0, sizeof *ucm_dev); + ucm_dev->ib_dev = device; + + ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); + if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES) + goto err; + + set_bit(ucm_dev->devnum, dev_map); + + cdev_init(&ucm_dev->dev, &ucm_fops); + ucm_dev->dev.owner = THIS_MODULE; + kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum); + if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) + goto err; - result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1); - if (result) { - ucm_dbg("Error <%d> adding cdev\n", result); + ucm_dev->class_dev.class = &ucm_class; + ucm_dev->class_dev.dev = device->dma_device; + snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d", + ucm_dev->devnum); + if (class_device_register(&ucm_dev->class_dev)) goto err_cdev; - } - ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm"); - if (IS_ERR(ib_ucm_class)) { - result = PTR_ERR(ib_ucm_class); - ucm_dbg("Error <%d> creating class\n", result); + if (class_device_create_file(&ucm_dev->class_dev, + &class_device_attr_dev)) + goto err_class; + if (class_device_create_file(&ucm_dev->class_dev, + &class_device_attr_ibdev)) goto err_class; + + ib_set_client_data(device, &ucm_client, ucm_dev); + return; + +err_class: + class_device_unregister(&ucm_dev->class_dev); +err_cdev: + cdev_del(&ucm_dev->dev); + clear_bit(ucm_dev->devnum, dev_map); +err: + kfree(ucm_dev); + return; +} + +static void ib_ucm_remove_one(struct ib_device *device) +{ + struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client); + + if (!ucm_dev) + return; + + class_device_unregister(&ucm_dev->class_dev); +} + +static ssize_t show_abi_version(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION); +} +static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static int __init ib_ucm_init(void) +{ + int ret; + + ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, + "infiniband_cm"); + if (ret) { + printk(KERN_ERR "ucm: couldn't register device number\n"); + goto err; } - class_device_create(ib_ucm_class, NULL, IB_UCM_DEV, NULL, "ucm"); + ret = class_register(&ucm_class); + if (ret) { + printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n"); + goto err_chrdev; + } - idr_init(&ctx_id_table); - init_MUTEX(&ctx_id_mutex); + ret = class_create_file(&ucm_class, &class_attr_abi_version); + if (ret) { + printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); + goto err_class; + } + ret = ib_register_client(&ucm_client); + if (ret) { + printk(KERN_ERR "ucm: couldn't register client\n"); + goto err_class; + } return 0; + err_class: - cdev_del(&ib_ucm_cdev); -err_cdev: - unregister_chrdev_region(IB_UCM_DEV, 1); -err_chr: - return result; + class_unregister(&ucm_class); +err_chrdev: + unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); +err: + return ret; } static void __exit ib_ucm_cleanup(void) { - class_device_destroy(ib_ucm_class, IB_UCM_DEV); - class_destroy(ib_ucm_class); - cdev_del(&ib_ucm_cdev); - unregister_chrdev_region(IB_UCM_DEV, 1); + ib_unregister_client(&ucm_client); + class_unregister(&ucm_class); + unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); + idr_destroy(&ctx_id_table); } module_init(ib_ucm_init); diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h deleted file mode 100644 index f46f37b..0000000 --- a/drivers/infiniband/core/ucm.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $ - */ - -#ifndef UCM_H -#define UCM_H - -#include <linux/fs.h> -#include <linux/device.h> -#include <linux/cdev.h> -#include <linux/idr.h> - -#include <rdma/ib_cm.h> -#include <rdma/ib_user_cm.h> - -struct ib_ucm_file { - struct semaphore mutex; - struct file *filp; - - struct list_head ctxs; /* list of active connections */ - struct list_head events; /* list of pending events */ - wait_queue_head_t poll_wait; -}; - -struct ib_ucm_context { - int id; - wait_queue_head_t wait; - atomic_t ref; - int events_reported; - - struct ib_ucm_file *file; - struct ib_cm_id *cm_id; - __u64 uid; - - struct list_head events; /* list of pending events. */ - struct list_head file_list; /* member in file ctx list */ -}; - -struct ib_ucm_event { - struct ib_ucm_context *ctx; - struct list_head file_list; /* member in file event list */ - struct list_head ctx_list; /* member in ctx event list */ - - struct ib_cm_id *cm_id; - struct ib_ucm_event_resp resp; - void *data; - void *info; - int data_len; - int info_len; -}; - -#endif /* UCM_H */ diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index a64d6b4..97128e2 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -64,18 +64,39 @@ enum { IB_UMAD_MINOR_BASE = 0 }; +/* + * Our lifetime rules for these structs are the following: each time a + * device special file is opened, we look up the corresponding struct + * ib_umad_port by minor in the umad_port[] table while holding the + * port_lock. If this lookup succeeds, we take a reference on the + * ib_umad_port's struct ib_umad_device while still holding the + * port_lock; if the lookup fails, we fail the open(). We drop these + * references in the corresponding close(). + * + * In addition to references coming from open character devices, there + * is one more reference to each ib_umad_device representing the + * module's reference taken when allocating the ib_umad_device in + * ib_umad_add_one(). + * + * When destroying an ib_umad_device, we clear all of its + * ib_umad_ports from umad_port[] while holding port_lock before + * dropping the module's reference to the ib_umad_device. This is + * always safe because any open() calls will either succeed and obtain + * a reference before we clear the umad_port[] entries, or fail after + * we clear the umad_port[] entries. + */ + struct ib_umad_port { - int devnum; - struct cdev dev; - struct class_device class_dev; + struct cdev *dev; + struct class_device *class_dev; - int sm_devnum; - struct cdev sm_dev; - struct class_device sm_class_dev; + struct cdev *sm_dev; + struct class_device *sm_class_dev; struct semaphore sm_sem; struct ib_device *ib_dev; struct ib_umad_device *umad_dev; + int dev_num; u8 port_num; }; @@ -96,21 +117,31 @@ struct ib_umad_file { }; struct ib_umad_packet { - struct ib_ah *ah; struct ib_mad_send_buf *msg; struct list_head list; int length; - DECLARE_PCI_UNMAP_ADDR(mapping) struct ib_user_mad mad; }; +static struct class *umad_class; + static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); -static spinlock_t map_lock; + +static DEFINE_SPINLOCK(port_lock); +static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); +static void ib_umad_release_dev(struct kref *ref) +{ + struct ib_umad_device *dev = + container_of(ref, struct ib_umad_device, ref); + + kfree(dev); +} + static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, struct ib_umad_packet *packet) @@ -139,22 +170,19 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; - struct ib_umad_packet *timeout, *packet = - (void *) (unsigned long) send_wc->wr_id; + struct ib_umad_packet *timeout; + struct ib_umad_packet *packet = send_wc->send_buf->context[0]; - ib_destroy_ah(packet->msg->send_wr.wr.ud.ah); + ib_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { - timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr), - GFP_KERNEL); + timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL); if (!timeout) goto out; - memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr)); - - timeout->length = sizeof (struct ib_mad_hdr); - timeout->mad.hdr.id = packet->mad.hdr.id; + timeout->length = IB_MGMT_MAD_HDR; + timeout->mad.hdr.id = packet->mad.hdr.id; timeout->mad.hdr.status = ETIMEDOUT; memcpy(timeout->mad.data, packet->mad.data, sizeof (struct ib_mad_hdr)); @@ -177,11 +205,10 @@ static void recv_handler(struct ib_mad_agent *agent, goto out; length = mad_recv_wc->mad_len; - packet = kmalloc(sizeof *packet + length, GFP_KERNEL); + packet = kzalloc(sizeof *packet + length, GFP_KERNEL); if (!packet) goto out; - memset(packet, 0, sizeof *packet + length); packet->length = length; ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data); @@ -247,7 +274,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, else ret = -ENOSPC; } else if (copy_to_user(buf, &packet->mad, - packet->length + sizeof (struct ib_user_mad))) + packet->length + sizeof (struct ib_user_mad))) ret = -EFAULT; else ret = packet->length + sizeof (struct ib_user_mad); @@ -268,26 +295,23 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; - struct ib_send_wr *bad_wr; + struct ib_ah *ah; struct ib_rmpp_mad *rmpp_mad; u8 method; __be64 *tid; - int ret, length, hdr_len, data_len, rmpp_hdr_size; + int ret, length, hdr_len, copy_offset; int rmpp_active = 0; if (count < sizeof (struct ib_user_mad)) return -EINVAL; length = count - sizeof (struct ib_user_mad); - packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr), GFP_KERNEL); + packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; if (copy_from_user(&packet->mad, buf, - sizeof (struct ib_user_mad) + - sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr))) { + sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) { ret = -EFAULT; goto err; } @@ -298,8 +322,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } - packet->length = length; - down_read(&file->agent_mutex); agent = file->agent[packet->mad.hdr.id]; @@ -321,9 +343,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } - packet->ah = ib_create_ah(agent->qp->pd, &ah_attr); - if (IS_ERR(packet->ah)) { - ret = PTR_ERR(packet->ah); + ah = ib_create_ah(agent->qp->pd, &ah_attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); goto err_up; } @@ -337,64 +359,44 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, /* Validate that the management class can support RMPP */ if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { - hdr_len = offsetof(struct ib_sa_mad, data); - data_len = length - hdr_len; + hdr_len = IB_MGMT_SA_HDR; } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) { - hdr_len = offsetof(struct ib_vendor_mad, data); - data_len = length - hdr_len; + hdr_len = IB_MGMT_VENDOR_HDR; } else { ret = -EINVAL; goto err_ah; } rmpp_active = 1; + copy_offset = IB_MGMT_RMPP_HDR; } else { - if (length > sizeof(struct ib_mad)) { - ret = -EINVAL; - goto err_ah; - } - hdr_len = offsetof(struct ib_mad, data); - data_len = length - hdr_len; + hdr_len = IB_MGMT_MAD_HDR; + copy_offset = IB_MGMT_MAD_HDR; } packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), - 0, packet->ah, rmpp_active, - hdr_len, data_len, + 0, rmpp_active, + hdr_len, length - hdr_len, GFP_KERNEL); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; } - packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms; - packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries; - - /* Override send WR WRID initialized in ib_create_send_mad */ - packet->msg->send_wr.wr_id = (unsigned long) packet; - - if (!rmpp_active) { - /* Copy message from user into send buffer */ - if (copy_from_user(packet->msg->mad, - buf + sizeof(struct ib_user_mad), length)) { - ret = -EFAULT; - goto err_msg; - } - } else { - rmpp_hdr_size = sizeof(struct ib_mad_hdr) + - sizeof(struct ib_rmpp_hdr); + packet->msg->ah = ah; + packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; + packet->msg->retries = packet->mad.hdr.retries; + packet->msg->context[0] = packet; - /* Only copy MAD headers (RMPP header in place) */ - memcpy(packet->msg->mad, packet->mad.data, - sizeof(struct ib_mad_hdr)); - - /* Now, copy rest of message from user into send buffer */ - if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data, - buf + sizeof (struct ib_user_mad) + rmpp_hdr_size, - length - rmpp_hdr_size)) { - ret = -EFAULT; - goto err_msg; - } + /* Copy MAD headers (RMPP header in place) */ + memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); + /* Now, copy rest of message from user into send buffer */ + if (copy_from_user(packet->msg->mad + copy_offset, + buf + sizeof (struct ib_user_mad) + copy_offset, + length - copy_offset)) { + ret = -EFAULT; + goto err_msg; } /* @@ -403,29 +405,29 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, * transaction ID matches the agent being used to send the * MAD. */ - method = packet->msg->mad->mad_hdr.method; + method = ((struct ib_mad_hdr *) packet->msg->mad)->method; if (!(method & IB_MGMT_METHOD_RESP) && method != IB_MGMT_METHOD_TRAP_REPRESS && method != IB_MGMT_METHOD_SEND) { - tid = &packet->msg->mad->mad_hdr.tid; + tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); } - ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr); + ret = ib_post_send_mad(packet->msg, NULL); if (ret) goto err_msg; up_read(&file->agent_mutex); - return sizeof (struct ib_user_mad_hdr) + packet->length; + return count; err_msg: ib_free_send_mad(packet->msg); err_ah: - ib_destroy_ah(packet->ah); + ib_destroy_ah(ah); err_up: up_read(&file->agent_mutex); @@ -565,15 +567,23 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd, static int ib_umad_open(struct inode *inode, struct file *filp) { - struct ib_umad_port *port = - container_of(inode->i_cdev, struct ib_umad_port, dev); + struct ib_umad_port *port; struct ib_umad_file *file; - file = kmalloc(sizeof *file, GFP_KERNEL); - if (!file) - return -ENOMEM; + spin_lock(&port_lock); + port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE]; + if (port) + kref_get(&port->umad_dev->ref); + spin_unlock(&port_lock); - memset(file, 0, sizeof *file); + if (!port) + return -ENXIO; + + file = kzalloc(sizeof *file, GFP_KERNEL); + if (!file) { + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + return -ENOMEM; + } spin_lock_init(&file->recv_lock); init_rwsem(&file->agent_mutex); @@ -589,6 +599,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp) static int ib_umad_close(struct inode *inode, struct file *filp) { struct ib_umad_file *file = filp->private_data; + struct ib_umad_device *dev = file->port->umad_dev; struct ib_umad_packet *packet, *tmp; int i; @@ -603,6 +614,8 @@ static int ib_umad_close(struct inode *inode, struct file *filp) kfree(file); + kref_put(&dev->ref, ib_umad_release_dev); + return 0; } @@ -619,30 +632,46 @@ static struct file_operations umad_fops = { static int ib_umad_sm_open(struct inode *inode, struct file *filp) { - struct ib_umad_port *port = - container_of(inode->i_cdev, struct ib_umad_port, sm_dev); + struct ib_umad_port *port; struct ib_port_modify props = { .set_port_cap_mask = IB_PORT_SM }; int ret; + spin_lock(&port_lock); + port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS]; + if (port) + kref_get(&port->umad_dev->ref); + spin_unlock(&port_lock); + + if (!port) + return -ENXIO; + if (filp->f_flags & O_NONBLOCK) { - if (down_trylock(&port->sm_sem)) - return -EAGAIN; + if (down_trylock(&port->sm_sem)) { + ret = -EAGAIN; + goto fail; + } } else { - if (down_interruptible(&port->sm_sem)) - return -ERESTARTSYS; + if (down_interruptible(&port->sm_sem)) { + ret = -ERESTARTSYS; + goto fail; + } } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); if (ret) { up(&port->sm_sem); - return ret; + goto fail; } filp->private_data = port; return 0; + +fail: + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + return ret; } static int ib_umad_sm_close(struct inode *inode, struct file *filp) @@ -656,6 +685,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); up(&port->sm_sem); + kref_put(&port->umad_dev->ref, ib_umad_release_dev); + return ret; } @@ -671,21 +702,13 @@ static struct ib_client umad_client = { .remove = ib_umad_remove_one }; -static ssize_t show_dev(struct class_device *class_dev, char *buf) -{ - struct ib_umad_port *port = class_get_devdata(class_dev); - - if (class_dev == &port->class_dev) - return print_dev_t(buf, port->dev.dev); - else - return print_dev_t(buf, port->sm_dev.dev); -} -static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); - static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); + if (!port) + return -ENODEV; + return sprintf(buf, "%s\n", port->ib_dev->name); } static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); @@ -694,38 +717,13 @@ static ssize_t show_port(struct class_device *class_dev, char *buf) { struct ib_umad_port *port = class_get_devdata(class_dev); + if (!port) + return -ENODEV; + return sprintf(buf, "%d\n", port->port_num); } static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); -static void ib_umad_release_dev(struct kref *ref) -{ - struct ib_umad_device *dev = - container_of(ref, struct ib_umad_device, ref); - - kfree(dev); -} - -static void ib_umad_release_port(struct class_device *class_dev) -{ - struct ib_umad_port *port = class_get_devdata(class_dev); - - if (class_dev == &port->class_dev) { - cdev_del(&port->dev); - clear_bit(port->devnum, dev_map); - } else { - cdev_del(&port->sm_dev); - clear_bit(port->sm_devnum, dev_map); - } - - kref_put(&port->umad_dev->ref, ib_umad_release_dev); -} - -static struct class umad_class = { - .name = "infiniband_mad", - .release = ib_umad_release_port -}; - static ssize_t show_abi_version(struct class *class, char *buf) { return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); @@ -735,91 +733,102 @@ static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_port *port) { - spin_lock(&map_lock); - port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); - if (port->devnum >= IB_UMAD_MAX_PORTS) { - spin_unlock(&map_lock); + spin_lock(&port_lock); + port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); + if (port->dev_num >= IB_UMAD_MAX_PORTS) { + spin_unlock(&port_lock); return -1; } - port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS); - if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) { - spin_unlock(&map_lock); - return -1; - } - set_bit(port->devnum, dev_map); - set_bit(port->sm_devnum, dev_map); - spin_unlock(&map_lock); + set_bit(port->dev_num, dev_map); + spin_unlock(&port_lock); port->ib_dev = device; port->port_num = port_num; init_MUTEX(&port->sm_sem); - cdev_init(&port->dev, &umad_fops); - port->dev.owner = THIS_MODULE; - kobject_set_name(&port->dev.kobj, "umad%d", port->devnum); - if (cdev_add(&port->dev, base_dev + port->devnum, 1)) + port->dev = cdev_alloc(); + if (!port->dev) return -1; - - port->class_dev.class = &umad_class; - port->class_dev.dev = device->dma_device; - - snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum); - - if (class_device_register(&port->class_dev)) + port->dev->owner = THIS_MODULE; + port->dev->ops = &umad_fops; + kobject_set_name(&port->dev->kobj, "umad%d", port->dev_num); + if (cdev_add(port->dev, base_dev + port->dev_num, 1)) goto err_cdev; - class_set_devdata(&port->class_dev, port); - kref_get(&port->umad_dev->ref); + port->class_dev = class_device_create(umad_class, NULL, port->dev->dev, + device->dma_device, + "umad%d", port->dev_num); + if (IS_ERR(port->class_dev)) + goto err_cdev; - if (class_device_create_file(&port->class_dev, &class_device_attr_dev)) - goto err_class; - if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev)) + if (class_device_create_file(port->class_dev, &class_device_attr_ibdev)) goto err_class; - if (class_device_create_file(&port->class_dev, &class_device_attr_port)) + if (class_device_create_file(port->class_dev, &class_device_attr_port)) goto err_class; - cdev_init(&port->sm_dev, &umad_sm_fops); - port->sm_dev.owner = THIS_MODULE; - kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); - if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1)) - return -1; - - port->sm_class_dev.class = &umad_class; - port->sm_class_dev.dev = device->dma_device; - - snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS); + port->sm_dev = cdev_alloc(); + if (!port->sm_dev) + goto err_class; + port->sm_dev->owner = THIS_MODULE; + port->sm_dev->ops = &umad_sm_fops; + kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num); + if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1)) + goto err_sm_cdev; - if (class_device_register(&port->sm_class_dev)) + port->sm_class_dev = class_device_create(umad_class, NULL, port->sm_dev->dev, + device->dma_device, + "issm%d", port->dev_num); + if (IS_ERR(port->sm_class_dev)) goto err_sm_cdev; - class_set_devdata(&port->sm_class_dev, port); - kref_get(&port->umad_dev->ref); + class_set_devdata(port->class_dev, port); + class_set_devdata(port->sm_class_dev, port); - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev)) - goto err_sm_class; - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev)) + if (class_device_create_file(port->sm_class_dev, &class_device_attr_ibdev)) goto err_sm_class; - if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port)) + if (class_device_create_file(port->sm_class_dev, &class_device_attr_port)) goto err_sm_class; + spin_lock(&port_lock); + umad_port[port->dev_num] = port; + spin_unlock(&port_lock); + return 0; err_sm_class: - class_device_unregister(&port->sm_class_dev); + class_device_destroy(umad_class, port->sm_dev->dev); err_sm_cdev: - cdev_del(&port->sm_dev); + cdev_del(port->sm_dev); err_class: - class_device_unregister(&port->class_dev); + class_device_destroy(umad_class, port->dev->dev); err_cdev: - cdev_del(&port->dev); - clear_bit(port->devnum, dev_map); + cdev_del(port->dev); + clear_bit(port->dev_num, dev_map); return -1; } +static void ib_umad_kill_port(struct ib_umad_port *port) +{ + class_set_devdata(port->class_dev, NULL); + class_set_devdata(port->sm_class_dev, NULL); + + class_device_destroy(umad_class, port->dev->dev); + class_device_destroy(umad_class, port->sm_dev->dev); + + cdev_del(port->dev); + cdev_del(port->sm_dev); + + spin_lock(&port_lock); + umad_port[port->dev_num] = NULL; + spin_unlock(&port_lock); + + clear_bit(port->dev_num, dev_map); +} + static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; @@ -832,15 +841,12 @@ static void ib_umad_add_one(struct ib_device *device) e = device->phys_port_cnt; } - umad_dev = kmalloc(sizeof *umad_dev + + umad_dev = kzalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), GFP_KERNEL); if (!umad_dev) return; - memset(umad_dev, 0, sizeof *umad_dev + - (e - s + 1) * sizeof (struct ib_umad_port)); - kref_init(&umad_dev->ref); umad_dev->start_port = s; @@ -858,10 +864,8 @@ static void ib_umad_add_one(struct ib_device *device) return; err: - while (--i >= s) { - class_device_unregister(&umad_dev->port[i - s].class_dev); - class_device_unregister(&umad_dev->port[i - s].sm_class_dev); - } + while (--i >= s) + ib_umad_kill_port(&umad_dev->port[i]); kref_put(&umad_dev->ref, ib_umad_release_dev); } @@ -874,10 +878,8 @@ static void ib_umad_remove_one(struct ib_device *device) if (!umad_dev) return; - for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) { - class_device_unregister(&umad_dev->port[i].class_dev); - class_device_unregister(&umad_dev->port[i].sm_class_dev); - } + for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) + ib_umad_kill_port(&umad_dev->port[i]); kref_put(&umad_dev->ref, ib_umad_release_dev); } @@ -886,8 +888,6 @@ static int __init ib_umad_init(void) { int ret; - spin_lock_init(&map_lock); - ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { @@ -895,13 +895,14 @@ static int __init ib_umad_init(void) goto out; } - ret = class_register(&umad_class); - if (ret) { + umad_class = class_create(THIS_MODULE, "infiniband_mad"); + if (IS_ERR(umad_class)) { + ret = PTR_ERR(umad_class); printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n"); goto out_chrdev; } - ret = class_create_file(&umad_class, &class_attr_abi_version); + ret = class_create_file(umad_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); goto out_class; @@ -916,7 +917,7 @@ static int __init ib_umad_init(void) return 0; out_class: - class_unregister(&umad_class); + class_destroy(umad_class); out_chrdev: unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); @@ -928,7 +929,7 @@ out: static void __exit ib_umad_cleanup(void) { ib_unregister_client(&umad_client); - class_unregister(&umad_class); + class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index cc12434..031cdf3 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -3,6 +3,7 @@ * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -38,29 +39,47 @@ #ifndef UVERBS_H #define UVERBS_H -/* Include device.h and fs.h until cdev.h is self-sufficient */ -#include <linux/fs.h> -#include <linux/device.h> -#include <linux/cdev.h> #include <linux/kref.h> #include <linux/idr.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> +/* + * Our lifetime rules for these structs are the following: + * + * struct ib_uverbs_device: One reference is held by the module and + * released in ib_uverbs_remove_one(). Another reference is taken by + * ib_uverbs_open() each time the character special file is opened, + * and released in ib_uverbs_release_file() when the file is released. + * + * struct ib_uverbs_file: One reference is held by the VFS and + * released when the file is closed. Another reference is taken when + * an asynchronous event queue file is created and released when the + * event file is closed. + * + * struct ib_uverbs_event_file: One reference is held by the VFS and + * released when the file is closed. For asynchronous event files, + * another reference is held by the corresponding main context file + * and released when that file is closed. For completion event files, + * a reference is taken when a CQ is created that uses the file, and + * released when the CQ is destroyed. + */ + struct ib_uverbs_device { + struct kref ref; int devnum; - struct cdev dev; - struct class_device class_dev; + struct cdev *dev; + struct class_device *class_dev; struct ib_device *ib_dev; - int num_comp; + int num_comp_vectors; }; struct ib_uverbs_event_file { struct kref ref; + struct file *file; struct ib_uverbs_file *uverbs_file; spinlock_t lock; - int fd; int is_async; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; @@ -73,8 +92,7 @@ struct ib_uverbs_file { struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; - struct ib_uverbs_event_file async_file; - struct ib_uverbs_event_file comp_file[1]; + struct ib_uverbs_event_file *async_file; }; struct ib_uverbs_event { @@ -110,10 +128,23 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd); +void ib_uverbs_release_event_file(struct kref *ref); +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); + +void ib_uverbs_release_ucq(struct ib_uverbs_file *file, + struct ib_uverbs_event_file *ev_file, + struct ib_ucq_object *uobj); +void ib_uverbs_release_uevent(struct ib_uverbs_file *file, + struct ib_uevent_object *uobj); + void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event); int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, void *addr, size_t size, int write); @@ -125,21 +156,26 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); const char __user *buf, int in_len, \ int out_len) -IB_UVERBS_DECLARE_CMD(query_params); IB_UVERBS_DECLARE_CMD(get_context); IB_UVERBS_DECLARE_CMD(query_device); IB_UVERBS_DECLARE_CMD(query_port); -IB_UVERBS_DECLARE_CMD(query_gid); -IB_UVERBS_DECLARE_CMD(query_pkey); IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); +IB_UVERBS_DECLARE_CMD(poll_cq); +IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); +IB_UVERBS_DECLARE_CMD(post_send); +IB_UVERBS_DECLARE_CMD(post_recv); +IB_UVERBS_DECLARE_CMD(post_srq_recv); +IB_UVERBS_DECLARE_CMD(create_ah); +IB_UVERBS_DECLARE_CMD(destroy_ah); IB_UVERBS_DECLARE_CMD(attach_mcast); IB_UVERBS_DECLARE_CMD(detach_mcast); IB_UVERBS_DECLARE_CMD(create_srq); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 5624451..8c89abc 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,6 +34,9 @@ * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ */ +#include <linux/file.h> +#include <linux/fs.h> + #include <asm/uaccess.h> #include "uverbs.h" @@ -45,29 +49,6 @@ (udata)->outlen = (olen); \ } while (0) -ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_params cmd; - struct ib_uverbs_query_params_resp resp; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - resp.num_cq_events = file->device->num_comp; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -77,7 +58,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_udata udata; struct ib_device *ibdev = file->device->ib_dev; struct ib_ucontext *ucontext; - int i; + struct file *filp; int ret; if (out_len < sizeof resp) @@ -110,26 +91,42 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); - resp.async_fd = file->async_file.fd; - for (i = 0; i < file->device->num_comp; ++i) - if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + - i * sizeof (__u32), - &file->comp_file[i].fd, sizeof (__u32))) { - ret = -EFAULT; - goto err_free; - } + resp.num_comp_vectors = file->device->num_comp_vectors; + + filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto err_free; + } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_free; + goto err_file; } + file->async_file = filp->private_data; + + INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&file->event_handler); + if (ret) + goto err_file; + + kref_get(&file->async_file->ref); + kref_get(&file->ref); file->ucontext = ucontext; + + fd_install(resp.async_fd, filp); + up(&file->mutex); return in_len; +err_file: + put_unused_fd(resp.async_fd); + fput(filp); + err_free: ibdev->dealloc_ucontext(ucontext); @@ -255,62 +252,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_gid cmd; - struct ib_uverbs_query_gid_resp resp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index, - (union ib_gid *) resp.gid); - if (ret) - return ret; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_pkey cmd; - struct ib_uverbs_query_pkey_resp resp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index, - &resp.pkey); - if (ret) - return ret; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -349,24 +290,20 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->uobject = uobj; atomic_set(&pd->usecnt, 0); + down(&ib_uverbs_idr_mutex); + retry: if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { ret = -ENOMEM; - goto err_pd; + goto err_up; } - down(&ib_uverbs_idr_mutex); ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); - up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) goto retry; if (ret) - goto err_pd; - - down(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->pd_list); - up(&file->mutex); + goto err_up; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -374,21 +311,22 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } - return in_len; - -err_list: - down(&file->mutex); - list_del(&uobj->list); + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->pd_list); up(&file->mutex); - down(&ib_uverbs_idr_mutex); - idr_remove(&ib_uverbs_pd_idr, uobj->id); up(&ib_uverbs_idr_mutex); -err_pd: + return in_len; + +err_idr: + idr_remove(&ib_uverbs_pd_idr, uobj->id); + +err_up: + up(&ib_uverbs_idr_mutex); ib_dealloc_pd(pd); err: @@ -459,6 +397,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested. + */ + if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && + !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) + return -EINVAL; + obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; @@ -524,24 +470,22 @@ retry: resp.mr_handle = obj->uobject.id; - down(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); - up(&file->mutex); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - down(&file->mutex); - list_del(&obj->uobject.list); - up(&file->mutex); +err_idr: + idr_remove(&ib_uverbs_mr_idr, obj->uobject.id); err_unreg: ib_dereg_mr(mr); @@ -595,6 +539,35 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_comp_channel cmd; + struct ib_uverbs_create_comp_channel_resp resp; + struct file *filp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + put_unused_fd(resp.fd); + fput(filp); + return -EFAULT; + } + + fd_install(resp.fd, filp); + return in_len; +} + ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -603,6 +576,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; struct ib_ucq_object *uobj; + struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; @@ -616,9 +590,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - if (cmd.event_handler >= file->device->num_comp) + if (cmd.comp_vector >= file->device->num_comp_vectors) return -EINVAL; + if (cmd.comp_channel >= 0) + ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; @@ -641,27 +618,23 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cq->uobject = &uobj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = file; + cq->cq_context = ev_file; atomic_set(&cq->usecnt, 0); + down(&ib_uverbs_idr_mutex); + retry: if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { ret = -ENOMEM; - goto err_cq; + goto err_up; } - down(&ib_uverbs_idr_mutex); ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id); - up(&ib_uverbs_idr_mutex); if (ret == -EAGAIN) goto retry; if (ret) - goto err_cq; - - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); - up(&file->mutex); + goto err_up; memset(&resp, 0, sizeof resp); resp.cq_handle = uobj->uobject.id; @@ -670,21 +643,22 @@ retry: if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } - return in_len; - -err_list: - down(&file->mutex); - list_del(&uobj->uobject.list); + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); up(&file->mutex); - down(&ib_uverbs_idr_mutex); - idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); up(&ib_uverbs_idr_mutex); -err_cq: + return in_len; + +err_idr: + idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); + +err_up: + up(&ib_uverbs_idr_mutex); ib_destroy_cq(cq); err: @@ -692,6 +666,93 @@ err: return ret; } +ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_poll_cq cmd; + struct ib_uverbs_poll_cq_resp *resp; + struct ib_cq *cq; + struct ib_wc *wc; + int ret = 0; + int i; + int rsize; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL); + if (!wc) + return -ENOMEM; + + rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc); + resp = kmalloc(rsize, GFP_KERNEL); + if (!resp) { + ret = -ENOMEM; + goto out_wc; + } + + down(&ib_uverbs_idr_mutex); + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (!cq || cq->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + resp->count = ib_poll_cq(cq, cmd.ne, wc); + + for (i = 0; i < resp->count; i++) { + resp->wc[i].wr_id = wc[i].wr_id; + resp->wc[i].status = wc[i].status; + resp->wc[i].opcode = wc[i].opcode; + resp->wc[i].vendor_err = wc[i].vendor_err; + resp->wc[i].byte_len = wc[i].byte_len; + resp->wc[i].imm_data = wc[i].imm_data; + resp->wc[i].qp_num = wc[i].qp_num; + resp->wc[i].src_qp = wc[i].src_qp; + resp->wc[i].wc_flags = wc[i].wc_flags; + resp->wc[i].pkey_index = wc[i].pkey_index; + resp->wc[i].slid = wc[i].slid; + resp->wc[i].sl = wc[i].sl; + resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits; + resp->wc[i].port_num = wc[i].port_num; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + kfree(resp); + +out_wc: + kfree(wc); + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_req_notify_cq cmd; + struct ib_cq *cq; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (cq && cq->uobject->context == file->ucontext) { + ib_req_notify_cq(cq, cmd.solicited_only ? + IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); + ret = in_len; + } + up(&ib_uverbs_idr_mutex); + + return ret; +} + ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -700,7 +761,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_cq_resp resp; struct ib_cq *cq; struct ib_ucq_object *uobj; - struct ib_uverbs_event *evt, *tmp; + struct ib_uverbs_event_file *ev_file; u64 user_handle; int ret = -EINVAL; @@ -716,7 +777,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, goto out; user_handle = cq->uobject->user_handle; - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + ev_file = cq->cq_context; ret = ib_destroy_cq(cq); if (ret) @@ -728,19 +790,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->comp_file[0].lock); - list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->comp_file[0].lock); - - spin_lock_irq(&file->async_file.lock); - list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file.lock); + ib_uverbs_release_ucq(file, ev_file, uobj); resp.comp_events_reported = uobj->comp_events_reported; resp.async_events_reported = uobj->async_events_reported; @@ -859,24 +909,22 @@ retry: resp.qp_handle = uobj->uobject.id; - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); - up(&file->mutex); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - down(&file->mutex); - list_del(&uobj->uobject.list); - up(&file->mutex); +err_idr: + idr_remove(&ib_uverbs_qp_idr, uobj->uobject.id); err_destroy: ib_destroy_qp(qp); @@ -979,7 +1027,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_uverbs_destroy_qp_resp resp; struct ib_qp *qp; struct ib_uevent_object *uobj; - struct ib_uverbs_event *evt, *tmp; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1005,12 +1052,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->async_file.lock); - list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file.lock); + ib_uverbs_release_uevent(file, uobj); resp.events_reported = uobj->events_reported; @@ -1026,6 +1068,468 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_send cmd; + struct ib_uverbs_post_send_resp resp; + struct ib_uverbs_send_wr *user_wr; + struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; + struct ib_qp *qp; + int i, sg_ind; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + + cmd.sge_count * sizeof (struct ib_uverbs_sge)) + return -EINVAL; + + if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) + return -EINVAL; + + user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); + if (!user_wr) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + sg_ind = 0; + last = NULL; + for (i = 0; i < cmd.wr_count; ++i) { + if (copy_from_user(user_wr, + buf + sizeof cmd + i * cmd.wqe_size, + cmd.wqe_size)) { + ret = -EFAULT; + goto out; + } + + if (user_wr->num_sge + sg_ind > cmd.sge_count) { + ret = -EINVAL; + goto out; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto out; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + next->opcode = user_wr->opcode; + next->send_flags = user_wr->send_flags; + next->imm_data = user_wr->imm_data; + + if (qp->qp_type == IB_QPT_UD) { + next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, + user_wr->wr.ud.ah); + if (!next->wr.ud.ah) { + ret = -EINVAL; + goto out; + } + next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; + next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; + } else { + switch (next->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + case IB_WR_RDMA_READ: + next->wr.rdma.remote_addr = + user_wr->wr.rdma.remote_addr; + next->wr.rdma.rkey = + user_wr->wr.rdma.rkey; + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + next->wr.atomic.remote_addr = + user_wr->wr.atomic.remote_addr; + next->wr.atomic.compare_add = + user_wr->wr.atomic.compare_add; + next->wr.atomic.swap = user_wr->wr.atomic.swap; + next->wr.atomic.rkey = user_wr->wr.atomic.rkey; + break; + default: + break; + } + } + + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + sizeof cmd + + cmd.wr_count * cmd.wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto out; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + resp.bad_wr = 0; + ret = qp->device->post_send(qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + kfree(user_wr); + + return ret ? ret : in_len; +} + +static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, + int in_len, + u32 wr_count, + u32 sge_count, + u32 wqe_size) +{ + struct ib_uverbs_recv_wr *user_wr; + struct ib_recv_wr *wr = NULL, *last, *next; + int sg_ind; + int i; + int ret; + + if (in_len < wqe_size * wr_count + + sge_count * sizeof (struct ib_uverbs_sge)) + return ERR_PTR(-EINVAL); + + if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) + return ERR_PTR(-EINVAL); + + user_wr = kmalloc(wqe_size, GFP_KERNEL); + if (!user_wr) + return ERR_PTR(-ENOMEM); + + sg_ind = 0; + last = NULL; + for (i = 0; i < wr_count; ++i) { + if (copy_from_user(user_wr, buf + i * wqe_size, + wqe_size)) { + ret = -EFAULT; + goto err; + } + + if (user_wr->num_sge + sg_ind > sge_count) { + ret = -EINVAL; + goto err; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto err; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + wr_count * wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto err; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + kfree(user_wr); + return wr; + +err: + kfree(user_wr); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ERR_PTR(ret); +} + +ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_recv cmd; + struct ib_uverbs_post_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_qp *qp; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + resp.bad_wr = 0; + ret = qp->device->post_recv(qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_srq_recv cmd; + struct ib_uverbs_post_srq_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_srq *srq; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + down(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) + goto out; + + resp.bad_wr = 0; + ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_ah cmd; + struct ib_uverbs_create_ah_resp resp; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_ah *ah; + struct ib_ah_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + attr.dlid = cmd.attr.dlid; + attr.sl = cmd.attr.sl; + attr.src_path_bits = cmd.attr.src_path_bits; + attr.static_rate = cmd.attr.static_rate; + attr.port_num = cmd.attr.port_num; + attr.grh.flow_label = cmd.attr.grh.flow_label; + attr.grh.sgid_index = cmd.attr.grh.sgid_index; + attr.grh.hop_limit = cmd.attr.grh.hop_limit; + attr.grh.traffic_class = cmd.attr.grh.traffic_class; + memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); + + ah = ib_create_ah(pd, &attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto err_up; + } + + ah->uobject = uobj; + +retry: + if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.ah_handle = uobj->id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_idr; + } + + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->ah_list); + up(&file->mutex); + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_idr: + idr_remove(&ib_uverbs_ah_idr, uobj->id); + +err_destroy: + ib_destroy_ah(ah); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, + const char __user *buf, int in_len, int out_len) +{ + struct ib_uverbs_destroy_ah cmd; + struct ib_ah *ah; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle); + if (!ah || ah->uobject->context != file->ucontext) + goto out; + + uobj = ah->uobject; + + ret = ib_destroy_ah(ah); + if (ret) + goto out; + + idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle); + + down(&file->mutex); + list_del(&uobj->list); + up(&file->mutex); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1148,24 +1652,22 @@ retry: resp.srq_handle = uobj->uobject.id; - down(&file->mutex); - list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); - up(&file->mutex); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_list; + goto err_idr; } + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); + up(&file->mutex); + up(&ib_uverbs_idr_mutex); return in_len; -err_list: - down(&file->mutex); - list_del(&uobj->uobject.list); - up(&file->mutex); +err_idr: + idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id); err_destroy: ib_destroy_srq(srq); @@ -1217,7 +1719,6 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_srq_resp resp; struct ib_srq *srq; struct ib_uevent_object *uobj; - struct ib_uverbs_event *evt, *tmp; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1243,12 +1744,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->async_file.lock); - list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { - list_del(&evt->list); - kfree(evt); - } - spin_unlock_irq(&file->async_file.lock); + ib_uverbs_release_uevent(file, uobj); resp.events_reported = uobj->events_reported; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 1251180..0eb38f4 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -3,6 +3,7 @@ * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -43,6 +44,7 @@ #include <linux/poll.h> #include <linux/file.h> #include <linux/mount.h> +#include <linux/cdev.h> #include <asm/uaccess.h> @@ -62,6 +64,8 @@ enum { #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) +static struct class *uverbs_class; + DECLARE_MUTEX(ib_uverbs_idr_mutex); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); @@ -72,31 +76,37 @@ DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); static spinlock_t map_lock; +static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { - [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params, - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid, - [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, + [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, + [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, + [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, + [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, + [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; static struct vfsmount *uverbs_event_mnt; @@ -104,7 +114,54 @@ static struct vfsmount *uverbs_event_mnt; static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); -static int ib_dealloc_ucontext(struct ib_ucontext *context) +static void ib_uverbs_release_dev(struct kref *ref) +{ + struct ib_uverbs_device *dev = + container_of(ref, struct ib_uverbs_device, ref); + + kfree(dev); +} + +void ib_uverbs_release_ucq(struct ib_uverbs_file *file, + struct ib_uverbs_event_file *ev_file, + struct ib_ucq_object *uobj) +{ + struct ib_uverbs_event *evt, *tmp; + + if (ev_file) { + spin_lock_irq(&ev_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&ev_file->lock); + + kref_put(&ev_file->ref, ib_uverbs_release_event_file); + } + + spin_lock_irq(&file->async_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file->lock); +} + +void ib_uverbs_release_uevent(struct ib_uverbs_file *file, + struct ib_uevent_object *uobj) +{ + struct ib_uverbs_event *evt, *tmp; + + spin_lock_irq(&file->async_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&file->async_file->lock); +} + +static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, + struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; @@ -113,30 +170,46 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context) down(&ib_uverbs_idr_mutex); - /* XXX Free AHs */ + list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { + struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id); + idr_remove(&ib_uverbs_ah_idr, uobj->id); + ib_destroy_ah(ah); + list_del(&uobj->list); + kfree(uobj); + } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); + struct ib_uevent_object *uevent = + container_of(uobj, struct ib_uevent_object, uobject); idr_remove(&ib_uverbs_qp_idr, uobj->id); ib_destroy_qp(qp); list_del(&uobj->list); - kfree(container_of(uobj, struct ib_uevent_object, uobject)); + ib_uverbs_release_uevent(file, uevent); + kfree(uevent); } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); + struct ib_uverbs_event_file *ev_file = cq->cq_context; + struct ib_ucq_object *ucq = + container_of(uobj, struct ib_ucq_object, uobject); idr_remove(&ib_uverbs_cq_idr, uobj->id); ib_destroy_cq(cq); list_del(&uobj->list); - kfree(container_of(uobj, struct ib_ucq_object, uobject)); + ib_uverbs_release_ucq(file, ev_file, ucq); + kfree(ucq); } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id); + struct ib_uevent_object *uevent = + container_of(uobj, struct ib_uevent_object, uobject); idr_remove(&ib_uverbs_srq_idr, uobj->id); ib_destroy_srq(srq); list_del(&uobj->list); - kfree(container_of(uobj, struct ib_uevent_object, uobject)); + ib_uverbs_release_uevent(file, uevent); + kfree(uevent); } /* XXX Free MWs */ @@ -175,6 +248,8 @@ static void ib_uverbs_release_file(struct kref *ref) container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); + kref_put(&file->device->ref, ib_uverbs_release_dev); + kfree(file); } @@ -188,25 +263,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, spin_lock_irq(&file->lock); - while (list_empty(&file->event_list) && file->fd >= 0) { + while (list_empty(&file->event_list)) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, - !list_empty(&file->event_list) || - file->fd < 0)) + !list_empty(&file->event_list))) return -ERESTARTSYS; spin_lock_irq(&file->lock); } - if (file->fd < 0) { - spin_unlock_irq(&file->lock); - return -ENODEV; - } - event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) @@ -248,26 +317,19 @@ static unsigned int ib_uverbs_event_poll(struct file *filp, poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); - if (file->fd < 0) - pollflags = POLLERR; - else if (!list_empty(&file->event_list)) + if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } -static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) +void ib_uverbs_release_event_file(struct kref *ref) { - struct ib_uverbs_event *entry, *tmp; + struct ib_uverbs_event_file *file = + container_of(ref, struct ib_uverbs_event_file, ref); - spin_lock_irq(&file->lock); - if (file->fd != -1) { - file->fd = -1; - list_for_each_entry_safe(entry, tmp, &file->event_list, list) - kfree(entry); - } - spin_unlock_irq(&file->lock); + kfree(file); } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) @@ -280,21 +342,30 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_event *entry, *tmp; + + spin_lock_irq(&file->lock); + file->file = NULL; + list_for_each_entry_safe(entry, tmp, &file->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + kfree(entry); + } + spin_unlock_irq(&file->lock); - ib_uverbs_event_release(file); ib_uverbs_event_fasync(-1, filp, 0); - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + + if (file->is_async) { + ib_unregister_event_handler(&file->uverbs_file->event_handler); + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + } + kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } static struct file_operations uverbs_event_fops = { - /* - * No .owner field since we artificially create event files, - * so there is no increment to the module reference count in - * the open path. All event files come from a uverbs command - * file, which already takes a module reference, so this is OK. - */ + .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, @@ -303,27 +374,37 @@ static struct file_operations uverbs_event_fops = { void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { - struct ib_uverbs_file *file = cq_context; - struct ib_ucq_object *uobj; - struct ib_uverbs_event *entry; - unsigned long flags; + struct ib_uverbs_event_file *file = cq_context; + struct ib_ucq_object *uobj; + struct ib_uverbs_event *entry; + unsigned long flags; + + if (!file) + return; + + spin_lock_irqsave(&file->lock, flags); + if (!file->file) { + spin_unlock_irqrestore(&file->lock, flags); + return; + } entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) + if (!entry) { + spin_unlock_irqrestore(&file->lock, flags); return; + } uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; - spin_lock_irqsave(&file->comp_file[0].lock, flags); - list_add_tail(&entry->list, &file->comp_file[0].event_list); + list_add_tail(&entry->list, &file->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); - spin_unlock_irqrestore(&file->comp_file[0].lock, flags); + spin_unlock_irqrestore(&file->lock, flags); - wake_up_interruptible(&file->comp_file[0].poll_wait); - kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->poll_wait); + kill_fasync(&file->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, @@ -334,32 +415,40 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, struct ib_uverbs_event *entry; unsigned long flags; + spin_lock_irqsave(&file->async_file->lock, flags); + if (!file->async_file->file) { + spin_unlock_irqrestore(&file->async_file->lock, flags); + return; + } + entry = kmalloc(sizeof *entry, GFP_ATOMIC); - if (!entry) + if (!entry) { + spin_unlock_irqrestore(&file->async_file->lock, flags); return; + } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->counter = counter; - spin_lock_irqsave(&file->async_file.lock, flags); - list_add_tail(&entry->list, &file->async_file.event_list); + list_add_tail(&entry->list, &file->async_file->event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file.lock, flags); + spin_unlock_irqrestore(&file->async_file->lock, flags); - wake_up_interruptible(&file->async_file.poll_wait); - kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->async_file->poll_wait); + kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { + struct ib_uverbs_event_file *ev_file = context_ptr; struct ib_ucq_object *uobj; uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + ib_uverbs_async_handler(ev_file->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); @@ -389,8 +478,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) &uobj->events_reported); } -static void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event) +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); @@ -399,38 +488,90 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler, NULL, NULL); } -static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, - struct ib_uverbs_file *uverbs_file) +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd) { + struct ib_uverbs_event_file *ev_file; struct file *filp; + int ret; - spin_lock_init(&file->lock); - INIT_LIST_HEAD(&file->event_list); - init_waitqueue_head(&file->poll_wait); - file->uverbs_file = uverbs_file; - file->async_queue = NULL; - - file->fd = get_unused_fd(); - if (file->fd < 0) - return file->fd; + ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); + if (!ev_file) + return ERR_PTR(-ENOMEM); + + kref_init(&ev_file->ref); + spin_lock_init(&ev_file->lock); + INIT_LIST_HEAD(&ev_file->event_list); + init_waitqueue_head(&ev_file->poll_wait); + ev_file->uverbs_file = uverbs_file; + ev_file->async_queue = NULL; + ev_file->is_async = is_async; + + *fd = get_unused_fd(); + if (*fd < 0) { + ret = *fd; + goto err; + } filp = get_empty_filp(); if (!filp) { - put_unused_fd(file->fd); - return -ENFILE; + ret = -ENFILE; + goto err_fd; } - filp->f_op = &uverbs_event_fops; + ev_file->file = filp; + + /* + * fops_get() can't fail here, because we're coming from a + * system call on a uverbs file, which will already have a + * module reference. + */ + filp->f_op = fops_get(&uverbs_event_fops); filp->f_vfsmnt = mntget(uverbs_event_mnt); filp->f_dentry = dget(uverbs_event_mnt->mnt_root); filp->f_mapping = filp->f_dentry->d_inode->i_mapping; filp->f_flags = O_RDONLY; filp->f_mode = FMODE_READ; - filp->private_data = file; + filp->private_data = ev_file; - fd_install(file->fd, filp); + return filp; - return 0; +err_fd: + put_unused_fd(*fd); + +err: + kfree(ev_file); + return ERR_PTR(ret); +} + +/* + * Look up a completion event file by FD. If lookup is successful, + * takes a ref to the event file struct that it returns; if + * unsuccessful, returns NULL. + */ +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) +{ + struct ib_uverbs_event_file *ev_file = NULL; + struct file *filp; + + filp = fget(fd); + if (!filp) + return NULL; + + if (filp->f_op != &uverbs_event_fops) + goto out; + + ev_file = filp->private_data; + if (ev_file->is_async) { + ev_file = NULL; + goto out; + } + + kref_get(&ev_file->ref); + +out: + fput(filp); + return ev_file; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, @@ -450,11 +591,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[hdr.command]) + !uverbs_cmd_table[hdr.command] || + !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) return -EINVAL; - if (!file->ucontext && - hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS && + if (!file->ucontext && hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) return -EINVAL; @@ -474,84 +615,57 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) static int ib_uverbs_open(struct inode *inode, struct file *filp) { - struct ib_uverbs_device *dev = - container_of(inode->i_cdev, struct ib_uverbs_device, dev); + struct ib_uverbs_device *dev; struct ib_uverbs_file *file; - int i = 0; int ret; - if (!try_module_get(dev->ib_dev->owner)) - return -ENODEV; + spin_lock(&map_lock); + dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR]; + if (dev) + kref_get(&dev->ref); + spin_unlock(&map_lock); + + if (!dev) + return -ENXIO; + + if (!try_module_get(dev->ib_dev->owner)) { + ret = -ENODEV; + goto err; + } - file = kmalloc(sizeof *file + - (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), - GFP_KERNEL); + file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) { ret = -ENOMEM; - goto err; + goto err_module; } - file->device = dev; + file->device = dev; + file->ucontext = NULL; + file->async_file = NULL; kref_init(&file->ref); init_MUTEX(&file->mutex); - file->ucontext = NULL; - - kref_get(&file->ref); - ret = ib_uverbs_event_init(&file->async_file, file); - if (ret) - goto err_kref; - - file->async_file.is_async = 1; - - for (i = 0; i < dev->num_comp; ++i) { - kref_get(&file->ref); - ret = ib_uverbs_event_init(&file->comp_file[i], file); - if (ret) - goto err_async; - file->comp_file[i].is_async = 0; - } - - filp->private_data = file; - INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev, - ib_uverbs_event_handler); - if (ib_register_event_handler(&file->event_handler)) - goto err_async; - return 0; -err_async: - while (i--) - ib_uverbs_event_release(&file->comp_file[i]); - - ib_uverbs_event_release(&file->async_file); - -err_kref: - /* - * One extra kref_put() because we took a reference before the - * event file creation that failed and got us here. - */ - kref_put(&file->ref, ib_uverbs_release_file); - kref_put(&file->ref, ib_uverbs_release_file); +err_module: + module_put(dev->ib_dev->owner); err: - module_put(dev->ib_dev->owner); + kref_put(&dev->ref, ib_uverbs_release_dev); + return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - int i; - ib_unregister_event_handler(&file->event_handler); - ib_uverbs_event_release(&file->async_file); - ib_dealloc_ucontext(file->ucontext); + ib_uverbs_cleanup_ucontext(file, file->ucontext); - for (i = 0; i < file->device->num_comp; ++i) - ib_uverbs_event_release(&file->comp_file[i]); + if (file->async_file) + kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); @@ -581,27 +695,25 @@ static struct ib_client uverbs_client = { static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { - struct ib_uverbs_device *dev = - container_of(class_dev, struct ib_uverbs_device, class_dev); + struct ib_uverbs_device *dev = class_get_devdata(class_dev); + + if (!dev) + return -ENODEV; return sprintf(buf, "%s\n", dev->ib_dev->name); } static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); -static void ib_uverbs_release_class_dev(struct class_device *class_dev) +static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf) { - struct ib_uverbs_device *dev = - container_of(class_dev, struct ib_uverbs_device, class_dev); + struct ib_uverbs_device *dev = class_get_devdata(class_dev); - cdev_del(&dev->dev); - clear_bit(dev->devnum, dev_map); - kfree(dev); -} + if (!dev) + return -ENODEV; -static struct class uverbs_class = { - .name = "infiniband_verbs", - .release = ib_uverbs_release_class_dev -}; + return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver); +} +static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); static ssize_t show_abi_version(struct class *class, char *buf) { @@ -622,6 +734,8 @@ static void ib_uverbs_add_one(struct ib_device *device) memset(uverbs_dev, 0, sizeof *uverbs_dev); + kref_init(&uverbs_dev->ref); + spin_lock(&map_lock); uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { @@ -631,41 +745,49 @@ static void ib_uverbs_add_one(struct ib_device *device) set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); - uverbs_dev->ib_dev = device; - uverbs_dev->num_comp = 1; + uverbs_dev->ib_dev = device; + uverbs_dev->num_comp_vectors = 1; - if (device->mmap) - cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); - else - cdev_init(&uverbs_dev->dev, &uverbs_fops); - uverbs_dev->dev.owner = THIS_MODULE; - kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum); - if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + uverbs_dev->dev = cdev_alloc(); + if (!uverbs_dev->dev) goto err; + uverbs_dev->dev->owner = THIS_MODULE; + uverbs_dev->dev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; + kobject_set_name(&uverbs_dev->dev->kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + goto err_cdev; - uverbs_dev->class_dev.class = &uverbs_class; - uverbs_dev->class_dev.dev = device->dma_device; - uverbs_dev->class_dev.devt = uverbs_dev->dev.dev; - snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum); - if (class_device_register(&uverbs_dev->class_dev)) + uverbs_dev->class_dev = class_device_create(uverbs_class, NULL, + uverbs_dev->dev->dev, + device->dma_device, + "uverbs%d", uverbs_dev->devnum); + if (IS_ERR(uverbs_dev->class_dev)) goto err_cdev; - if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev)) + class_set_devdata(uverbs_dev->class_dev, uverbs_dev); + + if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_ibdev)) goto err_class; + if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_abi_version)) + goto err_class; + + spin_lock(&map_lock); + dev_table[uverbs_dev->devnum] = uverbs_dev; + spin_unlock(&map_lock); ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: - class_device_unregister(&uverbs_dev->class_dev); + class_device_destroy(uverbs_class, uverbs_dev->dev->dev); err_cdev: - cdev_del(&uverbs_dev->dev); + cdev_del(uverbs_dev->dev); clear_bit(uverbs_dev->devnum, dev_map); err: - kfree(uverbs_dev); + kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); return; } @@ -676,7 +798,16 @@ static void ib_uverbs_remove_one(struct ib_device *device) if (!uverbs_dev) return; - class_device_unregister(&uverbs_dev->class_dev); + class_set_devdata(uverbs_dev->class_dev, NULL); + class_device_destroy(uverbs_class, uverbs_dev->dev->dev); + cdev_del(uverbs_dev->dev); + + spin_lock(&map_lock); + dev_table[uverbs_dev->devnum] = NULL; + spin_unlock(&map_lock); + + clear_bit(uverbs_dev->devnum, dev_map); + kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); } static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags, @@ -706,13 +837,14 @@ static int __init ib_uverbs_init(void) goto out; } - ret = class_register(&uverbs_class); - if (ret) { + uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); + if (IS_ERR(uverbs_class)) { + ret = PTR_ERR(uverbs_class); printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } - ret = class_create_file(&uverbs_class, &class_attr_abi_version); + ret = class_create_file(uverbs_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); goto out_class; @@ -746,7 +878,7 @@ out_fs: unregister_filesystem(&uverbs_event_fs); out_class: - class_unregister(&uverbs_class); + class_destroy(uverbs_class); out_chrdev: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); @@ -760,8 +892,15 @@ static void __exit ib_uverbs_cleanup(void) ib_unregister_client(&uverbs_client); mntput(uverbs_event_mnt); unregister_filesystem(&uverbs_event_fs); - class_unregister(&uverbs_class); + class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + idr_destroy(&ib_uverbs_pd_idr); + idr_destroy(&ib_uverbs_mr_idr); + idr_destroy(&ib_uverbs_mw_idr); + idr_destroy(&ib_uverbs_ah_idr); + idr_destroy(&ib_uverbs_cq_idr); + idr_destroy(&ib_uverbs_qp_idr); + idr_destroy(&ib_uverbs_srq_idr); } module_init(ib_uverbs_init); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5081d90..72d3ef7 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -523,16 +523,22 @@ EXPORT_SYMBOL(ib_dealloc_fmr); int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - return qp->device->attach_mcast ? - qp->device->attach_mcast(qp, gid, lid) : - -ENOSYS; + if (!qp->device->attach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + return -EINVAL; + + return qp->device->attach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_attach_mcast); int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - return qp->device->detach_mcast ? - qp->device->detach_mcast(qp, gid, lid) : - -ENOSYS; + if (!qp->device->detach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) + return -EINVAL; + + return qp->device->detach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_detach_mcast); diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile index c44f7ba..47ec5a7 100644 --- a/drivers/infiniband/hw/mthca/Makefile +++ b/drivers/infiniband/hw/mthca/Makefile @@ -7,4 +7,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \ mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \ - mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o + mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \ + mthca_catas.o diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c new file mode 100644 index 0000000..7ac52af --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mthca_dev.h" + +enum { + MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, + + MTHCA_CATAS_TYPE_INTERNAL = 0, + MTHCA_CATAS_TYPE_UPLINK = 3, + MTHCA_CATAS_TYPE_DDR = 4, + MTHCA_CATAS_TYPE_PARITY = 5, +}; + +static DEFINE_SPINLOCK(catas_lock); + +static void handle_catas(struct mthca_dev *dev) +{ + struct ib_event event; + const char *type; + int i; + + event.device = &dev->ib_dev; + event.event = IB_EVENT_DEVICE_FATAL; + event.element.port_num = 0; + + ib_dispatch_event(&event); + + switch (swab32(readl(dev->catas_err.map)) >> 24) { + case MTHCA_CATAS_TYPE_INTERNAL: + type = "internal error"; + break; + case MTHCA_CATAS_TYPE_UPLINK: + type = "uplink bus error"; + break; + case MTHCA_CATAS_TYPE_DDR: + type = "DDR data error"; + break; + case MTHCA_CATAS_TYPE_PARITY: + type = "internal parity error"; + break; + default: + type = "unknown error"; + break; + } + + mthca_err(dev, "Catastrophic error detected: %s\n", type); + for (i = 0; i < dev->catas_err.size; ++i) + mthca_err(dev, " buf[%02x]: %08x\n", + i, swab32(readl(dev->catas_err.map + i))); +} + +static void poll_catas(unsigned long dev_ptr) +{ + struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; + unsigned long flags; + int i; + + for (i = 0; i < dev->catas_err.size; ++i) + if (readl(dev->catas_err.map + i)) { + handle_catas(dev); + return; + } + + spin_lock_irqsave(&catas_lock, flags); + if (dev->catas_err.stop) + mod_timer(&dev->catas_err.timer, + jiffies + MTHCA_CATAS_POLL_INTERVAL); + spin_unlock_irqrestore(&catas_lock, flags); + + return; +} + +void mthca_start_catas_poll(struct mthca_dev *dev) +{ + unsigned long addr; + + init_timer(&dev->catas_err.timer); + dev->catas_err.stop = 0; + dev->catas_err.map = NULL; + + addr = pci_resource_start(dev->pdev, 0) + + ((pci_resource_len(dev->pdev, 0) - 1) & + dev->catas_err.addr); + + if (!request_mem_region(addr, dev->catas_err.size * 4, + DRV_NAME)) { + mthca_warn(dev, "couldn't request catastrophic error region " + "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); + return; + } + + dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); + if (!dev->catas_err.map) { + mthca_warn(dev, "couldn't map catastrophic error region " + "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); + release_mem_region(addr, dev->catas_err.size * 4); + return; + } + + dev->catas_err.timer.data = (unsigned long) dev; + dev->catas_err.timer.function = poll_catas; + dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; + add_timer(&dev->catas_err.timer); +} + +void mthca_stop_catas_poll(struct mthca_dev *dev) +{ + spin_lock_irq(&catas_lock); + dev->catas_err.stop = 1; + spin_unlock_irq(&catas_lock); + + del_timer_sync(&dev->catas_err.timer); + + if (dev->catas_err.map) { + iounmap(dev->catas_err.map); + release_mem_region(pci_resource_start(dev->pdev, 0) + + ((pci_resource_len(dev->pdev, 0) - 1) & + dev->catas_err.addr), + dev->catas_err.size * 4); + } +} diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 378646b..49f211d 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -706,9 +707,13 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); dev->cmd.max_cmds = 1 << lg; + MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET); + MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET); mthca_dbg(dev, "FW version %012llx, max commands %d\n", (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); + mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n", + (unsigned long long) dev->catas_err.addr, dev->catas_err.size); if (mthca_is_memfree(dev)) { MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); @@ -933,9 +938,9 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, goto out; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); - dev_lim->max_srq_sz = 1 << field; + dev_lim->max_srq_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); - dev_lim->max_qp_sz = 1 << field; + dev_lim->max_qp_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); dev_lim->reserved_qps = 1 << (field & 0xf); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); @@ -1045,6 +1050,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars); mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", dev_lim->max_pds, dev_lim->reserved_mgms); + mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", + dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz); mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7bff5a8..7e68bd4 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -83,6 +83,8 @@ enum { /* Arbel FW gives us these, but we need them for Tavor */ MTHCA_MPT_ENTRY_SIZE = 0x40, MTHCA_MTT_SEG_SIZE = 0x40, + + MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) }; enum { @@ -128,12 +130,16 @@ struct mthca_limits { int num_uars; int max_sg; int num_qps; + int max_wqes; + int max_qp_init_rdma; int reserved_qps; int num_srqs; + int max_srq_wqes; int reserved_srqs; int num_eecs; int reserved_eecs; int num_cqs; + int max_cqes; int reserved_cqs; int num_eqs; int reserved_eqs; @@ -148,6 +154,7 @@ struct mthca_limits { int reserved_mcgs; int num_pds; int reserved_pds; + u32 flags; u8 port_width_cap; }; @@ -251,6 +258,14 @@ struct mthca_mcg_table { struct mthca_icm_table *table; }; +struct mthca_catas_err { + u64 addr; + u32 __iomem *map; + unsigned long stop; + u32 size; + struct timer_list timer; +}; + struct mthca_dev { struct ib_device ib_dev; struct pci_dev *pdev; @@ -311,6 +326,8 @@ struct mthca_dev { struct mthca_av_table av_table; struct mthca_mcg_table mcg_table; + struct mthca_catas_err catas_err; + struct mthca_uar driver_uar; struct mthca_db_table *db_tab; struct mthca_pd driver_pd; @@ -398,6 +415,9 @@ void mthca_cleanup_mcg_table(struct mthca_dev *dev); int mthca_register_device(struct mthca_dev *dev); void mthca_unregister_device(struct mthca_dev *dev); +void mthca_start_catas_poll(struct mthca_dev *dev); +void mthca_stop_catas_poll(struct mthca_dev *dev); + int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); @@ -447,6 +467,8 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, struct ib_srq_attr *attr, struct mthca_srq *srq); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type); void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 8dfafda..e5a047a 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -83,7 +83,8 @@ enum { MTHCA_EVENT_TYPE_PATH_MIG = 0x01, MTHCA_EVENT_TYPE_COMM_EST = 0x02, MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, - MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14, MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, @@ -110,8 +111,9 @@ enum { (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT)) -#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ - (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE) +#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT)) #define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD) #define MTHCA_EQ_DB_INC_CI (1 << 24) @@ -142,6 +144,9 @@ struct mthca_eqe { __be32 qpn; } __attribute__((packed)) qp; struct { + __be32 srqn; + } __attribute__((packed)) srq; + struct { __be32 cqn; u32 reserved1; u8 reserved2[3]; @@ -305,6 +310,16 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) IB_EVENT_SQ_DRAINED); break; + case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE: + mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_LAST_WQE_REACHED); + break; + + case MTHCA_EVENT_TYPE_SRQ_LIMIT: + mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff, + IB_EVENT_SRQ_LIMIT_REACHED); + break; + case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_QP_FATAL); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 9804174..8561b29 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -46,11 +46,6 @@ enum { MTHCA_VENDOR_CLASS2 = 0xa }; -struct mthca_trap_mad { - struct ib_mad *mad; - DECLARE_PCI_UNMAP_ADDR(mapping) -}; - static void update_sm_ah(struct mthca_dev *dev, u8 port_num, u16 lid, u8 sl) { @@ -116,49 +111,14 @@ static void forward_trap(struct mthca_dev *dev, struct ib_mad *mad) { int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; - struct mthca_trap_mad *tmad; - struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr = { - .opcode = IB_WR_SEND, - .sg_list = &gather_list, - .num_sge = 1, - .send_flags = IB_SEND_SIGNALED, - .wr = { - .ud = { - .remote_qpn = qpn, - .remote_qkey = qpn ? IB_QP1_QKEY : 0, - .timeout_ms = 0 - } - } - }; + struct ib_mad_send_buf *send_buf; struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; int ret; unsigned long flags; if (agent) { - tmad = kmalloc(sizeof *tmad, GFP_KERNEL); - if (!tmad) - return; - - tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL); - if (!tmad->mad) { - kfree(tmad); - return; - } - - memcpy(tmad->mad, mad, sizeof *mad); - - wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr; - wr.wr_id = (unsigned long) tmad; - - gather_list.addr = dma_map_single(agent->device->dma_device, - tmad->mad, - sizeof *tmad->mad, - DMA_TO_DEVICE); - gather_list.length = sizeof *tmad->mad; - gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey; - pci_unmap_addr_set(tmad, mapping, gather_list.addr); - + send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, + IB_MGMT_MAD_DATA, GFP_ATOMIC); /* * We rely here on the fact that MLX QPs don't use the * address handle after the send is posted (this is @@ -166,21 +126,15 @@ static void forward_trap(struct mthca_dev *dev, * it's OK for our devices). */ spin_lock_irqsave(&dev->sm_lock, flags); - wr.wr.ud.ah = dev->sm_ah[port_num - 1]; - if (wr.wr.ud.ah) - ret = ib_post_send_mad(agent, &wr, &bad_wr); + memcpy(send_buf->mad, mad, sizeof *mad); + if ((send_buf->ah = dev->sm_ah[port_num - 1])) + ret = ib_post_send_mad(send_buf, NULL); else ret = -EINVAL; spin_unlock_irqrestore(&dev->sm_lock, flags); - if (ret) { - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(tmad, mapping), - sizeof *tmad->mad, - DMA_TO_DEVICE); - kfree(tmad->mad); - kfree(tmad); - } + if (ret) + ib_free_send_mad(send_buf); } } @@ -267,15 +221,7 @@ int mthca_process_mad(struct ib_device *ibdev, static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { - struct mthca_trap_mad *tmad = - (void *) (unsigned long) mad_send_wc->wr_id; - - dma_unmap_single(agent->device->dma_device, - pci_unmap_addr(tmad, mapping), - sizeof *tmad->mad, - DMA_TO_DEVICE); - kfree(tmad->mad); - kfree(tmad); + ib_free_send_mad(mad_send_wc->send_buf); } int mthca_create_agents(struct mthca_dev *dev) diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 23a3f56..883d1e5 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -162,9 +162,18 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.pkey_table_len = dev_lim->max_pkeys; mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; mdev->limits.max_sg = dev_lim->max_sg; + mdev->limits.max_wqes = dev_lim->max_qp_sz; + mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; mdev->limits.reserved_qps = dev_lim->reserved_qps; + mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; mdev->limits.reserved_srqs = dev_lim->reserved_srqs; mdev->limits.reserved_eecs = dev_lim->reserved_eecs; + /* + * Subtract 1 from the limit because we need to allocate a + * spare CQE so the HCA HW can tell the difference between an + * empty CQ and a full CQ. + */ + mdev->limits.max_cqes = dev_lim->max_cq_sz - 1; mdev->limits.reserved_cqs = dev_lim->reserved_cqs; mdev->limits.reserved_eqs = dev_lim->reserved_eqs; mdev->limits.reserved_mtts = dev_lim->reserved_mtts; @@ -172,6 +181,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; mdev->limits.port_width_cap = dev_lim->max_port_width; + mdev->limits.flags = dev_lim->flags; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. May be doable since hardware supports it for SRQ. @@ -1186,6 +1196,7 @@ MODULE_DEVICE_TABLE(pci, mthca_pci_table); static struct pci_driver mthca_driver = { .name = DRV_NAME, + .owner = THIS_MODULE, .id_table = mthca_pci_table, .probe = mthca_init_one, .remove = __devexit_p(mthca_remove_one) diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index a270760..b47ea7d 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -37,10 +37,6 @@ #include "mthca_dev.h" #include "mthca_cmd.h" -enum { - MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) -}; - struct mthca_mgm { __be32 next_gid_index; u32 reserved[3]; @@ -189,7 +185,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } for (i = 0; i < MTHCA_QP_PER_MGM; ++i) - if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { + if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) { + mthca_dbg(dev, "QP %06x already a member of MGM\n", + ibqp->qp_num); + err = 0; + goto out; + } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31)); break; } diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 9ad8b3b..d72fe95 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -487,7 +487,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, } } -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, + u32 qn, __be32 **db) { int group; int start, end, dir; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index 29433f2..4fdca26 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -173,7 +173,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db); +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, + u32 qn, __be32 **db); void mthca_free_db(struct mthca_dev *dev, int type, int db_index); #endif /* MTHCA_MEMFREE_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 3f5319a..1b9477e 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -37,6 +37,7 @@ */ #include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> #include <linux/mm.h> #include "mthca_dev.h" @@ -90,15 +91,26 @@ static int mthca_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; - props->max_qp_wr = 0xffff; + props->max_qp_wr = mdev->limits.max_wqes; props->max_sge = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; - props->max_cqe = 0xffff; + props->max_cqe = mdev->limits.max_cqes; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds; props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; - props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift; + props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; + props->max_srq_wr = mdev->limits.max_srq_wqes; + props->max_srq_sge = mdev->limits.max_sg; props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; + props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? + IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->max_pkeys = mdev->limits.pkey_table_len; + props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; + props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; err = 0; out: @@ -150,9 +162,13 @@ static int mthca_query_port(struct ib_device *ibdev, props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; props->max_msg_sz = 0x80000000; props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; out: kfree(in_mad); @@ -634,6 +650,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, int nent; int err; + if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) + return ERR_PTR(-EINVAL); + if (context) { if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) return ERR_PTR(-EFAULT); @@ -1058,6 +1077,26 @@ int mthca_register_device(struct mthca_dev *dev) strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; + dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; @@ -1077,6 +1116,7 @@ int mthca_register_device(struct mthca_dev *dev) if (dev->mthca_flags & MTHCA_FLAG_SRQ) { dev->ib_dev.create_srq = mthca_create_srq; + dev->ib_dev.modify_srq = mthca_modify_srq; dev->ib_dev.destroy_srq = mthca_destroy_srq; if (mthca_is_memfree(dev)) @@ -1135,10 +1175,13 @@ int mthca_register_device(struct mthca_dev *dev) } } + mthca_start_catas_poll(dev); + return 0; } void mthca_unregister_device(struct mthca_dev *dev) { + mthca_stop_catas_poll(dev); ib_unregister_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 5fa0066..62ff091 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -338,8 +338,7 @@ static const struct { [UC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | - IB_QP_RQ_PSN | - IB_QP_MAX_DEST_RD_ATOMIC), + IB_QP_RQ_PSN), [RC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | @@ -368,8 +367,7 @@ static const struct { .trans = MTHCA_TRANS_RTR2RTS, .req_param = { [UD] = IB_QP_SQ_PSN, - [UC] = (IB_QP_SQ_PSN | - IB_QP_MAX_QP_RD_ATOMIC), + [UC] = IB_QP_SQ_PSN, [RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -446,8 +444,6 @@ static const struct { [UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [UC] = (IB_QP_AV | - IB_QP_MAX_QP_RD_ATOMIC | - IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -478,7 +474,7 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), - [UC] = (IB_QP_CUR_STATE), + [UC] = IB_QP_CUR_STATE, [RC] = (IB_QP_CUR_STATE | IB_QP_MIN_RNR_TIMER), [MLX] = (IB_QP_CUR_STATE | @@ -1112,8 +1108,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, struct mthca_qp *qp) { /* Sanity check QP size before proceeding */ - if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || - cap->max_send_sge > 64 || cap->max_recv_sge > 64) + if (cap->max_send_wr > dev->limits.max_wqes || + cap->max_recv_wr > dev->limits.max_wqes || + cap->max_send_sge > dev->limits.max_sg || + cap->max_recv_sge > dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 18998d4..64f70aa 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -186,7 +186,8 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, int err; /* Sanity check SRQ size before proceeding */ - if (attr->max_wr > 16 << 20 || attr->max_sge > 64) + if (attr->max_wr > dev->limits.max_srq_wqes || + attr->max_sge > dev->limits.max_sg) return -EINVAL; srq->max = attr->max_wr; @@ -332,6 +333,29 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) mthca_free_mailbox(dev, mailbox); } +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + int ret; + u8 status; + + /* We don't support resizing SRQs (yet?) */ + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & IB_SRQ_LIMIT) { + ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); + if (ret) + return ret; + if (status) + return -EINVAL; + } + + return 0; +} + void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type) { @@ -354,7 +378,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn, event.device = &dev->ib_dev; event.event = event_type; - event.element.srq = &srq->ibsrq; + event.element.srq = &srq->ibsrq; srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); out: @@ -415,6 +439,14 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); + + if (next_ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + break; + } + prev_wqe = srq->last; srq->last = wqe; @@ -506,6 +538,13 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe = get_wqe(srq, ind); next_ind = *wqe_to_link(wqe); + if (next_ind < 0) { + mthca_err(dev, "SRQ %06x full\n", srq->srqn); + err = -ENOMEM; + *bad_wr = wr; + break; + } + ((struct mthca_next_seg *) wqe)->nda_op = cpu_to_be32((next_ind << srq->wqe_shift) | 1); ((struct mthca_next_seg *) wqe)->ee_nds = 0; diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index 41613ec..bb015c6 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -38,6 +38,12 @@ #include <linux/types.h> /* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define MTHCA_UVERBS_ABI_VERSION 1 + +/* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4ea1c1c..c994a91 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -100,7 +100,12 @@ struct ipoib_pseudoheader { struct ipoib_mcast; -struct ipoib_buf { +struct ipoib_rx_buf { + struct sk_buff *skb; + dma_addr_t mapping; +}; + +struct ipoib_tx_buf { struct sk_buff *skb; DECLARE_PCI_UNMAP_ADDR(mapping) }; @@ -150,14 +155,14 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_buf *rx_ring; + struct ipoib_rx_buf *rx_ring; - spinlock_t tx_lock; - struct ipoib_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; - struct ib_sge tx_sge; - struct ib_send_wr tx_wr; + spinlock_t tx_lock; + struct ipoib_tx_buf *tx_ring; + unsigned tx_head; + unsigned tx_tail; + struct ib_sge tx_sge; + struct ib_send_wr tx_wr; struct ib_wc ibwc[IPOIB_NUM_WC]; @@ -277,7 +282,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid); -int ipoib_qp_create(struct net_device *dev); +int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); void ipoib_transport_dev_cleanup(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f744009..192fef8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -95,57 +95,65 @@ void ipoib_free_ah(struct kref *kref) } } -static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv, - unsigned int wr_id, - dma_addr_t addr) +static int ipoib_ib_post_receive(struct net_device *dev, int id) { - struct ib_sge list = { - .addr = addr, - .length = IPOIB_BUF_SIZE, - .lkey = priv->mr->lkey, - }; - struct ib_recv_wr param = { - .wr_id = wr_id | IPOIB_OP_RECV, - .sg_list = &list, - .num_sge = 1, - }; + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_sge list; + struct ib_recv_wr param; struct ib_recv_wr *bad_wr; + int ret; + + list.addr = priv->rx_ring[id].mapping; + list.length = IPOIB_BUF_SIZE; + list.lkey = priv->mr->lkey; + + param.next = NULL; + param.wr_id = id | IPOIB_OP_RECV; + param.sg_list = &list; + param.num_sge = 1; + + ret = ib_post_recv(priv->qp, ¶m, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); + dma_unmap_single(priv->ca->dma_device, + priv->rx_ring[id].mapping, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(priv->rx_ring[id].skb); + priv->rx_ring[id].skb = NULL; + } - return ib_post_recv(priv->qp, ¶m, &bad_wr); + return ret; } -static int ipoib_ib_post_receive(struct net_device *dev, int id) +static int ipoib_alloc_rx_skb(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; dma_addr_t addr; - int ret; skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); - if (!skb) { - ipoib_warn(priv, "failed to allocate receive buffer\n"); - - priv->rx_ring[id].skb = NULL; + if (!skb) return -ENOMEM; - } - skb_reserve(skb, 4); /* 16 byte align IP header */ - priv->rx_ring[id].skb = skb; + + /* + * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte + * header. So we need 4 more bytes to get to 48 and align the + * IP header to a multiple of 16. + */ + skb_reserve(skb, 4); + addr = dma_map_single(priv->ca->dma_device, skb->data, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr); - - ret = ipoib_ib_receive(priv, id, addr); - if (ret) { - ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n", - id, ret); - dma_unmap_single(priv->ca->dma_device, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(addr))) { dev_kfree_skb_any(skb); - priv->rx_ring[id].skb = NULL; + return -EIO; } - return ret; + priv->rx_ring[id].skb = skb; + priv->rx_ring[id].mapping = addr; + + return 0; } static int ipoib_ib_post_receives(struct net_device *dev) @@ -154,6 +162,10 @@ static int ipoib_ib_post_receives(struct net_device *dev) int i; for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { + if (ipoib_alloc_rx_skb(dev, i)) { + ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); + return -ENOMEM; + } if (ipoib_ib_post_receive(dev, i)) { ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); return -EIO; @@ -176,28 +188,36 @@ static void ipoib_ib_handle_wc(struct net_device *dev, wr_id &= ~IPOIB_OP_RECV; if (wr_id < IPOIB_RX_RING_SIZE) { - struct sk_buff *skb = priv->rx_ring[wr_id].skb; - - priv->rx_ring[wr_id].skb = NULL; + struct sk_buff *skb = priv->rx_ring[wr_id].skb; + dma_addr_t addr = priv->rx_ring[wr_id].mapping; - dma_unmap_single(priv->ca->dma_device, - pci_unmap_addr(&priv->rx_ring[wr_id], - mapping), - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - - if (wc->status != IB_WC_SUCCESS) { + if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); + priv->rx_ring[wr_id].skb = NULL; return; } + /* + * If we can't allocate a new RX buffer, dump + * this packet and reuse the old buffer. + */ + if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + ++priv->stats.rx_dropped; + goto repost; + } + ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); + dma_unmap_single(priv->ca->dma_device, addr, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + skb_put(skb, wc->byte_len); skb_pull(skb, IB_GRH_BYTES); @@ -220,8 +240,8 @@ static void ipoib_ib_handle_wc(struct net_device *dev, dev_kfree_skb_any(skb); } - /* repost receive */ - if (ipoib_ib_post_receive(dev, wr_id)) + repost: + if (unlikely(ipoib_ib_post_receive(dev, wr_id))) ipoib_warn(priv, "ipoib_ib_post_receive failed " "for buf %d\n", wr_id); } else @@ -229,7 +249,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev, wr_id); } else { - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; unsigned long flags; if (wr_id >= IPOIB_TX_RING_SIZE) { @@ -302,7 +322,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; dma_addr_t addr; if (skb->len > dev->mtu + INFINIBAND_ALEN) { @@ -387,9 +407,9 @@ int ipoib_ib_dev_open(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - ret = ipoib_qp_create(dev); + ret = ipoib_init_qp(dev); if (ret) { - ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret); + ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); return -1; } @@ -468,7 +488,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) struct ib_qp_attr qp_attr; int attr_mask; unsigned long begin; - struct ipoib_buf *tx_req; + struct ipoib_tx_buf *tx_req; int i; /* Kill the existing QP and allocate a new one */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6c5bf07..cd4f423 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -637,8 +637,11 @@ static void ipoib_timeout(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_warn(priv, "transmit timeout: latency %ld\n", - jiffies - dev->trans_start); + ipoib_warn(priv, "transmit timeout: latency %d msecs\n", + jiffies_to_msecs(jiffies - dev->trans_start)); + ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", + netif_queue_stopped(dev), + priv->tx_head, priv->tx_tail); /* XXX reset QP, etc. */ } @@ -729,7 +732,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) /* Allocate RX/TX "rings" to hold queued skbs */ - priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf), + priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf), GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", @@ -737,9 +740,9 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out; } memset(priv->rx_ring, 0, - IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf)); + IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf)); - priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf), + priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf), GFP_KERNEL); if (!priv->tx_ring) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", @@ -747,7 +750,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out_rx_ring_cleanup; } memset(priv->tx_ring, 0, - IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf)); + IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf)); /* priv->tx_head & tx_tail are already 0 */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 79f59d0..b5902a7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -92,7 +92,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid) return ret; } -int ipoib_qp_create(struct net_device *dev) +int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; @@ -149,10 +149,11 @@ int ipoib_qp_create(struct net_device *dev) return 0; out_fail: - ib_destroy_qp(priv->qp); - priv->qp = NULL; + qp_attr.qp_state = IB_QPS_RESET; + if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) + ipoib_warn(priv, "Failed to modify QP to RESET state\n"); - return -EINVAL; + return ret; } int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 1c5f19f..f1c3bc5 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -171,10 +171,10 @@ struct sctp_sndrcvinfo { */ enum sctp_sinfo_flags { - MSG_UNORDERED = 1, /* Send/receive message unordered. */ - MSG_ADDR_OVER = 2, /* Override the primary destination. */ - MSG_ABORT=4, /* Send an ABORT message to the peer. */ - /* MSG_EOF is already defined per socket.h */ + SCTP_UNORDERED = 1, /* Send/receive message unordered. */ + SCTP_ADDR_OVER = 2, /* Override the primary destination. */ + SCTP_ABORT=4, /* Send an ABORT message to the peer. */ + SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */ }; diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 5308683..0a9fcd5 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -109,7 +109,6 @@ struct ib_cm_id; struct ib_cm_req_event_param { struct ib_cm_id *listen_id; - struct ib_device *device; u8 port; struct ib_sa_path_rec *primary_path; @@ -220,7 +219,6 @@ struct ib_cm_apr_event_param { struct ib_cm_sidr_req_event_param { struct ib_cm_id *listen_id; - struct ib_device *device; u8 port; u16 pkey; }; @@ -284,6 +282,7 @@ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, struct ib_cm_id { ib_cm_handler cm_handler; void *context; + struct ib_device *device; __be64 service_id; __be64 service_mask; enum ib_cm_state state; /* internal CM/debug use */ @@ -295,6 +294,8 @@ struct ib_cm_id { /** * ib_create_cm_id - Allocate a communication identifier. + * @device: Device associated with the cm_id. All related communication will + * be associated with the specified device. * @cm_handler: Callback invoked to notify the user of CM events. * @context: User specified context associated with the communication * identifier. @@ -302,7 +303,8 @@ struct ib_cm_id { * Communication identifiers are used to track connection states, service * ID resolution requests, and listen requests. */ -struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, +struct ib_cm_id *ib_create_cm_id(struct ib_device *device, + ib_cm_handler cm_handler, void *context); /** diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 4172e68..2c13350 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -109,10 +109,14 @@ #define IB_QP_SET_QKEY 0x80000000 enum { + IB_MGMT_MAD_HDR = 24, IB_MGMT_MAD_DATA = 232, + IB_MGMT_RMPP_HDR = 36, IB_MGMT_RMPP_DATA = 220, + IB_MGMT_VENDOR_HDR = 40, IB_MGMT_VENDOR_DATA = 216, - IB_MGMT_SA_DATA = 200 + IB_MGMT_SA_HDR = 56, + IB_MGMT_SA_DATA = 200, }; struct ib_mad_hdr { @@ -203,26 +207,25 @@ struct ib_class_port_info /** * ib_mad_send_buf - MAD data buffer and work request for sends. - * @mad: References an allocated MAD data buffer. The size of the data - * buffer is specified in the @send_wr.length field. - * @mapping: DMA mapping information. + * @next: A pointer used to chain together MADs for posting. + * @mad: References an allocated MAD data buffer. * @mad_agent: MAD agent that allocated the buffer. + * @ah: The address handle to use when sending the MAD. * @context: User-controlled context fields. - * @send_wr: An initialized work request structure used when sending the MAD. - * The wr_id field of the work request is initialized to reference this - * data structure. - * @sge: A scatter-gather list referenced by the work request. + * @timeout_ms: Time to wait for a response. + * @retries: Number of times to retry a request for a response. * * Users are responsible for initializing the MAD buffer itself, with the * exception of specifying the payload length field in any RMPP MAD. */ struct ib_mad_send_buf { - struct ib_mad *mad; - DECLARE_PCI_UNMAP_ADDR(mapping) + struct ib_mad_send_buf *next; + void *mad; struct ib_mad_agent *mad_agent; + struct ib_ah *ah; void *context[2]; - struct ib_send_wr send_wr; - struct ib_sge sge; + int timeout_ms; + int retries; }; /** @@ -287,7 +290,7 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, * or @mad_send_wc. */ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, + struct ib_mad_send_buf *send_buf, struct ib_mad_send_wc *mad_send_wc); /** @@ -334,13 +337,13 @@ struct ib_mad_agent { /** * ib_mad_send_wc - MAD send completion information. - * @wr_id: Work request identifier associated with the send MAD request. + * @send_buf: Send MAD data buffer associated with the send MAD request. * @status: Completion status. * @vendor_err: Optional vendor error information returned with a failed * request. */ struct ib_mad_send_wc { - u64 wr_id; + struct ib_mad_send_buf *send_buf; enum ib_wc_status status; u32 vendor_err; }; @@ -366,7 +369,7 @@ struct ib_mad_recv_buf { * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. * @mad_len: The length of the received MAD, without duplicated headers. * - * For received response, the wr_id field of the wc is set to the wr_id + * For received response, the wr_id contains a pointer to the ib_mad_send_buf * for the corresponding send request. */ struct ib_mad_recv_wc { @@ -463,9 +466,9 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); /** * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client. - * @mad_agent: Specifies the associated registration to post the send to. - * @send_wr: Specifies the information needed to send the MAD(s). - * @bad_send_wr: Specifies the MAD on which an error was encountered. + * @send_buf: Specifies the information needed to send the MAD(s). + * @bad_send_buf: Specifies the MAD on which an error was encountered. This + * parameter is optional if only a single MAD is posted. * * Sent MADs are not guaranteed to complete in the order that they were posted. * @@ -479,9 +482,8 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); * defined data being transferred. The paylen_newwin field should be * specified in network-byte order. */ -int ib_post_send_mad(struct ib_mad_agent *mad_agent, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); +int ib_post_send_mad(struct ib_mad_send_buf *send_buf, + struct ib_mad_send_buf **bad_send_buf); /** * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. @@ -507,23 +509,25 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); /** * ib_cancel_mad - Cancels an outstanding send MAD operation. * @mad_agent: Specifies the registration associated with sent MAD. - * @wr_id: Indicates the work request identifier of the MAD to cancel. + * @send_buf: Indicates the MAD to cancel. * * MADs will be returned to the user through the corresponding * ib_mad_send_handler. */ -void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id); +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf); /** * ib_modify_mad - Modifies an outstanding send MAD operation. * @mad_agent: Specifies the registration associated with sent MAD. - * @wr_id: Indicates the work request identifier of the MAD to modify. + * @send_buf: Indicates the MAD to modify. * @timeout_ms: New timeout value for sent MAD. * * This call will reset the timeout value for a sent MAD to the specified * value. */ -int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms); +int ib_modify_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, u32 timeout_ms); /** * ib_redirect_mad_qp - Registers a QP for MAD services. @@ -572,7 +576,6 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * @remote_qpn: Specifies the QPN of the receiving node. * @pkey_index: Specifies which PKey the MAD will be sent using. This field * is valid only if the remote_qpn is QP 1. - * @ah: References the address handle used to transfer to the remote node. * @rmpp_active: Indicates if the send will enable RMPP. * @hdr_len: Indicates the size of the data header of the MAD. This length * should include the common MAD header, RMPP header, plus any class @@ -582,11 +585,10 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, * additional padding that may be necessary. * @gfp_mask: GFP mask used for the memory allocation. * - * This is a helper routine that may be used to allocate a MAD. Users are - * not required to allocate outbound MADs using this call. The returned - * MAD send buffer will reference a data buffer usable for sending a MAD, along + * This routine allocates a MAD for sending. The returned MAD send buffer + * will reference a data buffer usable for sending a MAD, along * with an initialized work request structure. Users may modify the returned - * MAD data buffer or work request before posting the send. + * MAD data buffer before posting the send. * * The returned data buffer will be cleared. Users are responsible for * initializing the common MAD and any class specific headers. If @rmpp_active @@ -594,7 +596,7 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, */ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, - struct ib_ah *ah, int rmpp_active, + int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask); diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h index e4d1654..3037588 100644 --- a/include/rdma/ib_user_cm.h +++ b/include/rdma/ib_user_cm.h @@ -38,7 +38,7 @@ #include <linux/types.h> -#define IB_USER_CM_ABI_VERSION 2 +#define IB_USER_CM_ABI_VERSION 3 enum { IB_USER_CM_CMD_CREATE_ID, @@ -299,8 +299,6 @@ struct ib_ucm_event_get { }; struct ib_ucm_req_event_resp { - /* device */ - /* port */ struct ib_ucm_path_rec primary_path; struct ib_ucm_path_rec alternate_path; __be64 remote_ca_guid; @@ -316,6 +314,7 @@ struct ib_ucm_req_event_resp { __u8 retry_count; __u8 rnr_retry_count; __u8 srq; + __u8 port; }; struct ib_ucm_rep_event_resp { @@ -353,10 +352,9 @@ struct ib_ucm_apr_event_resp { }; struct ib_ucm_sidr_req_event_resp { - /* device */ - /* port */ __u16 pkey; - __u8 reserved[2]; + __u8 port; + __u8 reserved; }; struct ib_ucm_sidr_rep_event_resp { diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index fd85725..072f3a2 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -42,15 +43,12 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_VERBS_ABI_VERSION 2 +#define IB_USER_VERBS_ABI_VERSION 3 enum { - IB_USER_VERBS_CMD_QUERY_PARAMS, IB_USER_VERBS_CMD_GET_CONTEXT, IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_CMD_QUERY_PORT, - IB_USER_VERBS_CMD_QUERY_GID, - IB_USER_VERBS_CMD_QUERY_PKEY, IB_USER_VERBS_CMD_ALLOC_PD, IB_USER_VERBS_CMD_DEALLOC_PD, IB_USER_VERBS_CMD_CREATE_AH, @@ -65,6 +63,7 @@ enum { IB_USER_VERBS_CMD_ALLOC_MW, IB_USER_VERBS_CMD_BIND_MW, IB_USER_VERBS_CMD_DEALLOC_MW, + IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, IB_USER_VERBS_CMD_CREATE_CQ, IB_USER_VERBS_CMD_RESIZE_CQ, IB_USER_VERBS_CMD_DESTROY_CQ, @@ -90,8 +89,11 @@ enum { * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. + * Specifically: + * - Do not use pointer types -- pass pointers in __u64 instead. + * - Make sure that any structure larger than 4 bytes is padded to a + * multiple of 8 bytes. Otherwise the structure size will be + * different between 32-bit and 64-bit architectures. */ struct ib_uverbs_async_event_desc { @@ -118,27 +120,14 @@ struct ib_uverbs_cmd_hdr { __u16 out_words; }; -/* - * No driver_data for "query params" command, since this is intended - * to be a core function with no possible device dependence. - */ -struct ib_uverbs_query_params { - __u64 response; -}; - -struct ib_uverbs_query_params_resp { - __u32 num_cq_events; -}; - struct ib_uverbs_get_context { __u64 response; - __u64 cq_fd_tab; __u64 driver_data[0]; }; struct ib_uverbs_get_context_resp { __u32 async_fd; - __u32 reserved; + __u32 num_comp_vectors; }; struct ib_uverbs_query_device { @@ -220,31 +209,6 @@ struct ib_uverbs_query_port_resp { __u8 reserved[3]; }; -struct ib_uverbs_query_gid { - __u64 response; - __u8 port_num; - __u8 index; - __u8 reserved[6]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_gid_resp { - __u8 gid[16]; -}; - -struct ib_uverbs_query_pkey { - __u64 response; - __u8 port_num; - __u8 index; - __u8 reserved[6]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_pkey_resp { - __u16 pkey; - __u16 reserved; -}; - struct ib_uverbs_alloc_pd { __u64 response; __u64 driver_data[0]; @@ -278,11 +242,21 @@ struct ib_uverbs_dereg_mr { __u32 mr_handle; }; +struct ib_uverbs_create_comp_channel { + __u64 response; +}; + +struct ib_uverbs_create_comp_channel_resp { + __u32 fd; +}; + struct ib_uverbs_create_cq { __u64 response; __u64 user_handle; __u32 cqe; - __u32 event_handler; + __u32 comp_vector; + __s32 comp_channel; + __u32 reserved; __u64 driver_data[0]; }; @@ -291,6 +265,41 @@ struct ib_uverbs_create_cq_resp { __u32 cqe; }; +struct ib_uverbs_poll_cq { + __u64 response; + __u32 cq_handle; + __u32 ne; +}; + +struct ib_uverbs_wc { + __u64 wr_id; + __u32 status; + __u32 opcode; + __u32 vendor_err; + __u32 byte_len; + __u32 imm_data; + __u32 qp_num; + __u32 src_qp; + __u32 wc_flags; + __u16 pkey_index; + __u16 slid; + __u8 sl; + __u8 dlid_path_bits; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_poll_cq_resp { + __u32 count; + __u32 reserved; + struct ib_uverbs_wc wc[0]; +}; + +struct ib_uverbs_req_notify_cq { + __u32 cq_handle; + __u32 solicited_only; +}; + struct ib_uverbs_destroy_cq { __u64 response; __u32 cq_handle; @@ -388,6 +397,127 @@ struct ib_uverbs_destroy_qp_resp { __u32 events_reported; }; +/* + * The ib_uverbs_sge structure isn't used anywhere, since we assume + * the ib_sge structure is packed the same way on 32-bit and 64-bit + * architectures in both kernel and user space. It's just here to + * document the ABI. + */ +struct ib_uverbs_sge { + __u64 addr; + __u32 length; + __u32 lkey; +}; + +struct ib_uverbs_send_wr { + __u64 wr_id; + __u32 num_sge; + __u32 opcode; + __u32 send_flags; + __u32 imm_data; + union { + struct { + __u64 remote_addr; + __u32 rkey; + __u32 reserved; + } rdma; + struct { + __u64 remote_addr; + __u64 compare_add; + __u64 swap; + __u32 rkey; + __u32 reserved; + } atomic; + struct { + __u32 ah; + __u32 remote_qpn; + __u32 remote_qkey; + __u32 reserved; + } ud; + } wr; +}; + +struct ib_uverbs_post_send { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_send_wr send_wr[0]; +}; + +struct ib_uverbs_post_send_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_recv_wr { + __u64 wr_id; + __u32 num_sge; + __u32 reserved; +}; + +struct ib_uverbs_post_recv { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv_wr[0]; +}; + +struct ib_uverbs_post_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_post_srq_recv { + __u64 response; + __u32 srq_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv[0]; +}; + +struct ib_uverbs_post_srq_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_global_route { + __u8 dgid[16]; + __u32 flow_label; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 reserved; +}; + +struct ib_uverbs_ah_attr { + struct ib_uverbs_global_route grh; + __u16 dlid; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_create_ah { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 reserved; + struct ib_uverbs_ah_attr attr; +}; + +struct ib_uverbs_create_ah_resp { + __u32 ah_handle; +}; + +struct ib_uverbs_destroy_ah { + __u32 ah_handle; +}; + struct ib_uverbs_attach_mcast { __u8 gid[16]; __u32 qp_handle; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e6f4c9e..f72d46d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -595,11 +595,8 @@ struct ib_send_wr { } atomic; struct { struct ib_ah *ah; - struct ib_mad_hdr *mad_hdr; u32 remote_qpn; u32 remote_qkey; - int timeout_ms; /* valid for MADs only */ - int retries; /* valid for MADs only */ u16 pkey_index; /* valid for GSI only */ u8 port_num; /* valid for DR SMPs on switch only */ } ud; @@ -951,6 +948,9 @@ struct ib_device { IB_DEV_UNREGISTERED } reg_state; + u64 uverbs_cmd_mask; + int uverbs_abi_ver; + u8 node_type; u8 phys_port_cnt; }; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 10e82ec..660c61b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -554,7 +554,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, dp.ppid = sinfo->sinfo_ppid; /* Set the flags for an unordered send. */ - if (sinfo->sinfo_flags & MSG_UNORDERED) { + if (sinfo->sinfo_flags & SCTP_UNORDERED) { flags |= SCTP_DATA_UNORDERED; dp.ssn = 0; } else diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 02e068d..b529af5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1010,6 +1010,19 @@ static int __sctp_connect(struct sock* sk, err = -EAGAIN; goto out_free; } + } else { + /* + * If an unprivileged user inherits a 1-many + * style socket with open associations on a + * privileged port, it MAY be permitted to + * accept new associations, but it SHOULD NOT + * be permitted to open new associations. + */ + if (ep->base.bind_addr.port < PROT_SOCK && + !capable(CAP_NET_BIND_SERVICE)) { + err = -EACCES; + goto out_free; + } } scope = sctp_scope(&to); @@ -1389,27 +1402,27 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, SCTP_DEBUG_PRINTK("msg_len: %zu, sinfo_flags: 0x%x\n", msg_len, sinfo_flags); - /* MSG_EOF or MSG_ABORT cannot be set on a TCP-style socket. */ - if (sctp_style(sk, TCP) && (sinfo_flags & (MSG_EOF | MSG_ABORT))) { + /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */ + if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) { err = -EINVAL; goto out_nounlock; } - /* If MSG_EOF is set, no data can be sent. Disallow sending zero - * length messages when MSG_EOF|MSG_ABORT is not set. - * If MSG_ABORT is set, the message length could be non zero with + /* If SCTP_EOF is set, no data can be sent. Disallow sending zero + * length messages when SCTP_EOF|SCTP_ABORT is not set. + * If SCTP_ABORT is set, the message length could be non zero with * the msg_iov set to the user abort reason. */ - if (((sinfo_flags & MSG_EOF) && (msg_len > 0)) || - (!(sinfo_flags & (MSG_EOF|MSG_ABORT)) && (msg_len == 0))) { + if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) || + (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) { err = -EINVAL; goto out_nounlock; } - /* If MSG_ADDR_OVER is set, there must be an address + /* If SCTP_ADDR_OVER is set, there must be an address * specified in msg_name. */ - if ((sinfo_flags & MSG_ADDR_OVER) && (!msg->msg_name)) { + if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) { err = -EINVAL; goto out_nounlock; } @@ -1458,14 +1471,14 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; } - if (sinfo_flags & MSG_EOF) { + if (sinfo_flags & SCTP_EOF) { SCTP_DEBUG_PRINTK("Shutting down association: %p\n", asoc); sctp_primitive_SHUTDOWN(asoc, NULL); err = 0; goto out_unlock; } - if (sinfo_flags & MSG_ABORT) { + if (sinfo_flags & SCTP_ABORT) { SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); sctp_primitive_ABORT(asoc, msg); err = 0; @@ -1477,7 +1490,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, if (!asoc) { SCTP_DEBUG_PRINTK("There is no association yet.\n"); - if (sinfo_flags & (MSG_EOF | MSG_ABORT)) { + if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) { err = -EINVAL; goto out_unlock; } @@ -1515,6 +1528,19 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, err = -EAGAIN; goto out_unlock; } + } else { + /* + * If an unprivileged user inherits a one-to-many + * style socket with open associations on a privileged + * port, it MAY be permitted to accept new associations, + * but it SHOULD NOT be permitted to open new + * associations. + */ + if (ep->base.bind_addr.port < PROT_SOCK && + !capable(CAP_NET_BIND_SERVICE)) { + err = -EACCES; + goto out_unlock; + } } scope = sctp_scope(&to); @@ -1611,10 +1637,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* If an address is passed with the sendto/sendmsg call, it is used * to override the primary destination address in the TCP model, or - * when MSG_ADDR_OVER flag is set in the UDP model. + * when SCTP_ADDR_OVER flag is set in the UDP model. */ if ((sctp_style(sk, TCP) && msg_name) || - (sinfo_flags & MSG_ADDR_OVER)) { + (sinfo_flags & SCTP_ADDR_OVER)) { chunk_tp = sctp_assoc_lookup_paddr(asoc, &to); if (!chunk_tp) { err = -EINVAL; @@ -2306,16 +2332,14 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl return -EINVAL; if (get_user(val, (int __user *)optval)) return -EFAULT; - if ((val < 8) || (val > SCTP_MAX_CHUNK_LEN)) + if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN))) return -EINVAL; sp->user_frag = val; - if (val) { - /* Update the frag_point of the existing associations. */ - list_for_each(pos, &(sp->ep->asocs)) { - asoc = list_entry(pos, struct sctp_association, asocs); - asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); - } + /* Update the frag_point of the existing associations. */ + list_for_each(pos, &(sp->ep->asocs)) { + asoc = list_entry(pos, struct sctp_association, asocs); + asoc->frag_point = sctp_frag_point(sp, asoc->pmtu); } return 0; @@ -2384,14 +2408,14 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva static int sctp_setsockopt_adaption_layer(struct sock *sk, char __user *optval, int optlen) { - __u32 val; + struct sctp_setadaption adaption; - if (optlen < sizeof(__u32)) + if (optlen != sizeof(struct sctp_setadaption)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(__u32))) + if (copy_from_user(&adaption, optval, optlen)) return -EFAULT; - sctp_sk(sk)->adaption_ind = val; + sctp_sk(sk)->adaption_ind = adaption.ssb_adaption_ind; return 0; } @@ -3672,17 +3696,15 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len, static int sctp_getsockopt_adaption_layer(struct sock *sk, int len, char __user *optval, int __user *optlen) { - __u32 val; + struct sctp_setadaption adaption; - if (len < sizeof(__u32)) + if (len != sizeof(struct sctp_setadaption)) return -EINVAL; - len = sizeof(__u32); - val = sctp_sk(sk)->adaption_ind; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &val, len)) + adaption.ssb_adaption_ind = sctp_sk(sk)->adaption_ind; + if (copy_to_user(optval, &adaption, len)) return -EFAULT; + return 0; } @@ -4640,8 +4662,8 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg, /* Minimally, validate the sinfo_flags. */ if (cmsgs->info->sinfo_flags & - ~(MSG_UNORDERED | MSG_ADDR_OVER | - MSG_ABORT | MSG_EOF)) + ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 057e7fa..e049f41 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -698,7 +698,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, event->ssn = ntohs(chunk->subh.data_hdr->ssn); event->ppid = chunk->subh.data_hdr->ppid; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) { - event->flags |= MSG_UNORDERED; + event->flags |= SCTP_UNORDERED; event->cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); } event->tsn = ntohl(chunk->subh.data_hdr->tsn); @@ -824,7 +824,7 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, * * recvmsg() flags: * - * MSG_UNORDERED - This flag is present when the message was sent + * SCTP_UNORDERED - This flag is present when the message was sent * non-ordered. */ sinfo.sinfo_flags = event->flags; @@ -839,7 +839,7 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, * This field will hold the current cumulative TSN as * known by the underlying SCTP layer. Note this field is * ignored when sending and only valid for a receive - * operation when sinfo_flags are set to MSG_UNORDERED. + * operation when sinfo_flags are set to SCTP_UNORDERED. */ sinfo.sinfo_cumtsn = event->cumtsn; /* sinfo_assoc_id: sizeof (sctp_assoc_t) |