summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/core/agent.c293
-rw-r--r--drivers/infiniband/core/agent.h13
-rw-r--r--drivers/infiniband/core/agent_priv.h62
-rw-r--r--drivers/infiniband/core/cm.c217
-rw-r--r--drivers/infiniband/core/cm_msgs.h1
-rw-r--r--drivers/infiniband/core/device.c12
-rw-r--r--drivers/infiniband/core/mad.c335
-rw-r--r--drivers/infiniband/core/mad_priv.h8
-rw-r--r--drivers/infiniband/core/mad_rmpp.c114
-rw-r--r--drivers/infiniband/core/mad_rmpp.h2
-rw-r--r--drivers/infiniband/core/sa_query.c270
-rw-r--r--drivers/infiniband/core/smi.h2
-rw-r--r--drivers/infiniband/core/sysfs.c16
-rw-r--r--drivers/infiniband/core/ucm.c267
-rw-r--r--drivers/infiniband/core/ucm.h83
-rw-r--r--drivers/infiniband/core/user_mad.c399
-rw-r--r--drivers/infiniband/core/uverbs.h62
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c858
-rw-r--r--drivers/infiniband/core/uverbs_main.c503
-rw-r--r--drivers/infiniband/core/verbs.c18
-rw-r--r--drivers/infiniband/hw/mthca/Makefile3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_catas.c153
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c11
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h22
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c21
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c72
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c11
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mcg.c11
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c49
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c16
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c43
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h23
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c120
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c15
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c9
-rw-r--r--include/net/sctp/user.h8
-rw-r--r--include/rdma/ib_cm.h10
-rw-r--r--include/rdma/ib_mad.h66
-rw-r--r--include/rdma/ib_user_cm.h10
-rw-r--r--include/rdma/ib_user_verbs.h222
-rw-r--r--include/rdma/ib_verbs.h6
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/socket.c90
-rw-r--r--net/sctp/ulpevent.c6
47 files changed, 2735 insertions, 1811 deletions
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 5ac86f5..0c3c695 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -37,58 +37,41 @@
* $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
*/
-#include <linux/dma-mapping.h>
-
-#include <asm/bug.h>
+#include "agent.h"
+#include "smi.h"
-#include <rdma/ib_smi.h>
+#define SPFX "ib_agent: "
-#include "smi.h"
-#include "agent_priv.h"
-#include "mad_priv.h"
-#include "agent.h"
+struct ib_agent_port_private {
+ struct list_head port_list;
+ struct ib_mad_agent *agent[2];
+};
-spinlock_t ib_agent_port_list_lock;
+static DEFINE_SPINLOCK(ib_agent_port_list_lock);
static LIST_HEAD(ib_agent_port_list);
-/*
- * Caller must hold ib_agent_port_list_lock
- */
-static inline struct ib_agent_port_private *
-__ib_get_agent_port(struct ib_device *device, int port_num,
- struct ib_mad_agent *mad_agent)
+static struct ib_agent_port_private *
+__ib_get_agent_port(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *entry;
- BUG_ON(!(!!device ^ !!mad_agent)); /* Exactly one MUST be (!NULL) */
-
- if (device) {
- list_for_each_entry(entry, &ib_agent_port_list, port_list) {
- if (entry->smp_agent->device == device &&
- entry->port_num == port_num)
- return entry;
- }
- } else {
- list_for_each_entry(entry, &ib_agent_port_list, port_list) {
- if ((entry->smp_agent == mad_agent) ||
- (entry->perf_mgmt_agent == mad_agent))
- return entry;
- }
+ list_for_each_entry(entry, &ib_agent_port_list, port_list) {
+ if (entry->agent[0]->device == device &&
+ entry->agent[0]->port_num == port_num)
+ return entry;
}
return NULL;
}
-static inline struct ib_agent_port_private *
-ib_get_agent_port(struct ib_device *device, int port_num,
- struct ib_mad_agent *mad_agent)
+static struct ib_agent_port_private *
+ib_get_agent_port(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *entry;
unsigned long flags;
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
- entry = __ib_get_agent_port(device, port_num, mad_agent);
+ entry = __ib_get_agent_port(device, port_num);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
-
return entry;
}
@@ -100,192 +83,76 @@ int smi_check_local_dr_smp(struct ib_smp *smp,
if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
return 1;
- port_priv = ib_get_agent_port(device, port_num, NULL);
+
+ port_priv = ib_get_agent_port(device, port_num);
if (!port_priv) {
printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
- "not open\n",
- device->name, port_num);
+ "not open\n", device->name, port_num);
return 1;
}
- return smi_check_local_smp(port_priv->smp_agent, smp);
+ return smi_check_local_smp(port_priv->agent[0], smp);
}
-static int agent_mad_send(struct ib_mad_agent *mad_agent,
- struct ib_agent_port_private *port_priv,
- struct ib_mad_private *mad_priv,
- struct ib_grh *grh,
- struct ib_wc *wc)
+int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+ struct ib_wc *wc, struct ib_device *device,
+ int port_num, int qpn)
{
- struct ib_agent_send_wr *agent_send_wr;
- struct ib_sge gather_list;
- struct ib_send_wr send_wr;
- struct ib_send_wr *bad_send_wr;
- struct ib_ah_attr ah_attr;
- unsigned long flags;
- int ret = 1;
-
- agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL);
- if (!agent_send_wr)
- goto out;
- agent_send_wr->mad = mad_priv;
-
- gather_list.addr = dma_map_single(mad_agent->device->dma_device,
- &mad_priv->mad,
- sizeof(mad_priv->mad),
- DMA_TO_DEVICE);
- gather_list.length = sizeof(mad_priv->mad);
- gather_list.lkey = mad_agent->mr->lkey;
-
- send_wr.next = NULL;
- send_wr.opcode = IB_WR_SEND;
- send_wr.sg_list = &gather_list;
- send_wr.num_sge = 1;
- send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */
- send_wr.wr.ud.timeout_ms = 0;
- send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ struct ib_agent_port_private *port_priv;
+ struct ib_mad_agent *agent;
+ struct ib_mad_send_buf *send_buf;
+ struct ib_ah *ah;
+ int ret;
- ah_attr.dlid = wc->slid;
- ah_attr.port_num = mad_agent->port_num;
- ah_attr.src_path_bits = wc->dlid_path_bits;
- ah_attr.sl = wc->sl;
- ah_attr.static_rate = 0;
- ah_attr.ah_flags = 0; /* No GRH */
- if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
- if (wc->wc_flags & IB_WC_GRH) {
- ah_attr.ah_flags = IB_AH_GRH;
- /* Should sgid be looked up ? */
- ah_attr.grh.sgid_index = 0;
- ah_attr.grh.hop_limit = grh->hop_limit;
- ah_attr.grh.flow_label = be32_to_cpu(
- grh->version_tclass_flow) & 0xfffff;
- ah_attr.grh.traffic_class = (be32_to_cpu(
- grh->version_tclass_flow) >> 20) & 0xff;
- memcpy(ah_attr.grh.dgid.raw,
- grh->sgid.raw,
- sizeof(ah_attr.grh.dgid));
- }
+ port_priv = ib_get_agent_port(device, port_num);
+ if (!port_priv) {
+ printk(KERN_ERR SPFX "Unable to find port agent\n");
+ return -ENODEV;
}
- agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr);
- if (IS_ERR(agent_send_wr->ah)) {
- printk(KERN_ERR SPFX "No memory for address handle\n");
- kfree(agent_send_wr);
- goto out;
+ agent = port_priv->agent[qpn];
+ ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
+ if (IS_ERR(ah)) {
+ ret = PTR_ERR(ah);
+ printk(KERN_ERR SPFX "ib_create_ah_from_wc error:%d\n", ret);
+ return ret;
}
- send_wr.wr.ud.ah = agent_send_wr->ah;
- if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
- send_wr.wr.ud.pkey_index = wc->pkey_index;
- send_wr.wr.ud.remote_qkey = IB_QP1_QKEY;
- } else { /* for SMPs */
- send_wr.wr.ud.pkey_index = 0;
- send_wr.wr.ud.remote_qkey = 0;
+ send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
+ IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+ GFP_KERNEL);
+ if (IS_ERR(send_buf)) {
+ ret = PTR_ERR(send_buf);
+ printk(KERN_ERR SPFX "ib_create_send_mad error:%d\n", ret);
+ goto err1;
}
- send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr;
- send_wr.wr_id = (unsigned long)agent_send_wr;
- pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr);
-
- /* Send */
- spin_lock_irqsave(&port_priv->send_list_lock, flags);
- if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) {
- spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
- dma_unmap_single(mad_agent->device->dma_device,
- pci_unmap_addr(agent_send_wr, mapping),
- sizeof(mad_priv->mad),
- DMA_TO_DEVICE);
- ib_destroy_ah(agent_send_wr->ah);
- kfree(agent_send_wr);
- } else {
- list_add_tail(&agent_send_wr->send_list,
- &port_priv->send_posted_list);
- spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
- ret = 0;
+ memcpy(send_buf->mad, mad, sizeof *mad);
+ send_buf->ah = ah;
+ if ((ret = ib_post_send_mad(send_buf, NULL))) {
+ printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret);
+ goto err2;
}
-
-out:
+ return 0;
+err2:
+ ib_free_send_mad(send_buf);
+err1:
+ ib_destroy_ah(ah);
return ret;
}
-int agent_send(struct ib_mad_private *mad,
- struct ib_grh *grh,
- struct ib_wc *wc,
- struct ib_device *device,
- int port_num)
-{
- struct ib_agent_port_private *port_priv;
- struct ib_mad_agent *mad_agent;
-
- port_priv = ib_get_agent_port(device, port_num, NULL);
- if (!port_priv) {
- printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n",
- device->name, port_num);
- return 1;
- }
-
- /* Get mad agent based on mgmt_class in MAD */
- switch (mad->mad.mad.mad_hdr.mgmt_class) {
- case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
- case IB_MGMT_CLASS_SUBN_LID_ROUTED:
- mad_agent = port_priv->smp_agent;
- break;
- case IB_MGMT_CLASS_PERF_MGMT:
- mad_agent = port_priv->perf_mgmt_agent;
- break;
- default:
- return 1;
- }
-
- return agent_mad_send(mad_agent, port_priv, mad, grh, wc);
-}
-
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- struct ib_agent_port_private *port_priv;
- struct ib_agent_send_wr *agent_send_wr;
- unsigned long flags;
-
- /* Find matching MAD agent */
- port_priv = ib_get_agent_port(NULL, 0, mad_agent);
- if (!port_priv) {
- printk(KERN_ERR SPFX "agent_send_handler: no matching MAD "
- "agent %p\n", mad_agent);
- return;
- }
-
- agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id;
- spin_lock_irqsave(&port_priv->send_list_lock, flags);
- /* Remove completed send from posted send MAD list */
- list_del(&agent_send_wr->send_list);
- spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
-
- dma_unmap_single(mad_agent->device->dma_device,
- pci_unmap_addr(agent_send_wr, mapping),
- sizeof(agent_send_wr->mad->mad),
- DMA_TO_DEVICE);
-
- ib_destroy_ah(agent_send_wr->ah);
-
- /* Release allocated memory */
- kmem_cache_free(ib_mad_cache, agent_send_wr->mad);
- kfree(agent_send_wr);
+ ib_destroy_ah(mad_send_wc->send_buf->ah);
+ ib_free_send_mad(mad_send_wc->send_buf);
}
int ib_agent_port_open(struct ib_device *device, int port_num)
{
- int ret;
struct ib_agent_port_private *port_priv;
unsigned long flags;
-
- /* First, check if port already open for SMI */
- port_priv = ib_get_agent_port(device, port_num, NULL);
- if (port_priv) {
- printk(KERN_DEBUG SPFX "%s port %d already open\n",
- device->name, port_num);
- return 0;
- }
+ int ret;
/* Create new device info */
port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
@@ -294,32 +161,25 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
ret = -ENOMEM;
goto error1;
}
-
memset(port_priv, 0, sizeof *port_priv);
- port_priv->port_num = port_num;
- spin_lock_init(&port_priv->send_list_lock);
- INIT_LIST_HEAD(&port_priv->send_posted_list);
- /* Obtain send only MAD agent for SM class (SMI QP) */
- port_priv->smp_agent = ib_register_mad_agent(device, port_num,
- IB_QPT_SMI,
- NULL, 0,
+ /* Obtain send only MAD agent for SMI QP */
+ port_priv->agent[0] = ib_register_mad_agent(device, port_num,
+ IB_QPT_SMI, NULL, 0,
&agent_send_handler,
- NULL, NULL);
-
- if (IS_ERR(port_priv->smp_agent)) {
- ret = PTR_ERR(port_priv->smp_agent);
+ NULL, NULL);
+ if (IS_ERR(port_priv->agent[0])) {
+ ret = PTR_ERR(port_priv->agent[0]);
goto error2;
}
- /* Obtain send only MAD agent for PerfMgmt class (GSI QP) */
- port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num,
- IB_QPT_GSI,
- NULL, 0,
- &agent_send_handler,
- NULL, NULL);
- if (IS_ERR(port_priv->perf_mgmt_agent)) {
- ret = PTR_ERR(port_priv->perf_mgmt_agent);
+ /* Obtain send only MAD agent for GSI QP */
+ port_priv->agent[1] = ib_register_mad_agent(device, port_num,
+ IB_QPT_GSI, NULL, 0,
+ &agent_send_handler,
+ NULL, NULL);
+ if (IS_ERR(port_priv->agent[1])) {
+ ret = PTR_ERR(port_priv->agent[1]);
goto error3;
}
@@ -330,7 +190,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
return 0;
error3:
- ib_unregister_mad_agent(port_priv->smp_agent);
+ ib_unregister_mad_agent(port_priv->agent[0]);
error2:
kfree(port_priv);
error1:
@@ -343,7 +203,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
unsigned long flags;
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
- port_priv = __ib_get_agent_port(device, port_num, NULL);
+ port_priv = __ib_get_agent_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
printk(KERN_ERR SPFX "Port %d not found\n", port_num);
@@ -352,9 +212,8 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
list_del(&port_priv->port_list);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
- ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
- ib_unregister_mad_agent(port_priv->smp_agent);
+ ib_unregister_mad_agent(port_priv->agent[1]);
+ ib_unregister_mad_agent(port_priv->agent[0]);
kfree(port_priv);
-
return 0;
}
diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h
index d942684..c5f3cfe 100644
--- a/drivers/infiniband/core/agent.h
+++ b/drivers/infiniband/core/agent.h
@@ -39,17 +39,14 @@
#ifndef __AGENT_H_
#define __AGENT_H_
-extern spinlock_t ib_agent_port_list_lock;
+#include <rdma/ib_mad.h>
-extern int ib_agent_port_open(struct ib_device *device,
- int port_num);
+extern int ib_agent_port_open(struct ib_device *device, int port_num);
extern int ib_agent_port_close(struct ib_device *device, int port_num);
-extern int agent_send(struct ib_mad_private *mad,
- struct ib_grh *grh,
- struct ib_wc *wc,
- struct ib_device *device,
- int port_num);
+extern int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+ struct ib_wc *wc, struct ib_device *device,
+ int port_num, int qpn);
#endif /* __AGENT_H_ */
diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h
deleted file mode 100644
index 2ec6d7f..0000000
--- a/drivers/infiniband/core/agent_priv.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved.
- * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
- * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
- * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
- * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $
- */
-
-#ifndef __IB_AGENT_PRIV_H__
-#define __IB_AGENT_PRIV_H__
-
-#include <linux/pci.h>
-
-#define SPFX "ib_agent: "
-
-struct ib_agent_send_wr {
- struct list_head send_list;
- struct ib_ah *ah;
- struct ib_mad_private *mad;
- DECLARE_PCI_UNMAP_ADDR(mapping)
-};
-
-struct ib_agent_port_private {
- struct list_head port_list;
- struct list_head send_posted_list;
- spinlock_t send_list_lock;
- int port_num;
- struct ib_mad_agent *smp_agent; /* SM class */
- struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */
-};
-
-#endif /* __IB_AGENT_PRIV_H__ */
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 54db6d4..580c3a2 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -135,6 +135,7 @@ struct cm_id_private {
__be64 tid;
__be32 local_qpn;
__be32 remote_qpn;
+ enum ib_qp_type qp_type;
__be32 sq_psn;
__be32 rq_psn;
int timeout_ms;
@@ -175,8 +176,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
cm_id_priv->av.pkey_index,
- ah, 0, sizeof(struct ib_mad_hdr),
- sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
+ 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC);
if (IS_ERR(m)) {
ib_destroy_ah(ah);
@@ -184,7 +184,8 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
}
/* Timeout set by caller if response is expected. */
- m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries;
+ m->ah = ah;
+ m->retries = cm_id_priv->max_cm_retries;
atomic_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
@@ -205,20 +206,20 @@ static int cm_alloc_response_msg(struct cm_port *port,
return PTR_ERR(ah);
m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
- ah, 0, sizeof(struct ib_mad_hdr),
- sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
+ 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC);
if (IS_ERR(m)) {
ib_destroy_ah(ah);
return PTR_ERR(m);
}
+ m->ah = ah;
*msg = m;
return 0;
}
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
- ib_destroy_ah(msg->send_wr.wr.ud.ah);
+ ib_destroy_ah(msg->ah);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
@@ -366,9 +367,15 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
service_node);
if ((cur_cm_id_priv->id.service_mask & service_id) ==
- (service_mask & cur_cm_id_priv->id.service_id))
- return cm_id_priv;
- if (service_id < cur_cm_id_priv->id.service_id)
+ (service_mask & cur_cm_id_priv->id.service_id) &&
+ (cm_id_priv->id.device == cur_cm_id_priv->id.device))
+ return cur_cm_id_priv;
+
+ if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
+ link = &(*link)->rb_left;
+ else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
+ link = &(*link)->rb_right;
+ else if (service_id < cur_cm_id_priv->id.service_id)
link = &(*link)->rb_left;
else
link = &(*link)->rb_right;
@@ -378,7 +385,8 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
return NULL;
}
-static struct cm_id_private * cm_find_listen(__be64 service_id)
+static struct cm_id_private * cm_find_listen(struct ib_device *device,
+ __be64 service_id)
{
struct rb_node *node = cm.listen_service_table.rb_node;
struct cm_id_private *cm_id_priv;
@@ -386,9 +394,15 @@ static struct cm_id_private * cm_find_listen(__be64 service_id)
while (node) {
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
if ((cm_id_priv->id.service_mask & service_id) ==
- (cm_id_priv->id.service_mask & cm_id_priv->id.service_id))
+ cm_id_priv->id.service_id &&
+ (cm_id_priv->id.device == device))
return cm_id_priv;
- if (service_id < cm_id_priv->id.service_id)
+
+ if (device < cm_id_priv->id.device)
+ node = node->rb_left;
+ else if (device > cm_id_priv->id.device)
+ node = node->rb_right;
+ else if (service_id < cm_id_priv->id.service_id)
node = node->rb_left;
else
node = node->rb_right;
@@ -523,7 +537,8 @@ static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}
-struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+ ib_cm_handler cm_handler,
void *context)
{
struct cm_id_private *cm_id_priv;
@@ -535,6 +550,7 @@ struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
memset(cm_id_priv, 0, sizeof *cm_id_priv);
cm_id_priv->id.state = IB_CM_IDLE;
+ cm_id_priv->id.device = device;
cm_id_priv->id.cm_handler = cm_handler;
cm_id_priv->id.context = context;
cm_id_priv->id.remote_cm_qpn = 1;
@@ -662,8 +678,7 @@ retest:
break;
case IB_CM_SIDR_REQ_SENT:
cm_id->state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
break;
case IB_CM_SIDR_REQ_RCVD:
@@ -674,8 +689,7 @@ retest:
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
/* Fall through */
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
@@ -692,8 +706,7 @@ retest:
ib_send_cm_dreq(cm_id, NULL, 0);
goto retest;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
break;
@@ -867,7 +880,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
struct ib_cm_req_param *param)
{
struct cm_id_private *cm_id_priv;
- struct ib_send_wr *bad_send_wr;
struct cm_req_msg *req_msg;
unsigned long flags;
int ret;
@@ -911,6 +923,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->responder_resources = param->responder_resources;
cm_id_priv->retry_count = param->retry_count;
cm_id_priv->path_mtu = param->primary_path->mtu;
+ cm_id_priv->qp_type = param->qp_type;
ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
if (ret)
@@ -919,7 +932,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
cm_format_req(req_msg, cm_id_priv, param);
cm_id_priv->tid = req_msg->hdr.tid;
- cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
@@ -928,8 +941,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_req_get_primary_local_ack_timeout(req_msg);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &cm_id_priv->msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(cm_id_priv->msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
goto error2;
@@ -952,7 +964,6 @@ static int cm_issue_rej(struct cm_port *port,
void *ari, u8 ari_length)
{
struct ib_mad_send_buf *msg = NULL;
- struct ib_send_wr *bad_send_wr;
struct cm_rej_msg *rej_msg, *rcv_msg;
int ret;
@@ -975,7 +986,7 @@ static int cm_issue_rej(struct cm_port *port,
memcpy(rej_msg->ari, ari, ari_length);
}
- ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -1047,7 +1058,6 @@ static void cm_format_req_event(struct cm_work *work,
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.req_rcvd;
param->listen_id = listen_id;
- param->device = cm_id_priv->av.port->mad_agent->device;
param->port = cm_id_priv->av.port->port_num;
param->primary_path = &work->path[0];
if (req_msg->alt_local_lid)
@@ -1156,7 +1166,6 @@ static void cm_dup_req_handler(struct cm_work *work,
struct cm_id_private *cm_id_priv)
{
struct ib_mad_send_buf *msg = NULL;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1185,8 +1194,7 @@ static void cm_dup_req_handler(struct cm_work *work,
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
- &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
return;
@@ -1226,7 +1234,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
}
/* Find matching listen request. */
- listen_cm_id_priv = cm_find_listen(req_msg->service_id);
+ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+ req_msg->service_id);
if (!listen_cm_id_priv) {
spin_unlock_irqrestore(&cm.lock, flags);
cm_issue_rej(work->port, work->mad_recv_wc,
@@ -1254,7 +1263,7 @@ static int cm_req_handler(struct cm_work *work)
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id = ib_create_cm_id(NULL, NULL);
+ cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
@@ -1305,6 +1314,7 @@ static int cm_req_handler(struct cm_work *work)
cm_req_get_primary_local_ack_timeout(req_msg);
cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+ cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
cm_process_work(cm_id_priv, work);
@@ -1349,7 +1359,6 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
struct cm_rep_msg *rep_msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1371,11 +1380,10 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
rep_msg = (struct cm_rep_msg *) msg->mad;
cm_format_rep(rep_msg, cm_id_priv, param);
- msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -1413,7 +1421,6 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
void *data;
int ret;
@@ -1440,8 +1447,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -1486,7 +1492,6 @@ static void cm_dup_rep_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_rep_msg *rep_msg;
struct ib_mad_send_buf *msg = NULL;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1514,8 +1519,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
goto unlock;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
- &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
goto deref;
@@ -1583,8 +1587,7 @@ static int cm_rep_handler(struct cm_work *work)
/* todo: handle peer_to_peer */
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1618,8 +1621,7 @@ static int cm_establish_handler(struct cm_work *work)
goto out;
}
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1658,8 +1660,7 @@ static int cm_rtu_handler(struct cm_work *work)
}
cm_id_priv->id.state = IB_CM_ESTABLISHED;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1696,7 +1697,6 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1718,11 +1718,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
- msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1756,7 +1755,6 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
void *data;
int ret;
@@ -1786,8 +1784,7 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
- &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -1804,7 +1801,6 @@ static int cm_dreq_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_dreq_msg *dreq_msg;
struct ib_mad_send_buf *msg = NULL;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1823,8 +1819,7 @@ static int cm_dreq_handler(struct cm_work *work)
switch (cm_id_priv->id.state) {
case IB_CM_REP_SENT:
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
case IB_CM_MRA_REP_RCVD:
@@ -1838,8 +1833,7 @@ static int cm_dreq_handler(struct cm_work *work)
cm_id_priv->private_data_len);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- if (ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr))
+ if (ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
default:
@@ -1886,8 +1880,7 @@ static int cm_drep_handler(struct cm_work *work)
}
cm_enter_timewait(cm_id_priv);
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1912,7 +1905,6 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -1956,8 +1948,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
if (ret)
goto out;
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -2033,8 +2024,7 @@ static int cm_rej_handler(struct cm_work *work)
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
/* fall through */
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
@@ -2044,8 +2034,7 @@ static int cm_rej_handler(struct cm_work *work)
cm_reset_to_idle(cm_id_priv);
break;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
/* fall through */
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
@@ -2080,7 +2069,6 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
void *data;
unsigned long flags;
int ret;
@@ -2104,8 +2092,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_REQ, service_timeout,
private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
cm_id->state = IB_CM_MRA_REQ_SENT;
@@ -2118,8 +2105,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_REP, service_timeout,
private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
cm_id->state = IB_CM_MRA_REP_SENT;
@@ -2132,8 +2118,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_OTHER, service_timeout,
private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
goto error2;
cm_id->lap_state = IB_CM_MRA_LAP_SENT;
@@ -2195,14 +2180,14 @@ static int cm_mra_handler(struct cm_work *work)
case IB_CM_REQ_SENT:
if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg, timeout))
+ cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
break;
case IB_CM_REP_SENT:
if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg, timeout))
+ cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
break;
@@ -2210,7 +2195,7 @@ static int cm_mra_handler(struct cm_work *work)
if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg, timeout))
+ cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
break;
@@ -2273,7 +2258,6 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -2294,11 +2278,10 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
alternate_path, private_data, private_data_len);
- msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -2342,7 +2325,6 @@ static int cm_lap_handler(struct cm_work *work)
struct cm_lap_msg *lap_msg;
struct ib_cm_lap_event_param *param;
struct ib_mad_send_buf *msg = NULL;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -2376,8 +2358,7 @@ static int cm_lap_handler(struct cm_work *work)
cm_id_priv->private_data_len);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- if (ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr))
+ if (ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
default:
@@ -2433,7 +2414,6 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -2456,8 +2436,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
info, info_length, private_data, private_data_len);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -2496,8 +2475,7 @@ static int cm_apr_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
cm_id_priv->msg = NULL;
ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -2572,7 +2550,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -2595,13 +2572,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
param);
- msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state == IB_CM_IDLE)
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
else
ret = -EINVAL;
@@ -2629,7 +2605,6 @@ static void cm_format_sidr_req_event(struct cm_work *work,
param = &work->cm_event.param.sidr_req_rcvd;
param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
param->listen_id = listen_id;
- param->device = work->port->mad_agent->device;
param->port = work->port->port_num;
work->cm_event.private_data = &sidr_req_msg->private_data;
}
@@ -2642,7 +2617,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
struct ib_wc *wc;
unsigned long flags;
- cm_id = ib_create_cm_id(NULL, NULL);
+ cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -2666,7 +2641,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
spin_unlock_irqrestore(&cm.lock, flags);
goto out; /* Duplicate message. */
}
- cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id);
+ cur_cm_id_priv = cm_find_listen(cm_id->device,
+ sidr_req_msg->service_id);
if (!cur_cm_id_priv) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
spin_unlock_irqrestore(&cm.lock, flags);
@@ -2715,7 +2691,6 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
@@ -2737,8 +2712,7 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
param);
- ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
- &msg->send_wr, &bad_send_wr);
+ ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
@@ -2791,8 +2765,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- (unsigned long) cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_format_sidr_rep_event(work);
@@ -2860,9 +2833,7 @@ discard:
static void cm_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- struct ib_mad_send_buf *msg;
-
- msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id;
+ struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
switch (mad_send_wc->status) {
case IB_WC_SUCCESS:
@@ -3064,10 +3035,10 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
case IB_CM_ESTABLISHED:
*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX | IB_QP_PORT;
- qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+ qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE;
if (cm_id_priv->responder_resources)
- qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ;
+ qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
qp_attr->pkey_index = cm_id_priv->av.pkey_index;
qp_attr->port_num = cm_id_priv->av.port->port_num;
ret = 0;
@@ -3097,14 +3068,18 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
case IB_CM_MRA_REP_RCVD:
case IB_CM_ESTABLISHED:
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
- IB_QP_DEST_QPN | IB_QP_RQ_PSN |
- IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
+ IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
- qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources;
- qp_attr->min_rnr_timer = 0;
+ if (cm_id_priv->qp_type == IB_QPT_RC) {
+ *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
+ IB_QP_MIN_RNR_TIMER;
+ qp_attr->max_dest_rd_atomic =
+ cm_id_priv->responder_resources;
+ qp_attr->min_rnr_timer = 0;
+ }
if (cm_id_priv->alt_av.ah_attr.dlid) {
*qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -3133,14 +3108,17 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
case IB_CM_ESTABLISHED:
- *qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
- IB_QP_RNR_RETRY | IB_QP_SQ_PSN |
- IB_QP_MAX_QP_RD_ATOMIC;
- qp_attr->timeout = cm_id_priv->local_ack_timeout;
- qp_attr->retry_cnt = cm_id_priv->retry_count;
- qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
+ *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
- qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
+ if (cm_id_priv->qp_type == IB_QPT_RC) {
+ *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
+ IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC;
+ qp_attr->timeout = cm_id_priv->local_ack_timeout;
+ qp_attr->retry_cnt = cm_id_priv->retry_count;
+ qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
+ qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
+ }
if (cm_id_priv->alt_av.ah_attr.dlid) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
qp_attr->path_mig_state = IB_MIG_REARM;
@@ -3323,6 +3301,7 @@ static void __exit ib_cm_cleanup(void)
flush_workqueue(cm.wq);
destroy_workqueue(cm.wq);
ib_unregister_client(&cm_client);
+ idr_destroy(&cm.local_id_table);
}
module_init(ib_cm_init);
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 813ab70..4d3aee9 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -186,6 +186,7 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
req_msg->offset40 = cpu_to_be32((be32_to_cpu(
req_msg->offset40) &
0xFFFFFFF9) | 0x2);
+ break;
default:
req_msg->offset40 = cpu_to_be32(be32_to_cpu(
req_msg->offset40) &
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d3cf84e..5a6e449 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -514,6 +514,12 @@ int ib_query_port(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr)
{
+ if (device->node_type == IB_NODE_SWITCH) {
+ if (port_num)
+ return -EINVAL;
+ } else if (port_num < 1 || port_num > device->phys_port_cnt)
+ return -EINVAL;
+
return device->query_port(device, port_num, port_attr);
}
EXPORT_SYMBOL(ib_query_port);
@@ -583,6 +589,12 @@ int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
+ if (device->node_type == IB_NODE_SWITCH) {
+ if (port_num)
+ return -EINVAL;
+ } else if (port_num < 1 || port_num > device->phys_port_cnt)
+ return -EINVAL;
+
return device->modify_port(device, port_num, port_modify_mask,
port_modify);
}
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a14ca87..88f9f8c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -579,7 +579,7 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
}
static void snoop_send(struct ib_mad_qp_info *qp_info,
- struct ib_send_wr *send_wr,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_send_wc *mad_send_wc,
int mad_snoop_flags)
{
@@ -597,7 +597,7 @@ static void snoop_send(struct ib_mad_qp_info *qp_info,
atomic_inc(&mad_snoop_priv->refcount);
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
- send_wr, mad_send_wc);
+ send_buf, mad_send_wc);
if (atomic_dec_and_test(&mad_snoop_priv->refcount))
wake_up(&mad_snoop_priv->wait);
spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -654,10 +654,10 @@ static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
* Return < 0 if error
*/
static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
- struct ib_smp *smp,
- struct ib_send_wr *send_wr)
+ struct ib_mad_send_wr_private *mad_send_wr)
{
int ret;
+ struct ib_smp *smp = mad_send_wr->send_buf.mad;
unsigned long flags;
struct ib_mad_local_private *local;
struct ib_mad_private *mad_priv;
@@ -666,6 +666,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_device *device = mad_agent_priv->agent.device;
u8 port_num = mad_agent_priv->agent.port_num;
struct ib_wc mad_wc;
+ struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
ret = -EINVAL;
@@ -745,13 +746,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
goto out;
}
- local->send_wr = *send_wr;
- local->send_wr.sg_list = local->sg_list;
- memcpy(local->sg_list, send_wr->sg_list,
- sizeof *send_wr->sg_list * send_wr->num_sge);
- local->send_wr.next = NULL;
- local->tid = send_wr->wr.ud.mad_hdr->tid;
- local->wr_id = send_wr->wr_id;
+ local->mad_send_wr = mad_send_wr;
/* Reference MAD agent until send side of local completion handled */
atomic_inc(&mad_agent_priv->refcount);
/* Queue local completion to local list */
@@ -781,17 +776,17 @@ static int get_buf_length(int hdr_len, int data_len)
struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
u32 remote_qpn, u16 pkey_index,
- struct ib_ah *ah, int rmpp_active,
+ int rmpp_active,
int hdr_len, int data_len,
gfp_t gfp_mask)
{
struct ib_mad_agent_private *mad_agent_priv;
- struct ib_mad_send_buf *send_buf;
+ struct ib_mad_send_wr_private *mad_send_wr;
int buf_size;
void *buf;
- mad_agent_priv = container_of(mad_agent,
- struct ib_mad_agent_private, agent);
+ mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
+ agent);
buf_size = get_buf_length(hdr_len, data_len);
if ((!mad_agent->rmpp_version &&
@@ -799,45 +794,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
(!rmpp_active && buf_size > sizeof(struct ib_mad)))
return ERR_PTR(-EINVAL);
- buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask);
+ buf = kmalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
if (!buf)
return ERR_PTR(-ENOMEM);
- memset(buf, 0, sizeof *send_buf + buf_size);
-
- send_buf = buf + buf_size;
- send_buf->mad = buf;
-
- send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device,
- buf, buf_size, DMA_TO_DEVICE);
- pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr);
- send_buf->sge.length = buf_size;
- send_buf->sge.lkey = mad_agent->mr->lkey;
-
- send_buf->send_wr.wr_id = (unsigned long) send_buf;
- send_buf->send_wr.sg_list = &send_buf->sge;
- send_buf->send_wr.num_sge = 1;
- send_buf->send_wr.opcode = IB_WR_SEND;
- send_buf->send_wr.send_flags = IB_SEND_SIGNALED;
- send_buf->send_wr.wr.ud.ah = ah;
- send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr;
- send_buf->send_wr.wr.ud.remote_qpn = remote_qpn;
- send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
- send_buf->send_wr.wr.ud.pkey_index = pkey_index;
+ memset(buf, 0, sizeof *mad_send_wr + buf_size);
+
+ mad_send_wr = buf + buf_size;
+ mad_send_wr->send_buf.mad = buf;
+
+ mad_send_wr->mad_agent_priv = mad_agent_priv;
+ mad_send_wr->sg_list[0].length = buf_size;
+ mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
+
+ mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
+ mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
+ mad_send_wr->send_wr.num_sge = 1;
+ mad_send_wr->send_wr.opcode = IB_WR_SEND;
+ mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
+ mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
+ mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
+ mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
if (rmpp_active) {
- struct ib_rmpp_mad *rmpp_mad;
- rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad;
+ struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
- offsetof(struct ib_rmpp_mad, data) + data_len);
+ IB_MGMT_RMPP_HDR + data_len);
rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
IB_MGMT_RMPP_FLAG_ACTIVE);
}
- send_buf->mad_agent = mad_agent;
+ mad_send_wr->send_buf.mad_agent = mad_agent;
atomic_inc(&mad_agent_priv->refcount);
- return send_buf;
+ return &mad_send_wr->send_buf;
}
EXPORT_SYMBOL(ib_create_send_mad);
@@ -847,10 +837,6 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
mad_agent_priv = container_of(send_buf->mad_agent,
struct ib_mad_agent_private, agent);
-
- dma_unmap_single(send_buf->mad_agent->device->dma_device,
- pci_unmap_addr(send_buf, mapping),
- send_buf->sge.length, DMA_TO_DEVICE);
kfree(send_buf->mad);
if (atomic_dec_and_test(&mad_agent_priv->refcount))
@@ -861,8 +847,10 @@ EXPORT_SYMBOL(ib_free_send_mad);
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_mad_qp_info *qp_info;
- struct ib_send_wr *bad_send_wr;
struct list_head *list;
+ struct ib_send_wr *bad_send_wr;
+ struct ib_mad_agent *mad_agent;
+ struct ib_sge *sge;
unsigned long flags;
int ret;
@@ -871,10 +859,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+ mad_agent = mad_send_wr->send_buf.mad_agent;
+ sge = mad_send_wr->sg_list;
+ sge->addr = dma_map_single(mad_agent->device->dma_device,
+ mad_send_wr->send_buf.mad, sge->length,
+ DMA_TO_DEVICE);
+ pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
+
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
- ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp,
- &mad_send_wr->send_wr, &bad_send_wr);
+ ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
+ &bad_send_wr);
list = &qp_info->send_queue.list;
} else {
ret = 0;
@@ -886,6 +881,11 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
list_add_tail(&mad_send_wr->mad_list.list, list);
}
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
+ if (ret)
+ dma_unmap_single(mad_agent->device->dma_device,
+ pci_unmap_addr(mad_send_wr, mapping),
+ sge->length, DMA_TO_DEVICE);
+
return ret;
}
@@ -893,45 +893,28 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
* ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
* with the registered client
*/
-int ib_post_send_mad(struct ib_mad_agent *mad_agent,
- struct ib_send_wr *send_wr,
- struct ib_send_wr **bad_send_wr)
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+ struct ib_mad_send_buf **bad_send_buf)
{
- int ret = -EINVAL;
struct ib_mad_agent_private *mad_agent_priv;
-
- /* Validate supplied parameters */
- if (!bad_send_wr)
- goto error1;
-
- if (!mad_agent || !send_wr)
- goto error2;
-
- if (!mad_agent->send_handler)
- goto error2;
-
- mad_agent_priv = container_of(mad_agent,
- struct ib_mad_agent_private,
- agent);
+ struct ib_mad_send_buf *next_send_buf;
+ struct ib_mad_send_wr_private *mad_send_wr;
+ unsigned long flags;
+ int ret = -EINVAL;
/* Walk list of send WRs and post each on send list */
- while (send_wr) {
- unsigned long flags;
- struct ib_send_wr *next_send_wr;
- struct ib_mad_send_wr_private *mad_send_wr;
- struct ib_smp *smp;
-
- /* Validate more parameters */
- if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG)
- goto error2;
+ for (; send_buf; send_buf = next_send_buf) {
- if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler)
- goto error2;
-
- if (!send_wr->wr.ud.mad_hdr) {
- printk(KERN_ERR PFX "MAD header must be supplied "
- "in WR %p\n", send_wr);
- goto error2;
+ mad_send_wr = container_of(send_buf,
+ struct ib_mad_send_wr_private,
+ send_buf);
+ mad_agent_priv = mad_send_wr->mad_agent_priv;
+
+ if (!send_buf->mad_agent->send_handler ||
+ (send_buf->timeout_ms &&
+ !send_buf->mad_agent->recv_handler)) {
+ ret = -EINVAL;
+ goto error;
}
/*
@@ -939,40 +922,24 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent,
* current one completes, and the user modifies the work
* request associated with the completion
*/
- next_send_wr = (struct ib_send_wr *)send_wr->next;
+ next_send_buf = send_buf->next;
+ mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
- smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr;
- if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- ret = handle_outgoing_dr_smp(mad_agent_priv, smp,
- send_wr);
+ if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
+ IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ ret = handle_outgoing_dr_smp(mad_agent_priv,
+ mad_send_wr);
if (ret < 0) /* error */
- goto error2;
+ goto error;
else if (ret == 1) /* locally consumed */
- goto next;
+ continue;
}
- /* Allocate MAD send WR tracking structure */
- mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC);
- if (!mad_send_wr) {
- printk(KERN_ERR PFX "No memory for "
- "ib_mad_send_wr_private\n");
- ret = -ENOMEM;
- goto error2;
- }
- memset(mad_send_wr, 0, sizeof *mad_send_wr);
-
- mad_send_wr->send_wr = *send_wr;
- mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
- memcpy(mad_send_wr->sg_list, send_wr->sg_list,
- sizeof *send_wr->sg_list * send_wr->num_sge);
- mad_send_wr->wr_id = send_wr->wr_id;
- mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid;
- mad_send_wr->mad_agent_priv = mad_agent_priv;
+ mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
/* Timeout will be updated after send completes */
- mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr.
- ud.timeout_ms);
- mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
- /* One reference for each work request to QP + response */
+ mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+ mad_send_wr->retries = send_buf->retries;
+ /* Reference for work request to QP + response */
mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
mad_send_wr->status = IB_WC_SUCCESS;
@@ -995,16 +962,13 @@ int ib_post_send_mad(struct ib_mad_agent *mad_agent,
list_del(&mad_send_wr->agent_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
atomic_dec(&mad_agent_priv->refcount);
- goto error2;
+ goto error;
}
-next:
- send_wr = next_send_wr;
}
return 0;
-
-error2:
- *bad_send_wr = send_wr;
-error1:
+error:
+ if (bad_send_buf)
+ *bad_send_buf = send_buf;
return ret;
}
EXPORT_SYMBOL(ib_post_send_mad);
@@ -1447,8 +1411,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
* of MAD.
*/
hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
- list_for_each_entry(entry, &port_priv->agent_list,
- agent_list) {
+ list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
if (entry->agent.hi_tid == hi_tid) {
mad_agent = entry;
break;
@@ -1571,8 +1534,7 @@ ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid)
*/
list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
agent_list) {
- if (is_data_mad(mad_agent_priv,
- mad_send_wr->send_wr.wr.ud.mad_hdr) &&
+ if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
mad_send_wr->tid == tid && mad_send_wr->timeout) {
/* Verify request has not been canceled */
return (mad_send_wr->status == IB_WC_SUCCESS) ?
@@ -1628,14 +1590,14 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Defined behavior is to complete response before request */
- mad_recv_wc->wc->wr_id = mad_send_wr->wr_id;
+ mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
mad_recv_wc);
atomic_dec(&mad_agent_priv->refcount);
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
- mad_send_wc.wr_id = mad_send_wr->wr_id;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
} else {
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
@@ -1728,11 +1690,11 @@ local:
if (ret & IB_MAD_RESULT_CONSUMED)
goto out;
if (ret & IB_MAD_RESULT_REPLY) {
- /* Send response */
- if (!agent_send(response, &recv->grh, wc,
- port_priv->device,
- port_priv->port_num))
- response = NULL;
+ agent_send_response(&response->mad.mad,
+ &recv->grh, wc,
+ port_priv->device,
+ port_priv->port_num,
+ qp_info->qp->qp_num);
goto out;
}
}
@@ -1866,15 +1828,15 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
if (mad_send_wr->status != IB_WC_SUCCESS )
mad_send_wc->status = mad_send_wr->status;
- if (ret != IB_RMPP_RESULT_INTERNAL)
+ if (ret == IB_RMPP_RESULT_INTERNAL)
+ ib_rmpp_send_handler(mad_send_wc);
+ else
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
mad_send_wc);
/* Release reference on agent taken when sending */
if (atomic_dec_and_test(&mad_agent_priv->refcount))
wake_up(&mad_agent_priv->wait);
-
- kfree(mad_send_wr);
return;
done:
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
@@ -1888,6 +1850,7 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
struct ib_mad_qp_info *qp_info;
struct ib_mad_queue *send_queue;
struct ib_send_wr *bad_send_wr;
+ struct ib_mad_send_wc mad_send_wc;
unsigned long flags;
int ret;
@@ -1898,6 +1861,9 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
qp_info = send_queue->qp_info;
retry:
+ dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
+ pci_unmap_addr(mad_send_wr, mapping),
+ mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
queued_send_wr = NULL;
spin_lock_irqsave(&send_queue->lock, flags);
list_del(&mad_list->list);
@@ -1914,17 +1880,17 @@ retry:
}
spin_unlock_irqrestore(&send_queue->lock, flags);
- /* Restore client wr_id in WC and complete send */
- wc->wr_id = mad_send_wr->wr_id;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_send_wc.status = wc->status;
+ mad_send_wc.vendor_err = wc->vendor_err;
if (atomic_read(&qp_info->snoop_count))
- snoop_send(qp_info, &mad_send_wr->send_wr,
- (struct ib_mad_send_wc *)wc,
+ snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
IB_MAD_SNOOP_SEND_COMPLETIONS);
- ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc);
+ ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
- &bad_send_wr);
+ &bad_send_wr);
if (ret) {
printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
mad_send_wr = queued_send_wr;
@@ -2066,38 +2032,37 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
&cancel_list, agent_list) {
- mad_send_wc.wr_id = mad_send_wr->wr_id;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ list_del(&mad_send_wr->agent_list);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
-
- list_del(&mad_send_wr->agent_list);
- kfree(mad_send_wr);
atomic_dec(&mad_agent_priv->refcount);
}
}
static struct ib_mad_send_wr_private*
-find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id)
+find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
+ struct ib_mad_send_buf *send_buf)
{
struct ib_mad_send_wr_private *mad_send_wr;
list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
agent_list) {
- if (mad_send_wr->wr_id == wr_id)
+ if (&mad_send_wr->send_buf == send_buf)
return mad_send_wr;
}
list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
agent_list) {
- if (is_data_mad(mad_agent_priv,
- mad_send_wr->send_wr.wr.ud.mad_hdr) &&
- mad_send_wr->wr_id == wr_id)
+ if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
+ &mad_send_wr->send_buf == send_buf)
return mad_send_wr;
}
return NULL;
}
-int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf, u32 timeout_ms)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
@@ -2107,7 +2072,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
agent);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id);
+ mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
return -EINVAL;
@@ -2119,7 +2084,7 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
}
- mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms;
+ mad_send_wr->send_buf.timeout_ms = timeout_ms;
if (active)
mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
else
@@ -2130,9 +2095,10 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
}
EXPORT_SYMBOL(ib_modify_mad);
-void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id)
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf)
{
- ib_modify_mad(mad_agent, wr_id, 0);
+ ib_modify_mad(mad_agent, send_buf, 0);
}
EXPORT_SYMBOL(ib_cancel_mad);
@@ -2166,10 +2132,9 @@ static void local_completions(void *data)
* Defined behavior is to complete response
* before request
*/
- build_smp_wc(local->wr_id,
+ build_smp_wc((unsigned long) local->mad_send_wr,
be16_to_cpu(IB_LID_PERMISSIVE),
- 0 /* pkey index */,
- recv_mad_agent->agent.port_num, &wc);
+ 0, recv_mad_agent->agent.port_num, &wc);
local->mad_priv->header.recv_wc.wc = &wc;
local->mad_priv->header.recv_wc.mad_len =
@@ -2196,11 +2161,11 @@ local_send_completion:
/* Complete send */
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
- mad_send_wc.wr_id = local->wr_id;
+ mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
- snoop_send(mad_agent_priv->qp_info, &local->send_wr,
- &mad_send_wc,
- IB_MAD_SNOOP_SEND_COMPLETIONS);
+ snoop_send(mad_agent_priv->qp_info,
+ &local->mad_send_wr->send_buf,
+ &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
@@ -2221,8 +2186,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
if (!mad_send_wr->retries--)
return -ETIMEDOUT;
- mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr.
- wr.ud.timeout_ms);
+ mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
ret = ib_retry_rmpp(mad_send_wr);
@@ -2285,11 +2249,10 @@ static void timeout_sends(void *data)
mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
else
mad_send_wc.status = mad_send_wr->status;
- mad_send_wc.wr_id = mad_send_wr->wr_id;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
- kfree(mad_send_wr);
atomic_dec(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
}
@@ -2683,40 +2646,47 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
static void ib_mad_init_device(struct ib_device *device)
{
- int num_ports, cur_port, i;
+ int start, end, i;
if (device->node_type == IB_NODE_SWITCH) {
- num_ports = 1;
- cur_port = 0;
+ start = 0;
+ end = 0;
} else {
- num_ports = device->phys_port_cnt;
- cur_port = 1;
+ start = 1;
+ end = device->phys_port_cnt;
}
- for (i = 0; i < num_ports; i++, cur_port++) {
- if (ib_mad_port_open(device, cur_port)) {
+
+ for (i = start; i <= end; i++) {
+ if (ib_mad_port_open(device, i)) {
printk(KERN_ERR PFX "Couldn't open %s port %d\n",
- device->name, cur_port);
- goto error_device_open;
+ device->name, i);
+ goto error;
}
- if (ib_agent_port_open(device, cur_port)) {
+ if (ib_agent_port_open(device, i)) {
printk(KERN_ERR PFX "Couldn't open %s port %d "
"for agents\n",
- device->name, cur_port);
- goto error_device_open;
+ device->name, i);
+ goto error_agent;
}
}
return;
-error_device_open:
- while (i > 0) {
- cur_port--;
- if (ib_agent_port_close(device, cur_port))
+error_agent:
+ if (ib_mad_port_close(device, i))
+ printk(KERN_ERR PFX "Couldn't close %s port %d\n",
+ device->name, i);
+
+error:
+ i--;
+
+ while (i >= start) {
+ if (ib_agent_port_close(device, i))
printk(KERN_ERR PFX "Couldn't close %s port %d "
"for agents\n",
- device->name, cur_port);
- if (ib_mad_port_close(device, cur_port))
+ device->name, i);
+ if (ib_mad_port_close(device, i))
printk(KERN_ERR PFX "Couldn't close %s port %d\n",
- device->name, cur_port);
+ device->name, i);
i--;
}
}
@@ -2754,7 +2724,6 @@ static int __init ib_mad_init_module(void)
int ret;
spin_lock_init(&ib_mad_port_list_lock);
- spin_lock_init(&ib_agent_port_list_lock);
ib_mad_cache = kmem_cache_create("ib_mad",
sizeof(struct ib_mad_private),
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index f1ba794..570f786 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -118,9 +118,10 @@ struct ib_mad_send_wr_private {
struct ib_mad_list_head mad_list;
struct list_head agent_list;
struct ib_mad_agent_private *mad_agent_priv;
+ struct ib_mad_send_buf send_buf;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
struct ib_send_wr send_wr;
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
- u64 wr_id; /* client WR ID */
__be64 tid;
unsigned long timeout;
int retries;
@@ -141,10 +142,7 @@ struct ib_mad_local_private {
struct list_head completion_list;
struct ib_mad_private *mad_priv;
struct ib_mad_agent_private *recv_mad_agent;
- struct ib_send_wr send_wr;
- struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
- u64 wr_id; /* client WR ID */
- __be64 tid;
+ struct ib_mad_send_wr_private *mad_send_wr;
};
struct ib_mad_mgmt_method_table {
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index e23836d..3249e1d 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -103,12 +103,12 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
static int data_offset(u8 mgmt_class)
{
if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
- return offsetof(struct ib_sa_mad, data);
+ return IB_MGMT_SA_HDR;
else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
(mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
- return offsetof(struct ib_vendor_mad, data);
+ return IB_MGMT_VENDOR_HDR;
else
- return offsetof(struct ib_rmpp_mad, data);
+ return IB_MGMT_RMPP_HDR;
}
static void format_ack(struct ib_rmpp_mad *ack,
@@ -135,55 +135,52 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
struct ib_mad_recv_wc *recv_wc)
{
struct ib_mad_send_buf *msg;
- struct ib_send_wr *bad_send_wr;
- int hdr_len, ret;
+ int ret;
- hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
- recv_wc->wc->pkey_index, rmpp_recv->ah, 1,
- hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len,
- GFP_KERNEL);
+ recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR,
+ IB_MGMT_RMPP_DATA, GFP_KERNEL);
if (!msg)
return;
- format_ack((struct ib_rmpp_mad *) msg->mad,
- (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
- ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr,
- &bad_send_wr);
+ format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad,
+ rmpp_recv);
+ msg->ah = rmpp_recv->ah;
+ ret = ib_post_send_mad(msg, NULL);
if (ret)
ib_free_send_mad(msg);
}
-static int alloc_response_msg(struct ib_mad_agent *agent,
- struct ib_mad_recv_wc *recv_wc,
- struct ib_mad_send_buf **msg)
+static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
+ struct ib_mad_recv_wc *recv_wc)
{
- struct ib_mad_send_buf *m;
+ struct ib_mad_send_buf *msg;
struct ib_ah *ah;
- int hdr_len;
ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
recv_wc->recv_buf.grh, agent->port_num);
if (IS_ERR(ah))
- return PTR_ERR(ah);
-
- hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
- m = ib_create_send_mad(agent, recv_wc->wc->src_qp,
- recv_wc->wc->pkey_index, ah, 1, hdr_len,
- sizeof(struct ib_rmpp_mad) - hdr_len,
- GFP_KERNEL);
- if (IS_ERR(m)) {
+ return (void *) ah;
+
+ msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
+ recv_wc->wc->pkey_index, 1,
+ IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA,
+ GFP_KERNEL);
+ if (IS_ERR(msg))
ib_destroy_ah(ah);
- return PTR_ERR(m);
- }
- *msg = m;
- return 0;
+ else
+ msg->ah = ah;
+
+ return msg;
}
-static void free_msg(struct ib_mad_send_buf *msg)
+void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
- ib_destroy_ah(msg->send_wr.wr.ud.ah);
- ib_free_send_mad(msg);
+ struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
+
+ if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_ACK)
+ ib_destroy_ah(mad_send_wc->send_buf->ah);
+ ib_free_send_mad(mad_send_wc->send_buf);
}
static void nack_recv(struct ib_mad_agent_private *agent,
@@ -191,14 +188,13 @@ static void nack_recv(struct ib_mad_agent_private *agent,
{
struct ib_mad_send_buf *msg;
struct ib_rmpp_mad *rmpp_mad;
- struct ib_send_wr *bad_send_wr;
int ret;
- ret = alloc_response_msg(&agent->agent, recv_wc, &msg);
- if (ret)
+ msg = alloc_response_msg(&agent->agent, recv_wc);
+ if (IS_ERR(msg))
return;
- rmpp_mad = (struct ib_rmpp_mad *) msg->mad;
+ rmpp_mad = msg->mad;
memcpy(rmpp_mad, recv_wc->recv_buf.mad,
data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class));
@@ -210,9 +206,11 @@ static void nack_recv(struct ib_mad_agent_private *agent,
rmpp_mad->rmpp_hdr.seg_num = 0;
rmpp_mad->rmpp_hdr.paylen_newwin = 0;
- ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr);
- if (ret)
- free_msg(msg);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret) {
+ ib_destroy_ah(msg->ah);
+ ib_free_send_mad(msg);
+ }
}
static void recv_timeout_handler(void *data)
@@ -585,7 +583,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
int timeout;
u32 paylen;
- rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+ rmpp_mad = mad_send_wr->send_buf.mad;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num);
@@ -612,7 +610,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
}
/* 2 seconds for an ACK until we can find the packet lifetime */
- timeout = mad_send_wr->send_wr.wr.ud.timeout_ms;
+ timeout = mad_send_wr->send_buf.timeout_ms;
if (!timeout || timeout > 2000)
mad_send_wr->timeout = msecs_to_jiffies(2000);
mad_send_wr->seg_num++;
@@ -640,7 +638,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid,
wc.status = IB_WC_REM_ABORT_ERR;
wc.vendor_err = rmpp_status;
- wc.wr_id = mad_send_wr->wr_id;
+ wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
out:
@@ -694,12 +692,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
if (seg_num > mad_send_wr->last_ack) {
mad_send_wr->last_ack = seg_num;
- mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
+ mad_send_wr->retries = mad_send_wr->send_buf.retries;
}
mad_send_wr->newwin = newwin;
if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
/* If no response is expected, the ACK completes the send */
- if (!mad_send_wr->send_wr.wr.ud.timeout_ms) {
+ if (!mad_send_wr->send_buf.timeout_ms) {
struct ib_mad_send_wc wc;
ib_mark_mad_done(mad_send_wr);
@@ -707,13 +705,13 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
wc.status = IB_WC_SUCCESS;
wc.vendor_err = 0;
- wc.wr_id = mad_send_wr->wr_id;
+ wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
}
if (mad_send_wr->refcount == 1)
- ib_reset_mad_timeout(mad_send_wr, mad_send_wr->
- send_wr.wr.ud.timeout_ms);
+ ib_reset_mad_timeout(mad_send_wr,
+ mad_send_wr->send_buf.timeout_ms);
} else if (mad_send_wr->refcount == 1 &&
mad_send_wr->seg_num < mad_send_wr->newwin &&
mad_send_wr->seg_num <= mad_send_wr->total_seg) {
@@ -842,7 +840,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
struct ib_rmpp_mad *rmpp_mad;
int i, total_len, ret;
- rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+ rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED;
@@ -863,7 +861,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
(sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
- mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) -
+ mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
/* We need to wait for the final ACK even if there isn't a response */
@@ -878,23 +876,15 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_rmpp_mad *rmpp_mad;
- struct ib_mad_send_buf *msg;
int ret;
- rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+ rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
- if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
- msg = (struct ib_mad_send_buf *) (unsigned long)
- mad_send_wc->wr_id;
- if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK)
- ib_free_send_mad(msg);
- else
- free_msg(msg);
+ if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */
- }
if (mad_send_wc->status != IB_WC_SUCCESS ||
mad_send_wr->status != IB_WC_SUCCESS)
@@ -905,7 +895,7 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
mad_send_wr->timeout =
- msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms);
+ msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
return IB_RMPP_RESULT_PROCESSED; /* Send done */
}
@@ -926,7 +916,7 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
struct ib_rmpp_mad *rmpp_mad;
int ret;
- rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+ rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h
index c4924df..f0616fd 100644
--- a/drivers/infiniband/core/mad_rmpp.h
+++ b/drivers/infiniband/core/mad_rmpp.h
@@ -51,6 +51,8 @@ ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_send_wc *mad_send_wc);
+void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc);
+
void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent);
int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 2626182..89ce9dc 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -73,11 +73,10 @@ struct ib_sa_device {
struct ib_sa_query {
void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
void (*release)(struct ib_sa_query *);
- struct ib_sa_port *port;
- struct ib_sa_mad *mad;
- struct ib_sa_sm_ah *sm_ah;
- DECLARE_PCI_UNMAP_ADDR(mapping)
- int id;
+ struct ib_sa_port *port;
+ struct ib_mad_send_buf *mad_buf;
+ struct ib_sa_sm_ah *sm_ah;
+ int id;
};
struct ib_sa_service_query {
@@ -426,6 +425,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
unsigned long flags;
struct ib_mad_agent *agent;
+ struct ib_mad_send_buf *mad_buf;
spin_lock_irqsave(&idr_lock, flags);
if (idr_find(&query_idr, id) != query) {
@@ -433,9 +433,10 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
return;
}
agent = query->port->agent;
+ mad_buf = query->mad_buf;
spin_unlock_irqrestore(&idr_lock, flags);
- ib_cancel_mad(agent, id);
+ ib_cancel_mad(agent, mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
@@ -457,71 +458,46 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
static int send_mad(struct ib_sa_query *query, int timeout_ms)
{
- struct ib_sa_port *port = query->port;
unsigned long flags;
- int ret;
- struct ib_sge gather_list;
- struct ib_send_wr *bad_wr, wr = {
- .opcode = IB_WR_SEND,
- .sg_list = &gather_list,
- .num_sge = 1,
- .send_flags = IB_SEND_SIGNALED,
- .wr = {
- .ud = {
- .mad_hdr = &query->mad->mad_hdr,
- .remote_qpn = 1,
- .remote_qkey = IB_QP1_QKEY,
- .timeout_ms = timeout_ms,
- }
- }
- };
+ int ret, id;
retry:
if (!idr_pre_get(&query_idr, GFP_ATOMIC))
return -ENOMEM;
spin_lock_irqsave(&idr_lock, flags);
- ret = idr_get_new(&query_idr, query, &query->id);
+ ret = idr_get_new(&query_idr, query, &id);
spin_unlock_irqrestore(&idr_lock, flags);
if (ret == -EAGAIN)
goto retry;
if (ret)
return ret;
- wr.wr_id = query->id;
+ query->mad_buf->timeout_ms = timeout_ms;
+ query->mad_buf->context[0] = query;
+ query->id = id;
- spin_lock_irqsave(&port->ah_lock, flags);
- kref_get(&port->sm_ah->ref);
- query->sm_ah = port->sm_ah;
- wr.wr.ud.ah = port->sm_ah->ah;
- spin_unlock_irqrestore(&port->ah_lock, flags);
+ spin_lock_irqsave(&query->port->ah_lock, flags);
+ kref_get(&query->port->sm_ah->ref);
+ query->sm_ah = query->port->sm_ah;
+ spin_unlock_irqrestore(&query->port->ah_lock, flags);
- gather_list.addr = dma_map_single(port->agent->device->dma_device,
- query->mad,
- sizeof (struct ib_sa_mad),
- DMA_TO_DEVICE);
- gather_list.length = sizeof (struct ib_sa_mad);
- gather_list.lkey = port->agent->mr->lkey;
- pci_unmap_addr_set(query, mapping, gather_list.addr);
+ query->mad_buf->ah = query->sm_ah->ah;
- ret = ib_post_send_mad(port->agent, &wr, &bad_wr);
+ ret = ib_post_send_mad(query->mad_buf, NULL);
if (ret) {
- dma_unmap_single(port->agent->device->dma_device,
- pci_unmap_addr(query, mapping),
- sizeof (struct ib_sa_mad),
- DMA_TO_DEVICE);
- kref_put(&query->sm_ah->ref, free_sm_ah);
spin_lock_irqsave(&idr_lock, flags);
- idr_remove(&query_idr, query->id);
+ idr_remove(&query_idr, id);
spin_unlock_irqrestore(&idr_lock, flags);
+
+ kref_put(&query->sm_ah->ref, free_sm_ah);
}
/*
* It's not safe to dereference query any more, because the
* send may already have completed and freed the query in
- * another context. So use wr.wr_id, which has a copy of the
- * query's id.
+ * another context.
*/
- return ret ? ret : wr.wr_id;
+ return ret ? ret : id;
}
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
@@ -543,7 +519,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
- kfree(sa_query->mad);
kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}
@@ -583,43 +558,58 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
{
struct ib_sa_path_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
- struct ib_mad_agent *agent = port->agent;
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
int ret;
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
query = kmalloc(sizeof *query, gfp_mask);
if (!query)
return -ENOMEM;
- query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
- if (!query->sa_query.mad) {
- kfree(query);
- return -ENOMEM;
+
+ query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+ 0, IB_MGMT_SA_HDR,
+ IB_MGMT_SA_DATA, gfp_mask);
+ if (!query->sa_query.mad_buf) {
+ ret = -ENOMEM;
+ goto err1;
}
query->callback = callback;
query->context = context;
- init_mad(query->sa_query.mad, agent);
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
- query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
- query->sa_query.release = ib_sa_path_rec_release;
- query->sa_query.port = port;
- query->sa_query.mad->mad_hdr.method = IB_MGMT_METHOD_GET;
- query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
- query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+ query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
+ query->sa_query.release = ib_sa_path_rec_release;
+ query->sa_query.port = port;
+ mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
- ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
- rec, query->sa_query.mad->data);
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms);
- if (ret < 0) {
- *sa_query = NULL;
- kfree(query->sa_query.mad);
- kfree(query);
- }
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+err2:
+ *sa_query = NULL;
+ ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+ kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
@@ -643,7 +633,6 @@ static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
{
- kfree(sa_query->mad);
kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
}
@@ -685,10 +674,17 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
{
struct ib_sa_service_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
- struct ib_mad_agent *agent = port->agent;
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
int ret;
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
if (method != IB_MGMT_METHOD_GET &&
method != IB_MGMT_METHOD_SET &&
method != IB_SA_METHOD_DELETE)
@@ -697,37 +693,45 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
query = kmalloc(sizeof *query, gfp_mask);
if (!query)
return -ENOMEM;
- query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
- if (!query->sa_query.mad) {
- kfree(query);
- return -ENOMEM;
+
+ query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+ 0, IB_MGMT_SA_HDR,
+ IB_MGMT_SA_DATA, gfp_mask);
+ if (!query->sa_query.mad_buf) {
+ ret = -ENOMEM;
+ goto err1;
}
query->callback = callback;
query->context = context;
- init_mad(query->sa_query.mad, agent);
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
- query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
- query->sa_query.release = ib_sa_service_rec_release;
- query->sa_query.port = port;
- query->sa_query.mad->mad_hdr.method = method;
- query->sa_query.mad->mad_hdr.attr_id =
- cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
- query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+ query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
+ query->sa_query.release = ib_sa_service_rec_release;
+ query->sa_query.port = port;
+ mad->mad_hdr.method = method;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
- rec, query->sa_query.mad->data);
+ rec, mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms);
- if (ret < 0) {
- *sa_query = NULL;
- kfree(query->sa_query.mad);
- kfree(query);
- }
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+err2:
+ *sa_query = NULL;
+ ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+ kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_service_rec_query);
@@ -751,7 +755,6 @@ static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
{
- kfree(sa_query->mad);
kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
}
@@ -768,60 +771,69 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
{
struct ib_sa_mcmember_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port = &sa_dev->port[port_num - sa_dev->start_port];
- struct ib_mad_agent *agent = port->agent;
+ struct ib_sa_port *port;
+ struct ib_mad_agent *agent;
+ struct ib_sa_mad *mad;
int ret;
+ if (!sa_dev)
+ return -ENODEV;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ agent = port->agent;
+
query = kmalloc(sizeof *query, gfp_mask);
if (!query)
return -ENOMEM;
- query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
- if (!query->sa_query.mad) {
- kfree(query);
- return -ENOMEM;
+
+ query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+ 0, IB_MGMT_SA_HDR,
+ IB_MGMT_SA_DATA, gfp_mask);
+ if (!query->sa_query.mad_buf) {
+ ret = -ENOMEM;
+ goto err1;
}
query->callback = callback;
query->context = context;
- init_mad(query->sa_query.mad, agent);
+ mad = query->sa_query.mad_buf->mad;
+ init_mad(mad, agent);
- query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
- query->sa_query.release = ib_sa_mcmember_rec_release;
- query->sa_query.port = port;
- query->sa_query.mad->mad_hdr.method = method;
- query->sa_query.mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
- query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+ query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
+ query->sa_query.release = ib_sa_mcmember_rec_release;
+ query->sa_query.port = port;
+ mad->mad_hdr.method = method;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
- rec, query->sa_query.mad->data);
+ rec, mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms);
- if (ret < 0) {
- *sa_query = NULL;
- kfree(query->sa_query.mad);
- kfree(query);
- }
+ if (ret < 0)
+ goto err2;
return ret;
+
+err2:
+ *sa_query = NULL;
+ ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+ kfree(query);
+ return ret;
}
EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
- struct ib_sa_query *query;
+ struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
unsigned long flags;
- spin_lock_irqsave(&idr_lock, flags);
- query = idr_find(&query_idr, mad_send_wc->wr_id);
- spin_unlock_irqrestore(&idr_lock, flags);
-
- if (!query)
- return;
-
if (query->callback)
switch (mad_send_wc->status) {
case IB_WC_SUCCESS:
@@ -838,30 +850,25 @@ static void send_handler(struct ib_mad_agent *agent,
break;
}
- dma_unmap_single(agent->device->dma_device,
- pci_unmap_addr(query, mapping),
- sizeof (struct ib_sa_mad),
- DMA_TO_DEVICE);
- kref_put(&query->sm_ah->ref, free_sm_ah);
-
- query->release(query);
-
spin_lock_irqsave(&idr_lock, flags);
- idr_remove(&query_idr, mad_send_wc->wr_id);
+ idr_remove(&query_idr, query->id);
spin_unlock_irqrestore(&idr_lock, flags);
+
+ ib_free_send_mad(mad_send_wc->send_buf);
+ kref_put(&query->sm_ah->ref, free_sm_ah);
+ query->release(query);
}
static void recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_sa_query *query;
- unsigned long flags;
+ struct ib_mad_send_buf *mad_buf;
- spin_lock_irqsave(&idr_lock, flags);
- query = idr_find(&query_idr, mad_recv_wc->wc->wr_id);
- spin_unlock_irqrestore(&idr_lock, flags);
+ mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
+ query = mad_buf->context[0];
- if (query && query->callback) {
+ if (query->callback) {
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
query->callback(query,
mad_recv_wc->recv_buf.mad->mad_hdr.status ?
@@ -975,6 +982,7 @@ static int __init ib_sa_init(void)
static void __exit ib_sa_cleanup(void)
{
ib_unregister_client(&sa_client);
+ idr_destroy(&query_idr);
}
module_init(ib_sa_init);
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index db25503..2b3c401 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -39,6 +39,8 @@
#ifndef __SMI_H_
#define __SMI_H_
+#include <rdma/ib_smi.h>
+
int smi_handle_dr_smp_recv(struct ib_smp *smp,
u8 node_type,
int port_num,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 211ba32..7ce7a6c 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -65,6 +65,11 @@ struct port_table_attribute {
int index;
};
+static inline int ibdev_is_alive(const struct ib_device *dev)
+{
+ return dev->reg_state == IB_DEV_REGISTERED;
+}
+
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -74,6 +79,8 @@ static ssize_t port_attr_show(struct kobject *kobj,
if (!port_attr->show)
return -EIO;
+ if (!ibdev_is_alive(p->ibdev))
+ return -ENODEV;
return port_attr->show(p, port_attr, buf);
}
@@ -581,6 +588,9 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf)
{
struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+ if (!ibdev_is_alive(dev))
+ return -ENODEV;
+
switch (dev->node_type) {
case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
@@ -595,6 +605,9 @@ static ssize_t show_sys_image_guid(struct class_device *cdev, char *buf)
struct ib_device_attr attr;
ssize_t ret;
+ if (!ibdev_is_alive(dev))
+ return -ENODEV;
+
ret = ib_query_device(dev, &attr);
if (ret)
return ret;
@@ -612,6 +625,9 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf)
struct ib_device_attr attr;
ssize_t ret;
+ if (!ibdev_is_alive(dev))
+ return -ENODEV;
+
ret = ib_query_device(dev, &attr);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 021b8f1..2847756 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -41,37 +41,81 @@
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/cdev.h>
+#include <linux/idr.h>
#include <asm/uaccess.h>
-#include "ucm.h"
+#include <rdma/ib_cm.h>
+#include <rdma/ib_user_cm.h>
MODULE_AUTHOR("Libor Michalek");
MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
MODULE_LICENSE("Dual BSD/GPL");
-static int ucm_debug_level;
+struct ib_ucm_device {
+ int devnum;
+ struct cdev dev;
+ struct class_device class_dev;
+ struct ib_device *ib_dev;
+};
+
+struct ib_ucm_file {
+ struct semaphore mutex;
+ struct file *filp;
+ struct ib_ucm_device *device;
+
+ struct list_head ctxs;
+ struct list_head events;
+ wait_queue_head_t poll_wait;
+};
+
+struct ib_ucm_context {
+ int id;
+ wait_queue_head_t wait;
+ atomic_t ref;
+ int events_reported;
+
+ struct ib_ucm_file *file;
+ struct ib_cm_id *cm_id;
+ __u64 uid;
+
+ struct list_head events; /* list of pending events. */
+ struct list_head file_list; /* member in file ctx list */
+};
+
+struct ib_ucm_event {
+ struct ib_ucm_context *ctx;
+ struct list_head file_list; /* member in file event list */
+ struct list_head ctx_list; /* member in ctx event list */
-module_param_named(debug_level, ucm_debug_level, int, 0644);
-MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+ struct ib_cm_id *cm_id;
+ struct ib_ucm_event_resp resp;
+ void *data;
+ void *info;
+ int data_len;
+ int info_len;
+};
enum {
IB_UCM_MAJOR = 231,
- IB_UCM_MINOR = 255
+ IB_UCM_BASE_MINOR = 224,
+ IB_UCM_MAX_DEVICES = 32
};
-#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR)
+#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
-#define PFX "UCM: "
+static void ib_ucm_add_one(struct ib_device *device);
+static void ib_ucm_remove_one(struct ib_device *device);
-#define ucm_dbg(format, arg...) \
- do { \
- if (ucm_debug_level > 0) \
- printk(KERN_DEBUG PFX format, ## arg); \
- } while (0)
+static struct ib_client ucm_client = {
+ .name = "ucm",
+ .add = ib_ucm_add_one,
+ .remove = ib_ucm_remove_one
+};
-static struct semaphore ctx_id_mutex;
-static struct idr ctx_id_table;
+static DECLARE_MUTEX(ctx_id_mutex);
+static DEFINE_IDR(ctx_id_table);
+static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
{
@@ -152,17 +196,13 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
goto error;
list_add_tail(&ctx->file_list, &file->ctxs);
- ucm_dbg("Allocated CM ID <%d>\n", ctx->id);
return ctx;
error:
kfree(ctx);
return NULL;
}
-/*
- * Event portion of the API, handle CM events
- * and allow event polling.
- */
+
static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
struct ib_sa_path_rec *kpath)
{
@@ -209,6 +249,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
ureq->retry_count = kreq->retry_count;
ureq->rnr_retry_count = kreq->rnr_retry_count;
ureq->srq = kreq->srq;
+ ureq->port = kreq->port;
ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path);
ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path);
@@ -295,6 +336,8 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
case IB_CM_SIDR_REQ_RECEIVED:
uvt->resp.u.sidr_req_resp.pkey =
evt->param.sidr_req_rcvd.pkey;
+ uvt->resp.u.sidr_req_resp.port =
+ evt->param.sidr_req_rcvd.port;
uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
break;
case IB_CM_SIDR_REP_RECEIVED:
@@ -387,9 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- /*
- * wait
- */
+
down(&file->mutex);
while (list_empty(&file->events)) {
@@ -471,7 +512,6 @@ done:
return result;
}
-
static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
const char __user *inbuf,
int in_len, int out_len)
@@ -494,29 +534,27 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
return -ENOMEM;
ctx->uid = cmd.uid;
- ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx);
+ ctx->cm_id = ib_create_cm_id(file->device->ib_dev,
+ ib_ucm_event_handler, ctx);
if (IS_ERR(ctx->cm_id)) {
result = PTR_ERR(ctx->cm_id);
- goto err;
+ goto err1;
}
resp.id = ctx->id;
if (copy_to_user((void __user *)(unsigned long)cmd.response,
&resp, sizeof(resp))) {
result = -EFAULT;
- goto err;
+ goto err2;
}
-
return 0;
-err:
+err2:
+ ib_destroy_cm_id(ctx->cm_id);
+err1:
down(&ctx_id_mutex);
idr_remove(&ctx_id_table, ctx->id);
up(&ctx_id_mutex);
-
- if (!IS_ERR(ctx->cm_id))
- ib_destroy_cm_id(ctx->cm_id);
-
kfree(ctx);
return result;
}
@@ -1184,9 +1222,6 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n",
- hdr.cmd, hdr.in, hdr.out, len);
-
if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
return -EINVAL;
@@ -1231,8 +1266,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp)
filp->private_data = file;
file->filp = filp;
-
- ucm_dbg("Created struct\n");
+ file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev);
return 0;
}
@@ -1263,7 +1297,17 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
return 0;
}
-static struct file_operations ib_ucm_fops = {
+static void ib_ucm_release_class_dev(struct class_device *class_dev)
+{
+ struct ib_ucm_device *dev;
+
+ dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+ cdev_del(&dev->dev);
+ clear_bit(dev->devnum, dev_map);
+ kfree(dev);
+}
+
+static struct file_operations ucm_fops = {
.owner = THIS_MODULE,
.open = ib_ucm_open,
.release = ib_ucm_close,
@@ -1271,55 +1315,142 @@ static struct file_operations ib_ucm_fops = {
.poll = ib_ucm_poll,
};
+static struct class ucm_class = {
+ .name = "infiniband_cm",
+ .release = ib_ucm_release_class_dev
+};
-static struct class *ib_ucm_class;
-static struct cdev ib_ucm_cdev;
+static ssize_t show_dev(struct class_device *class_dev, char *buf)
+{
+ struct ib_ucm_device *dev;
+
+ dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+ return print_dev_t(buf, dev->dev.dev);
+}
+static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
-static int __init ib_ucm_init(void)
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
{
- int result;
+ struct ib_ucm_device *dev;
+
+ dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+ return sprintf(buf, "%s\n", dev->ib_dev->name);
+}
+static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
- result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm");
- if (result) {
- ucm_dbg("Error <%d> registering dev\n", result);
- goto err_chr;
- }
+static void ib_ucm_add_one(struct ib_device *device)
+{
+ struct ib_ucm_device *ucm_dev;
+
+ if (!device->alloc_ucontext)
+ return;
+
+ ucm_dev = kmalloc(sizeof *ucm_dev, GFP_KERNEL);
+ if (!ucm_dev)
+ return;
- cdev_init(&ib_ucm_cdev, &ib_ucm_fops);
+ memset(ucm_dev, 0, sizeof *ucm_dev);
+ ucm_dev->ib_dev = device;
+
+ ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
+ if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
+ goto err;
+
+ set_bit(ucm_dev->devnum, dev_map);
+
+ cdev_init(&ucm_dev->dev, &ucm_fops);
+ ucm_dev->dev.owner = THIS_MODULE;
+ kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum);
+ if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
+ goto err;
- result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1);
- if (result) {
- ucm_dbg("Error <%d> adding cdev\n", result);
+ ucm_dev->class_dev.class = &ucm_class;
+ ucm_dev->class_dev.dev = device->dma_device;
+ snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d",
+ ucm_dev->devnum);
+ if (class_device_register(&ucm_dev->class_dev))
goto err_cdev;
- }
- ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm");
- if (IS_ERR(ib_ucm_class)) {
- result = PTR_ERR(ib_ucm_class);
- ucm_dbg("Error <%d> creating class\n", result);
+ if (class_device_create_file(&ucm_dev->class_dev,
+ &class_device_attr_dev))
+ goto err_class;
+ if (class_device_create_file(&ucm_dev->class_dev,
+ &class_device_attr_ibdev))
goto err_class;
+
+ ib_set_client_data(device, &ucm_client, ucm_dev);
+ return;
+
+err_class:
+ class_device_unregister(&ucm_dev->class_dev);
+err_cdev:
+ cdev_del(&ucm_dev->dev);
+ clear_bit(ucm_dev->devnum, dev_map);
+err:
+ kfree(ucm_dev);
+ return;
+}
+
+static void ib_ucm_remove_one(struct ib_device *device)
+{
+ struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client);
+
+ if (!ucm_dev)
+ return;
+
+ class_device_unregister(&ucm_dev->class_dev);
+}
+
+static ssize_t show_abi_version(struct class *class, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static int __init ib_ucm_init(void)
+{
+ int ret;
+
+ ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
+ "infiniband_cm");
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't register device number\n");
+ goto err;
}
- class_device_create(ib_ucm_class, NULL, IB_UCM_DEV, NULL, "ucm");
+ ret = class_register(&ucm_class);
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n");
+ goto err_chrdev;
+ }
- idr_init(&ctx_id_table);
- init_MUTEX(&ctx_id_mutex);
+ ret = class_create_file(&ucm_class, &class_attr_abi_version);
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
+ goto err_class;
+ }
+ ret = ib_register_client(&ucm_client);
+ if (ret) {
+ printk(KERN_ERR "ucm: couldn't register client\n");
+ goto err_class;
+ }
return 0;
+
err_class:
- cdev_del(&ib_ucm_cdev);
-err_cdev:
- unregister_chrdev_region(IB_UCM_DEV, 1);
-err_chr:
- return result;
+ class_unregister(&ucm_class);
+err_chrdev:
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+err:
+ return ret;
}
static void __exit ib_ucm_cleanup(void)
{
- class_device_destroy(ib_ucm_class, IB_UCM_DEV);
- class_destroy(ib_ucm_class);
- cdev_del(&ib_ucm_cdev);
- unregister_chrdev_region(IB_UCM_DEV, 1);
+ ib_unregister_client(&ucm_client);
+ class_unregister(&ucm_class);
+ unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+ idr_destroy(&ctx_id_table);
}
module_init(ib_ucm_init);
diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h
deleted file mode 100644
index f46f37b..0000000
--- a/drivers/infiniband/core/ucm.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $
- */
-
-#ifndef UCM_H
-#define UCM_H
-
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/cdev.h>
-#include <linux/idr.h>
-
-#include <rdma/ib_cm.h>
-#include <rdma/ib_user_cm.h>
-
-struct ib_ucm_file {
- struct semaphore mutex;
- struct file *filp;
-
- struct list_head ctxs; /* list of active connections */
- struct list_head events; /* list of pending events */
- wait_queue_head_t poll_wait;
-};
-
-struct ib_ucm_context {
- int id;
- wait_queue_head_t wait;
- atomic_t ref;
- int events_reported;
-
- struct ib_ucm_file *file;
- struct ib_cm_id *cm_id;
- __u64 uid;
-
- struct list_head events; /* list of pending events. */
- struct list_head file_list; /* member in file ctx list */
-};
-
-struct ib_ucm_event {
- struct ib_ucm_context *ctx;
- struct list_head file_list; /* member in file event list */
- struct list_head ctx_list; /* member in ctx event list */
-
- struct ib_cm_id *cm_id;
- struct ib_ucm_event_resp resp;
- void *data;
- void *info;
- int data_len;
- int info_len;
-};
-
-#endif /* UCM_H */
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index a64d6b4..97128e2 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -64,18 +64,39 @@ enum {
IB_UMAD_MINOR_BASE = 0
};
+/*
+ * Our lifetime rules for these structs are the following: each time a
+ * device special file is opened, we look up the corresponding struct
+ * ib_umad_port by minor in the umad_port[] table while holding the
+ * port_lock. If this lookup succeeds, we take a reference on the
+ * ib_umad_port's struct ib_umad_device while still holding the
+ * port_lock; if the lookup fails, we fail the open(). We drop these
+ * references in the corresponding close().
+ *
+ * In addition to references coming from open character devices, there
+ * is one more reference to each ib_umad_device representing the
+ * module's reference taken when allocating the ib_umad_device in
+ * ib_umad_add_one().
+ *
+ * When destroying an ib_umad_device, we clear all of its
+ * ib_umad_ports from umad_port[] while holding port_lock before
+ * dropping the module's reference to the ib_umad_device. This is
+ * always safe because any open() calls will either succeed and obtain
+ * a reference before we clear the umad_port[] entries, or fail after
+ * we clear the umad_port[] entries.
+ */
+
struct ib_umad_port {
- int devnum;
- struct cdev dev;
- struct class_device class_dev;
+ struct cdev *dev;
+ struct class_device *class_dev;
- int sm_devnum;
- struct cdev sm_dev;
- struct class_device sm_class_dev;
+ struct cdev *sm_dev;
+ struct class_device *sm_class_dev;
struct semaphore sm_sem;
struct ib_device *ib_dev;
struct ib_umad_device *umad_dev;
+ int dev_num;
u8 port_num;
};
@@ -96,21 +117,31 @@ struct ib_umad_file {
};
struct ib_umad_packet {
- struct ib_ah *ah;
struct ib_mad_send_buf *msg;
struct list_head list;
int length;
- DECLARE_PCI_UNMAP_ADDR(mapping)
struct ib_user_mad mad;
};
+static struct class *umad_class;
+
static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
-static spinlock_t map_lock;
+
+static DEFINE_SPINLOCK(port_lock);
+static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
+static void ib_umad_release_dev(struct kref *ref)
+{
+ struct ib_umad_device *dev =
+ container_of(ref, struct ib_umad_device, ref);
+
+ kfree(dev);
+}
+
static int queue_packet(struct ib_umad_file *file,
struct ib_mad_agent *agent,
struct ib_umad_packet *packet)
@@ -139,22 +170,19 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *send_wc)
{
struct ib_umad_file *file = agent->context;
- struct ib_umad_packet *timeout, *packet =
- (void *) (unsigned long) send_wc->wr_id;
+ struct ib_umad_packet *timeout;
+ struct ib_umad_packet *packet = send_wc->send_buf->context[0];
- ib_destroy_ah(packet->msg->send_wr.wr.ud.ah);
+ ib_destroy_ah(packet->msg->ah);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
- timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr),
- GFP_KERNEL);
+ timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL);
if (!timeout)
goto out;
- memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr));
-
- timeout->length = sizeof (struct ib_mad_hdr);
- timeout->mad.hdr.id = packet->mad.hdr.id;
+ timeout->length = IB_MGMT_MAD_HDR;
+ timeout->mad.hdr.id = packet->mad.hdr.id;
timeout->mad.hdr.status = ETIMEDOUT;
memcpy(timeout->mad.data, packet->mad.data,
sizeof (struct ib_mad_hdr));
@@ -177,11 +205,10 @@ static void recv_handler(struct ib_mad_agent *agent,
goto out;
length = mad_recv_wc->mad_len;
- packet = kmalloc(sizeof *packet + length, GFP_KERNEL);
+ packet = kzalloc(sizeof *packet + length, GFP_KERNEL);
if (!packet)
goto out;
- memset(packet, 0, sizeof *packet + length);
packet->length = length;
ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data);
@@ -247,7 +274,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
else
ret = -ENOSPC;
} else if (copy_to_user(buf, &packet->mad,
- packet->length + sizeof (struct ib_user_mad)))
+ packet->length + sizeof (struct ib_user_mad)))
ret = -EFAULT;
else
ret = packet->length + sizeof (struct ib_user_mad);
@@ -268,26 +295,23 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
struct ib_umad_packet *packet;
struct ib_mad_agent *agent;
struct ib_ah_attr ah_attr;
- struct ib_send_wr *bad_wr;
+ struct ib_ah *ah;
struct ib_rmpp_mad *rmpp_mad;
u8 method;
__be64 *tid;
- int ret, length, hdr_len, data_len, rmpp_hdr_size;
+ int ret, length, hdr_len, copy_offset;
int rmpp_active = 0;
if (count < sizeof (struct ib_user_mad))
return -EINVAL;
length = count - sizeof (struct ib_user_mad);
- packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) +
- sizeof(struct ib_rmpp_hdr), GFP_KERNEL);
+ packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
if (!packet)
return -ENOMEM;
if (copy_from_user(&packet->mad, buf,
- sizeof (struct ib_user_mad) +
- sizeof(struct ib_mad_hdr) +
- sizeof(struct ib_rmpp_hdr))) {
+ sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
ret = -EFAULT;
goto err;
}
@@ -298,8 +322,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err;
}
- packet->length = length;
-
down_read(&file->agent_mutex);
agent = file->agent[packet->mad.hdr.id];
@@ -321,9 +343,9 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
}
- packet->ah = ib_create_ah(agent->qp->pd, &ah_attr);
- if (IS_ERR(packet->ah)) {
- ret = PTR_ERR(packet->ah);
+ ah = ib_create_ah(agent->qp->pd, &ah_attr);
+ if (IS_ERR(ah)) {
+ ret = PTR_ERR(ah);
goto err_up;
}
@@ -337,64 +359,44 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
/* Validate that the management class can support RMPP */
if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
- hdr_len = offsetof(struct ib_sa_mad, data);
- data_len = length - hdr_len;
+ hdr_len = IB_MGMT_SA_HDR;
} else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
(rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) {
- hdr_len = offsetof(struct ib_vendor_mad, data);
- data_len = length - hdr_len;
+ hdr_len = IB_MGMT_VENDOR_HDR;
} else {
ret = -EINVAL;
goto err_ah;
}
rmpp_active = 1;
+ copy_offset = IB_MGMT_RMPP_HDR;
} else {
- if (length > sizeof(struct ib_mad)) {
- ret = -EINVAL;
- goto err_ah;
- }
- hdr_len = offsetof(struct ib_mad, data);
- data_len = length - hdr_len;
+ hdr_len = IB_MGMT_MAD_HDR;
+ copy_offset = IB_MGMT_MAD_HDR;
}
packet->msg = ib_create_send_mad(agent,
be32_to_cpu(packet->mad.hdr.qpn),
- 0, packet->ah, rmpp_active,
- hdr_len, data_len,
+ 0, rmpp_active,
+ hdr_len, length - hdr_len,
GFP_KERNEL);
if (IS_ERR(packet->msg)) {
ret = PTR_ERR(packet->msg);
goto err_ah;
}
- packet->msg->send_wr.wr.ud.timeout_ms = packet->mad.hdr.timeout_ms;
- packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries;
-
- /* Override send WR WRID initialized in ib_create_send_mad */
- packet->msg->send_wr.wr_id = (unsigned long) packet;
-
- if (!rmpp_active) {
- /* Copy message from user into send buffer */
- if (copy_from_user(packet->msg->mad,
- buf + sizeof(struct ib_user_mad), length)) {
- ret = -EFAULT;
- goto err_msg;
- }
- } else {
- rmpp_hdr_size = sizeof(struct ib_mad_hdr) +
- sizeof(struct ib_rmpp_hdr);
+ packet->msg->ah = ah;
+ packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
+ packet->msg->retries = packet->mad.hdr.retries;
+ packet->msg->context[0] = packet;
- /* Only copy MAD headers (RMPP header in place) */
- memcpy(packet->msg->mad, packet->mad.data,
- sizeof(struct ib_mad_hdr));
-
- /* Now, copy rest of message from user into send buffer */
- if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data,
- buf + sizeof (struct ib_user_mad) + rmpp_hdr_size,
- length - rmpp_hdr_size)) {
- ret = -EFAULT;
- goto err_msg;
- }
+ /* Copy MAD headers (RMPP header in place) */
+ memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
+ /* Now, copy rest of message from user into send buffer */
+ if (copy_from_user(packet->msg->mad + copy_offset,
+ buf + sizeof (struct ib_user_mad) + copy_offset,
+ length - copy_offset)) {
+ ret = -EFAULT;
+ goto err_msg;
}
/*
@@ -403,29 +405,29 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
* transaction ID matches the agent being used to send the
* MAD.
*/
- method = packet->msg->mad->mad_hdr.method;
+ method = ((struct ib_mad_hdr *) packet->msg->mad)->method;
if (!(method & IB_MGMT_METHOD_RESP) &&
method != IB_MGMT_METHOD_TRAP_REPRESS &&
method != IB_MGMT_METHOD_SEND) {
- tid = &packet->msg->mad->mad_hdr.tid;
+ tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
*tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
(be64_to_cpup(tid) & 0xffffffff));
}
- ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr);
+ ret = ib_post_send_mad(packet->msg, NULL);
if (ret)
goto err_msg;
up_read(&file->agent_mutex);
- return sizeof (struct ib_user_mad_hdr) + packet->length;
+ return count;
err_msg:
ib_free_send_mad(packet->msg);
err_ah:
- ib_destroy_ah(packet->ah);
+ ib_destroy_ah(ah);
err_up:
up_read(&file->agent_mutex);
@@ -565,15 +567,23 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
static int ib_umad_open(struct inode *inode, struct file *filp)
{
- struct ib_umad_port *port =
- container_of(inode->i_cdev, struct ib_umad_port, dev);
+ struct ib_umad_port *port;
struct ib_umad_file *file;
- file = kmalloc(sizeof *file, GFP_KERNEL);
- if (!file)
- return -ENOMEM;
+ spin_lock(&port_lock);
+ port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
+ if (port)
+ kref_get(&port->umad_dev->ref);
+ spin_unlock(&port_lock);
- memset(file, 0, sizeof *file);
+ if (!port)
+ return -ENXIO;
+
+ file = kzalloc(sizeof *file, GFP_KERNEL);
+ if (!file) {
+ kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ return -ENOMEM;
+ }
spin_lock_init(&file->recv_lock);
init_rwsem(&file->agent_mutex);
@@ -589,6 +599,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
static int ib_umad_close(struct inode *inode, struct file *filp)
{
struct ib_umad_file *file = filp->private_data;
+ struct ib_umad_device *dev = file->port->umad_dev;
struct ib_umad_packet *packet, *tmp;
int i;
@@ -603,6 +614,8 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
kfree(file);
+ kref_put(&dev->ref, ib_umad_release_dev);
+
return 0;
}
@@ -619,30 +632,46 @@ static struct file_operations umad_fops = {
static int ib_umad_sm_open(struct inode *inode, struct file *filp)
{
- struct ib_umad_port *port =
- container_of(inode->i_cdev, struct ib_umad_port, sm_dev);
+ struct ib_umad_port *port;
struct ib_port_modify props = {
.set_port_cap_mask = IB_PORT_SM
};
int ret;
+ spin_lock(&port_lock);
+ port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
+ if (port)
+ kref_get(&port->umad_dev->ref);
+ spin_unlock(&port_lock);
+
+ if (!port)
+ return -ENXIO;
+
if (filp->f_flags & O_NONBLOCK) {
- if (down_trylock(&port->sm_sem))
- return -EAGAIN;
+ if (down_trylock(&port->sm_sem)) {
+ ret = -EAGAIN;
+ goto fail;
+ }
} else {
- if (down_interruptible(&port->sm_sem))
- return -ERESTARTSYS;
+ if (down_interruptible(&port->sm_sem)) {
+ ret = -ERESTARTSYS;
+ goto fail;
+ }
}
ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
if (ret) {
up(&port->sm_sem);
- return ret;
+ goto fail;
}
filp->private_data = port;
return 0;
+
+fail:
+ kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ return ret;
}
static int ib_umad_sm_close(struct inode *inode, struct file *filp)
@@ -656,6 +685,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
up(&port->sm_sem);
+ kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+
return ret;
}
@@ -671,21 +702,13 @@ static struct ib_client umad_client = {
.remove = ib_umad_remove_one
};
-static ssize_t show_dev(struct class_device *class_dev, char *buf)
-{
- struct ib_umad_port *port = class_get_devdata(class_dev);
-
- if (class_dev == &port->class_dev)
- return print_dev_t(buf, port->dev.dev);
- else
- return print_dev_t(buf, port->sm_dev.dev);
-}
-static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
-
static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
{
struct ib_umad_port *port = class_get_devdata(class_dev);
+ if (!port)
+ return -ENODEV;
+
return sprintf(buf, "%s\n", port->ib_dev->name);
}
static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -694,38 +717,13 @@ static ssize_t show_port(struct class_device *class_dev, char *buf)
{
struct ib_umad_port *port = class_get_devdata(class_dev);
+ if (!port)
+ return -ENODEV;
+
return sprintf(buf, "%d\n", port->port_num);
}
static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
-static void ib_umad_release_dev(struct kref *ref)
-{
- struct ib_umad_device *dev =
- container_of(ref, struct ib_umad_device, ref);
-
- kfree(dev);
-}
-
-static void ib_umad_release_port(struct class_device *class_dev)
-{
- struct ib_umad_port *port = class_get_devdata(class_dev);
-
- if (class_dev == &port->class_dev) {
- cdev_del(&port->dev);
- clear_bit(port->devnum, dev_map);
- } else {
- cdev_del(&port->sm_dev);
- clear_bit(port->sm_devnum, dev_map);
- }
-
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
-}
-
-static struct class umad_class = {
- .name = "infiniband_mad",
- .release = ib_umad_release_port
-};
-
static ssize_t show_abi_version(struct class *class, char *buf)
{
return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
@@ -735,91 +733,102 @@ static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_port *port)
{
- spin_lock(&map_lock);
- port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (port->devnum >= IB_UMAD_MAX_PORTS) {
- spin_unlock(&map_lock);
+ spin_lock(&port_lock);
+ port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+ if (port->dev_num >= IB_UMAD_MAX_PORTS) {
+ spin_unlock(&port_lock);
return -1;
}
- port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS);
- if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) {
- spin_unlock(&map_lock);
- return -1;
- }
- set_bit(port->devnum, dev_map);
- set_bit(port->sm_devnum, dev_map);
- spin_unlock(&map_lock);
+ set_bit(port->dev_num, dev_map);
+ spin_unlock(&port_lock);
port->ib_dev = device;
port->port_num = port_num;
init_MUTEX(&port->sm_sem);
- cdev_init(&port->dev, &umad_fops);
- port->dev.owner = THIS_MODULE;
- kobject_set_name(&port->dev.kobj, "umad%d", port->devnum);
- if (cdev_add(&port->dev, base_dev + port->devnum, 1))
+ port->dev = cdev_alloc();
+ if (!port->dev)
return -1;
-
- port->class_dev.class = &umad_class;
- port->class_dev.dev = device->dma_device;
-
- snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum);
-
- if (class_device_register(&port->class_dev))
+ port->dev->owner = THIS_MODULE;
+ port->dev->ops = &umad_fops;
+ kobject_set_name(&port->dev->kobj, "umad%d", port->dev_num);
+ if (cdev_add(port->dev, base_dev + port->dev_num, 1))
goto err_cdev;
- class_set_devdata(&port->class_dev, port);
- kref_get(&port->umad_dev->ref);
+ port->class_dev = class_device_create(umad_class, NULL, port->dev->dev,
+ device->dma_device,
+ "umad%d", port->dev_num);
+ if (IS_ERR(port->class_dev))
+ goto err_cdev;
- if (class_device_create_file(&port->class_dev, &class_device_attr_dev))
- goto err_class;
- if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev))
+ if (class_device_create_file(port->class_dev, &class_device_attr_ibdev))
goto err_class;
- if (class_device_create_file(&port->class_dev, &class_device_attr_port))
+ if (class_device_create_file(port->class_dev, &class_device_attr_port))
goto err_class;
- cdev_init(&port->sm_dev, &umad_sm_fops);
- port->sm_dev.owner = THIS_MODULE;
- kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
- if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1))
- return -1;
-
- port->sm_class_dev.class = &umad_class;
- port->sm_class_dev.dev = device->dma_device;
-
- snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
+ port->sm_dev = cdev_alloc();
+ if (!port->sm_dev)
+ goto err_class;
+ port->sm_dev->owner = THIS_MODULE;
+ port->sm_dev->ops = &umad_sm_fops;
+ kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num);
+ if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
+ goto err_sm_cdev;
- if (class_device_register(&port->sm_class_dev))
+ port->sm_class_dev = class_device_create(umad_class, NULL, port->sm_dev->dev,
+ device->dma_device,
+ "issm%d", port->dev_num);
+ if (IS_ERR(port->sm_class_dev))
goto err_sm_cdev;
- class_set_devdata(&port->sm_class_dev, port);
- kref_get(&port->umad_dev->ref);
+ class_set_devdata(port->class_dev, port);
+ class_set_devdata(port->sm_class_dev, port);
- if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev))
- goto err_sm_class;
- if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev))
+ if (class_device_create_file(port->sm_class_dev, &class_device_attr_ibdev))
goto err_sm_class;
- if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port))
+ if (class_device_create_file(port->sm_class_dev, &class_device_attr_port))
goto err_sm_class;
+ spin_lock(&port_lock);
+ umad_port[port->dev_num] = port;
+ spin_unlock(&port_lock);
+
return 0;
err_sm_class:
- class_device_unregister(&port->sm_class_dev);
+ class_device_destroy(umad_class, port->sm_dev->dev);
err_sm_cdev:
- cdev_del(&port->sm_dev);
+ cdev_del(port->sm_dev);
err_class:
- class_device_unregister(&port->class_dev);
+ class_device_destroy(umad_class, port->dev->dev);
err_cdev:
- cdev_del(&port->dev);
- clear_bit(port->devnum, dev_map);
+ cdev_del(port->dev);
+ clear_bit(port->dev_num, dev_map);
return -1;
}
+static void ib_umad_kill_port(struct ib_umad_port *port)
+{
+ class_set_devdata(port->class_dev, NULL);
+ class_set_devdata(port->sm_class_dev, NULL);
+
+ class_device_destroy(umad_class, port->dev->dev);
+ class_device_destroy(umad_class, port->sm_dev->dev);
+
+ cdev_del(port->dev);
+ cdev_del(port->sm_dev);
+
+ spin_lock(&port_lock);
+ umad_port[port->dev_num] = NULL;
+ spin_unlock(&port_lock);
+
+ clear_bit(port->dev_num, dev_map);
+}
+
static void ib_umad_add_one(struct ib_device *device)
{
struct ib_umad_device *umad_dev;
@@ -832,15 +841,12 @@ static void ib_umad_add_one(struct ib_device *device)
e = device->phys_port_cnt;
}
- umad_dev = kmalloc(sizeof *umad_dev +
+ umad_dev = kzalloc(sizeof *umad_dev +
(e - s + 1) * sizeof (struct ib_umad_port),
GFP_KERNEL);
if (!umad_dev)
return;
- memset(umad_dev, 0, sizeof *umad_dev +
- (e - s + 1) * sizeof (struct ib_umad_port));
-
kref_init(&umad_dev->ref);
umad_dev->start_port = s;
@@ -858,10 +864,8 @@ static void ib_umad_add_one(struct ib_device *device)
return;
err:
- while (--i >= s) {
- class_device_unregister(&umad_dev->port[i - s].class_dev);
- class_device_unregister(&umad_dev->port[i - s].sm_class_dev);
- }
+ while (--i >= s)
+ ib_umad_kill_port(&umad_dev->port[i]);
kref_put(&umad_dev->ref, ib_umad_release_dev);
}
@@ -874,10 +878,8 @@ static void ib_umad_remove_one(struct ib_device *device)
if (!umad_dev)
return;
- for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) {
- class_device_unregister(&umad_dev->port[i].class_dev);
- class_device_unregister(&umad_dev->port[i].sm_class_dev);
- }
+ for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
+ ib_umad_kill_port(&umad_dev->port[i]);
kref_put(&umad_dev->ref, ib_umad_release_dev);
}
@@ -886,8 +888,6 @@ static int __init ib_umad_init(void)
{
int ret;
- spin_lock_init(&map_lock);
-
ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
@@ -895,13 +895,14 @@ static int __init ib_umad_init(void)
goto out;
}
- ret = class_register(&umad_class);
- if (ret) {
+ umad_class = class_create(THIS_MODULE, "infiniband_mad");
+ if (IS_ERR(umad_class)) {
+ ret = PTR_ERR(umad_class);
printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
goto out_chrdev;
}
- ret = class_create_file(&umad_class, &class_attr_abi_version);
+ ret = class_create_file(umad_class, &class_attr_abi_version);
if (ret) {
printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
goto out_class;
@@ -916,7 +917,7 @@ static int __init ib_umad_init(void)
return 0;
out_class:
- class_unregister(&umad_class);
+ class_destroy(umad_class);
out_chrdev:
unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
@@ -928,7 +929,7 @@ out:
static void __exit ib_umad_cleanup(void)
{
ib_unregister_client(&umad_client);
- class_unregister(&umad_class);
+ class_destroy(umad_class);
unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
}
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index cc12434..031cdf3 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -3,6 +3,7 @@
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -38,29 +39,47 @@
#ifndef UVERBS_H
#define UVERBS_H
-/* Include device.h and fs.h until cdev.h is self-sufficient */
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/cdev.h>
#include <linux/kref.h>
#include <linux/idr.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
+/*
+ * Our lifetime rules for these structs are the following:
+ *
+ * struct ib_uverbs_device: One reference is held by the module and
+ * released in ib_uverbs_remove_one(). Another reference is taken by
+ * ib_uverbs_open() each time the character special file is opened,
+ * and released in ib_uverbs_release_file() when the file is released.
+ *
+ * struct ib_uverbs_file: One reference is held by the VFS and
+ * released when the file is closed. Another reference is taken when
+ * an asynchronous event queue file is created and released when the
+ * event file is closed.
+ *
+ * struct ib_uverbs_event_file: One reference is held by the VFS and
+ * released when the file is closed. For asynchronous event files,
+ * another reference is held by the corresponding main context file
+ * and released when that file is closed. For completion event files,
+ * a reference is taken when a CQ is created that uses the file, and
+ * released when the CQ is destroyed.
+ */
+
struct ib_uverbs_device {
+ struct kref ref;
int devnum;
- struct cdev dev;
- struct class_device class_dev;
+ struct cdev *dev;
+ struct class_device *class_dev;
struct ib_device *ib_dev;
- int num_comp;
+ int num_comp_vectors;
};
struct ib_uverbs_event_file {
struct kref ref;
+ struct file *file;
struct ib_uverbs_file *uverbs_file;
spinlock_t lock;
- int fd;
int is_async;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
@@ -73,8 +92,7 @@ struct ib_uverbs_file {
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
- struct ib_uverbs_event_file async_file;
- struct ib_uverbs_event_file comp_file[1];
+ struct ib_uverbs_event_file *async_file;
};
struct ib_uverbs_event {
@@ -110,10 +128,23 @@ extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
+struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
+ int is_async, int *fd);
+void ib_uverbs_release_event_file(struct kref *ref);
+struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
+
+void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
+ struct ib_uverbs_event_file *ev_file,
+ struct ib_ucq_object *uobj);
+void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
+ struct ib_uevent_object *uobj);
+
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event);
int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
void *addr, size_t size, int write);
@@ -125,21 +156,26 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
const char __user *buf, int in_len, \
int out_len)
-IB_UVERBS_DECLARE_CMD(query_params);
IB_UVERBS_DECLARE_CMD(get_context);
IB_UVERBS_DECLARE_CMD(query_device);
IB_UVERBS_DECLARE_CMD(query_port);
-IB_UVERBS_DECLARE_CMD(query_gid);
-IB_UVERBS_DECLARE_CMD(query_pkey);
IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
+IB_UVERBS_DECLARE_CMD(create_comp_channel);
IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(poll_cq);
+IB_UVERBS_DECLARE_CMD(req_notify_cq);
IB_UVERBS_DECLARE_CMD(destroy_cq);
IB_UVERBS_DECLARE_CMD(create_qp);
IB_UVERBS_DECLARE_CMD(modify_qp);
IB_UVERBS_DECLARE_CMD(destroy_qp);
+IB_UVERBS_DECLARE_CMD(post_send);
+IB_UVERBS_DECLARE_CMD(post_recv);
+IB_UVERBS_DECLARE_CMD(post_srq_recv);
+IB_UVERBS_DECLARE_CMD(create_ah);
+IB_UVERBS_DECLARE_CMD(destroy_ah);
IB_UVERBS_DECLARE_CMD(attach_mcast);
IB_UVERBS_DECLARE_CMD(detach_mcast);
IB_UVERBS_DECLARE_CMD(create_srq);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 5624451..8c89abc 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -33,6 +34,9 @@
* $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
*/
+#include <linux/file.h>
+#include <linux/fs.h>
+
#include <asm/uaccess.h>
#include "uverbs.h"
@@ -45,29 +49,6 @@
(udata)->outlen = (olen); \
} while (0)
-ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
-{
- struct ib_uverbs_query_params cmd;
- struct ib_uverbs_query_params_resp resp;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- memset(&resp, 0, sizeof resp);
-
- resp.num_cq_events = file->device->num_comp;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
-}
-
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
@@ -77,7 +58,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_udata udata;
struct ib_device *ibdev = file->device->ib_dev;
struct ib_ucontext *ucontext;
- int i;
+ struct file *filp;
int ret;
if (out_len < sizeof resp)
@@ -110,26 +91,42 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
- resp.async_fd = file->async_file.fd;
- for (i = 0; i < file->device->num_comp; ++i)
- if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab +
- i * sizeof (__u32),
- &file->comp_file[i].fd, sizeof (__u32))) {
- ret = -EFAULT;
- goto err_free;
- }
+ resp.num_comp_vectors = file->device->num_comp_vectors;
+
+ filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd);
+ if (IS_ERR(filp)) {
+ ret = PTR_ERR(filp);
+ goto err_free;
+ }
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_free;
+ goto err_file;
}
+ file->async_file = filp->private_data;
+
+ INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev,
+ ib_uverbs_event_handler);
+ ret = ib_register_event_handler(&file->event_handler);
+ if (ret)
+ goto err_file;
+
+ kref_get(&file->async_file->ref);
+ kref_get(&file->ref);
file->ucontext = ucontext;
+
+ fd_install(resp.async_fd, filp);
+
up(&file->mutex);
return in_len;
+err_file:
+ put_unused_fd(resp.async_fd);
+ fput(filp);
+
err_free:
ibdev->dealloc_ucontext(ucontext);
@@ -255,62 +252,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
return in_len;
}
-ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
-{
- struct ib_uverbs_query_gid cmd;
- struct ib_uverbs_query_gid_resp resp;
- int ret;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- memset(&resp, 0, sizeof resp);
-
- ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index,
- (union ib_gid *) resp.gid);
- if (ret)
- return ret;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
-}
-
-ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
-{
- struct ib_uverbs_query_pkey cmd;
- struct ib_uverbs_query_pkey_resp resp;
- int ret;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- memset(&resp, 0, sizeof resp);
-
- ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index,
- &resp.pkey);
- if (ret)
- return ret;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
-}
-
ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
const char __user *buf,
int in_len, int out_len)
@@ -349,24 +290,20 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
pd->uobject = uobj;
atomic_set(&pd->usecnt, 0);
+ down(&ib_uverbs_idr_mutex);
+
retry:
if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
ret = -ENOMEM;
- goto err_pd;
+ goto err_up;
}
- down(&ib_uverbs_idr_mutex);
ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
- up(&ib_uverbs_idr_mutex);
if (ret == -EAGAIN)
goto retry;
if (ret)
- goto err_pd;
-
- down(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->pd_list);
- up(&file->mutex);
+ goto err_up;
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
@@ -374,21 +311,22 @@ retry:
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_list;
+ goto err_idr;
}
- return in_len;
-
-err_list:
- down(&file->mutex);
- list_del(&uobj->list);
+ down(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->pd_list);
up(&file->mutex);
- down(&ib_uverbs_idr_mutex);
- idr_remove(&ib_uverbs_pd_idr, uobj->id);
up(&ib_uverbs_idr_mutex);
-err_pd:
+ return in_len;
+
+err_idr:
+ idr_remove(&ib_uverbs_pd_idr, uobj->id);
+
+err_up:
+ up(&ib_uverbs_idr_mutex);
ib_dealloc_pd(pd);
err:
@@ -459,6 +397,14 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
+ /*
+ * Local write permission is required if remote write or
+ * remote atomic permission is also requested.
+ */
+ if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+ !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
+ return -EINVAL;
+
obj = kmalloc(sizeof *obj, GFP_KERNEL);
if (!obj)
return -ENOMEM;
@@ -524,24 +470,22 @@ retry:
resp.mr_handle = obj->uobject.id;
- down(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
- up(&file->mutex);
-
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_list;
+ goto err_idr;
}
+ down(&file->mutex);
+ list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+ up(&file->mutex);
+
up(&ib_uverbs_idr_mutex);
return in_len;
-err_list:
- down(&file->mutex);
- list_del(&obj->uobject.list);
- up(&file->mutex);
+err_idr:
+ idr_remove(&ib_uverbs_mr_idr, obj->uobject.id);
err_unreg:
ib_dereg_mr(mr);
@@ -595,6 +539,35 @@ out:
return ret ? ret : in_len;
}
+ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_comp_channel cmd;
+ struct ib_uverbs_create_comp_channel_resp resp;
+ struct file *filp;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ put_unused_fd(resp.fd);
+ fput(filp);
+ return -EFAULT;
+ }
+
+ fd_install(resp.fd, filp);
+ return in_len;
+}
+
ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -603,6 +576,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
struct ib_uverbs_create_cq_resp resp;
struct ib_udata udata;
struct ib_ucq_object *uobj;
+ struct ib_uverbs_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
@@ -616,9 +590,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
- if (cmd.event_handler >= file->device->num_comp)
+ if (cmd.comp_vector >= file->device->num_comp_vectors)
return -EINVAL;
+ if (cmd.comp_channel >= 0)
+ ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
+
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
@@ -641,27 +618,23 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
cq->uobject = &uobj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = file;
+ cq->cq_context = ev_file;
atomic_set(&cq->usecnt, 0);
+ down(&ib_uverbs_idr_mutex);
+
retry:
if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
ret = -ENOMEM;
- goto err_cq;
+ goto err_up;
}
- down(&ib_uverbs_idr_mutex);
ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id);
- up(&ib_uverbs_idr_mutex);
if (ret == -EAGAIN)
goto retry;
if (ret)
- goto err_cq;
-
- down(&file->mutex);
- list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
- up(&file->mutex);
+ goto err_up;
memset(&resp, 0, sizeof resp);
resp.cq_handle = uobj->uobject.id;
@@ -670,21 +643,22 @@ retry:
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_list;
+ goto err_idr;
}
- return in_len;
-
-err_list:
- down(&file->mutex);
- list_del(&uobj->uobject.list);
+ down(&file->mutex);
+ list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
up(&file->mutex);
- down(&ib_uverbs_idr_mutex);
- idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id);
up(&ib_uverbs_idr_mutex);
-err_cq:
+ return in_len;
+
+err_idr:
+ idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id);
+
+err_up:
+ up(&ib_uverbs_idr_mutex);
ib_destroy_cq(cq);
err:
@@ -692,6 +666,93 @@ err:
return ret;
}
+ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_poll_cq cmd;
+ struct ib_uverbs_poll_cq_resp *resp;
+ struct ib_cq *cq;
+ struct ib_wc *wc;
+ int ret = 0;
+ int i;
+ int rsize;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
+ if (!wc)
+ return -ENOMEM;
+
+ rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
+ resp = kmalloc(rsize, GFP_KERNEL);
+ if (!resp) {
+ ret = -ENOMEM;
+ goto out_wc;
+ }
+
+ down(&ib_uverbs_idr_mutex);
+ cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+ if (!cq || cq->uobject->context != file->ucontext) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ resp->count = ib_poll_cq(cq, cmd.ne, wc);
+
+ for (i = 0; i < resp->count; i++) {
+ resp->wc[i].wr_id = wc[i].wr_id;
+ resp->wc[i].status = wc[i].status;
+ resp->wc[i].opcode = wc[i].opcode;
+ resp->wc[i].vendor_err = wc[i].vendor_err;
+ resp->wc[i].byte_len = wc[i].byte_len;
+ resp->wc[i].imm_data = wc[i].imm_data;
+ resp->wc[i].qp_num = wc[i].qp_num;
+ resp->wc[i].src_qp = wc[i].src_qp;
+ resp->wc[i].wc_flags = wc[i].wc_flags;
+ resp->wc[i].pkey_index = wc[i].pkey_index;
+ resp->wc[i].slid = wc[i].slid;
+ resp->wc[i].sl = wc[i].sl;
+ resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
+ resp->wc[i].port_num = wc[i].port_num;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize))
+ ret = -EFAULT;
+
+out:
+ up(&ib_uverbs_idr_mutex);
+ kfree(resp);
+
+out_wc:
+ kfree(wc);
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_req_notify_cq cmd;
+ struct ib_cq *cq;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+ cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+ if (cq && cq->uobject->context == file->ucontext) {
+ ib_req_notify_cq(cq, cmd.solicited_only ?
+ IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
+ ret = in_len;
+ }
+ up(&ib_uverbs_idr_mutex);
+
+ return ret;
+}
+
ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -700,7 +761,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_cq_resp resp;
struct ib_cq *cq;
struct ib_ucq_object *uobj;
- struct ib_uverbs_event *evt, *tmp;
+ struct ib_uverbs_event_file *ev_file;
u64 user_handle;
int ret = -EINVAL;
@@ -716,7 +777,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
goto out;
user_handle = cq->uobject->user_handle;
- uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+ uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+ ev_file = cq->cq_context;
ret = ib_destroy_cq(cq);
if (ret)
@@ -728,19 +790,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
list_del(&uobj->uobject.list);
up(&file->mutex);
- spin_lock_irq(&file->comp_file[0].lock);
- list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->comp_file[0].lock);
-
- spin_lock_irq(&file->async_file.lock);
- list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->async_file.lock);
+ ib_uverbs_release_ucq(file, ev_file, uobj);
resp.comp_events_reported = uobj->comp_events_reported;
resp.async_events_reported = uobj->async_events_reported;
@@ -859,24 +909,22 @@ retry:
resp.qp_handle = uobj->uobject.id;
- down(&file->mutex);
- list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list);
- up(&file->mutex);
-
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_list;
+ goto err_idr;
}
+ down(&file->mutex);
+ list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list);
+ up(&file->mutex);
+
up(&ib_uverbs_idr_mutex);
return in_len;
-err_list:
- down(&file->mutex);
- list_del(&uobj->uobject.list);
- up(&file->mutex);
+err_idr:
+ idr_remove(&ib_uverbs_qp_idr, uobj->uobject.id);
err_destroy:
ib_destroy_qp(qp);
@@ -979,7 +1027,6 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_qp_resp resp;
struct ib_qp *qp;
struct ib_uevent_object *uobj;
- struct ib_uverbs_event *evt, *tmp;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1005,12 +1052,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
list_del(&uobj->uobject.list);
up(&file->mutex);
- spin_lock_irq(&file->async_file.lock);
- list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->async_file.lock);
+ ib_uverbs_release_uevent(file, uobj);
resp.events_reported = uobj->events_reported;
@@ -1026,6 +1068,468 @@ out:
return ret ? ret : in_len;
}
+ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_post_send cmd;
+ struct ib_uverbs_post_send_resp resp;
+ struct ib_uverbs_send_wr *user_wr;
+ struct ib_send_wr *wr = NULL, *last, *next, *bad_wr;
+ struct ib_qp *qp;
+ int i, sg_ind;
+ ssize_t ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
+ cmd.sge_count * sizeof (struct ib_uverbs_sge))
+ return -EINVAL;
+
+ if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
+ return -EINVAL;
+
+ user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
+ if (!user_wr)
+ return -ENOMEM;
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (!qp || qp->uobject->context != file->ucontext)
+ goto out;
+
+ sg_ind = 0;
+ last = NULL;
+ for (i = 0; i < cmd.wr_count; ++i) {
+ if (copy_from_user(user_wr,
+ buf + sizeof cmd + i * cmd.wqe_size,
+ cmd.wqe_size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (user_wr->num_sge + sg_ind > cmd.sge_count) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+ user_wr->num_sge * sizeof (struct ib_sge),
+ GFP_KERNEL);
+ if (!next) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (!last)
+ wr = next;
+ else
+ last->next = next;
+ last = next;
+
+ next->next = NULL;
+ next->wr_id = user_wr->wr_id;
+ next->num_sge = user_wr->num_sge;
+ next->opcode = user_wr->opcode;
+ next->send_flags = user_wr->send_flags;
+ next->imm_data = user_wr->imm_data;
+
+ if (qp->qp_type == IB_QPT_UD) {
+ next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr,
+ user_wr->wr.ud.ah);
+ if (!next->wr.ud.ah) {
+ ret = -EINVAL;
+ goto out;
+ }
+ next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
+ next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+ } else {
+ switch (next->opcode) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ case IB_WR_RDMA_READ:
+ next->wr.rdma.remote_addr =
+ user_wr->wr.rdma.remote_addr;
+ next->wr.rdma.rkey =
+ user_wr->wr.rdma.rkey;
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ next->wr.atomic.remote_addr =
+ user_wr->wr.atomic.remote_addr;
+ next->wr.atomic.compare_add =
+ user_wr->wr.atomic.compare_add;
+ next->wr.atomic.swap = user_wr->wr.atomic.swap;
+ next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (next->num_sge) {
+ next->sg_list = (void *) next +
+ ALIGN(sizeof *next, sizeof (struct ib_sge));
+ if (copy_from_user(next->sg_list,
+ buf + sizeof cmd +
+ cmd.wr_count * cmd.wqe_size +
+ sg_ind * sizeof (struct ib_sge),
+ next->num_sge * sizeof (struct ib_sge))) {
+ ret = -EFAULT;
+ goto out;
+ }
+ sg_ind += next->num_sge;
+ } else
+ next->sg_list = NULL;
+ }
+
+ resp.bad_wr = 0;
+ ret = qp->device->post_send(qp, wr, &bad_wr);
+ if (ret)
+ for (next = wr; next; next = next->next) {
+ ++resp.bad_wr;
+ if (next == bad_wr)
+ break;
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ kfree(user_wr);
+
+ return ret ? ret : in_len;
+}
+
+static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
+ int in_len,
+ u32 wr_count,
+ u32 sge_count,
+ u32 wqe_size)
+{
+ struct ib_uverbs_recv_wr *user_wr;
+ struct ib_recv_wr *wr = NULL, *last, *next;
+ int sg_ind;
+ int i;
+ int ret;
+
+ if (in_len < wqe_size * wr_count +
+ sge_count * sizeof (struct ib_uverbs_sge))
+ return ERR_PTR(-EINVAL);
+
+ if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
+ return ERR_PTR(-EINVAL);
+
+ user_wr = kmalloc(wqe_size, GFP_KERNEL);
+ if (!user_wr)
+ return ERR_PTR(-ENOMEM);
+
+ sg_ind = 0;
+ last = NULL;
+ for (i = 0; i < wr_count; ++i) {
+ if (copy_from_user(user_wr, buf + i * wqe_size,
+ wqe_size)) {
+ ret = -EFAULT;
+ goto err;
+ }
+
+ if (user_wr->num_sge + sg_ind > sge_count) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+ user_wr->num_sge * sizeof (struct ib_sge),
+ GFP_KERNEL);
+ if (!next) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (!last)
+ wr = next;
+ else
+ last->next = next;
+ last = next;
+
+ next->next = NULL;
+ next->wr_id = user_wr->wr_id;
+ next->num_sge = user_wr->num_sge;
+
+ if (next->num_sge) {
+ next->sg_list = (void *) next +
+ ALIGN(sizeof *next, sizeof (struct ib_sge));
+ if (copy_from_user(next->sg_list,
+ buf + wr_count * wqe_size +
+ sg_ind * sizeof (struct ib_sge),
+ next->num_sge * sizeof (struct ib_sge))) {
+ ret = -EFAULT;
+ goto err;
+ }
+ sg_ind += next->num_sge;
+ } else
+ next->sg_list = NULL;
+ }
+
+ kfree(user_wr);
+ return wr;
+
+err:
+ kfree(user_wr);
+
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ return ERR_PTR(ret);
+}
+
+ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_post_recv cmd;
+ struct ib_uverbs_post_recv_resp resp;
+ struct ib_recv_wr *wr, *next, *bad_wr;
+ struct ib_qp *qp;
+ ssize_t ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+ in_len - sizeof cmd, cmd.wr_count,
+ cmd.sge_count, cmd.wqe_size);
+ if (IS_ERR(wr))
+ return PTR_ERR(wr);
+
+ down(&ib_uverbs_idr_mutex);
+
+ qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+ if (!qp || qp->uobject->context != file->ucontext)
+ goto out;
+
+ resp.bad_wr = 0;
+ ret = qp->device->post_recv(qp, wr, &bad_wr);
+ if (ret)
+ for (next = wr; next; next = next->next) {
+ ++resp.bad_wr;
+ if (next == bad_wr)
+ break;
+ }
+
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_post_srq_recv cmd;
+ struct ib_uverbs_post_srq_recv_resp resp;
+ struct ib_recv_wr *wr, *next, *bad_wr;
+ struct ib_srq *srq;
+ ssize_t ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+ in_len - sizeof cmd, cmd.wr_count,
+ cmd.sge_count, cmd.wqe_size);
+ if (IS_ERR(wr))
+ return PTR_ERR(wr);
+
+ down(&ib_uverbs_idr_mutex);
+
+ srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
+ if (!srq || srq->uobject->context != file->ucontext)
+ goto out;
+
+ resp.bad_wr = 0;
+ ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
+ if (ret)
+ for (next = wr; next; next = next->next) {
+ ++resp.bad_wr;
+ if (next == bad_wr)
+ break;
+ }
+
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ while (wr) {
+ next = wr->next;
+ kfree(wr);
+ wr = next;
+ }
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_ah cmd;
+ struct ib_uverbs_create_ah_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_ah *ah;
+ struct ib_ah_attr attr;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+ if (!uobj)
+ return -ENOMEM;
+
+ down(&ib_uverbs_idr_mutex);
+
+ pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+ if (!pd || pd->uobject->context != file->ucontext) {
+ ret = -EINVAL;
+ goto err_up;
+ }
+
+ uobj->user_handle = cmd.user_handle;
+ uobj->context = file->ucontext;
+
+ attr.dlid = cmd.attr.dlid;
+ attr.sl = cmd.attr.sl;
+ attr.src_path_bits = cmd.attr.src_path_bits;
+ attr.static_rate = cmd.attr.static_rate;
+ attr.port_num = cmd.attr.port_num;
+ attr.grh.flow_label = cmd.attr.grh.flow_label;
+ attr.grh.sgid_index = cmd.attr.grh.sgid_index;
+ attr.grh.hop_limit = cmd.attr.grh.hop_limit;
+ attr.grh.traffic_class = cmd.attr.grh.traffic_class;
+ memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
+
+ ah = ib_create_ah(pd, &attr);
+ if (IS_ERR(ah)) {
+ ret = PTR_ERR(ah);
+ goto err_up;
+ }
+
+ ah->uobject = uobj;
+
+retry:
+ if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_destroy;
+ }
+
+ ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id);
+
+ if (ret == -EAGAIN)
+ goto retry;
+ if (ret)
+ goto err_destroy;
+
+ resp.ah_handle = uobj->id;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_idr;
+ }
+
+ down(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->ah_list);
+ up(&file->mutex);
+
+ up(&ib_uverbs_idr_mutex);
+
+ return in_len;
+
+err_idr:
+ idr_remove(&ib_uverbs_ah_idr, uobj->id);
+
+err_destroy:
+ ib_destroy_ah(ah);
+
+err_up:
+ up(&ib_uverbs_idr_mutex);
+
+ kfree(uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len, int out_len)
+{
+ struct ib_uverbs_destroy_ah cmd;
+ struct ib_ah *ah;
+ struct ib_uobject *uobj;
+ int ret = -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ down(&ib_uverbs_idr_mutex);
+
+ ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle);
+ if (!ah || ah->uobject->context != file->ucontext)
+ goto out;
+
+ uobj = ah->uobject;
+
+ ret = ib_destroy_ah(ah);
+ if (ret)
+ goto out;
+
+ idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle);
+
+ down(&file->mutex);
+ list_del(&uobj->list);
+ up(&file->mutex);
+
+ kfree(uobj);
+
+out:
+ up(&ib_uverbs_idr_mutex);
+
+ return ret ? ret : in_len;
+}
+
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
@@ -1148,24 +1652,22 @@ retry:
resp.srq_handle = uobj->uobject.id;
- down(&file->mutex);
- list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
- up(&file->mutex);
-
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp)) {
ret = -EFAULT;
- goto err_list;
+ goto err_idr;
}
+ down(&file->mutex);
+ list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
+ up(&file->mutex);
+
up(&ib_uverbs_idr_mutex);
return in_len;
-err_list:
- down(&file->mutex);
- list_del(&uobj->uobject.list);
- up(&file->mutex);
+err_idr:
+ idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id);
err_destroy:
ib_destroy_srq(srq);
@@ -1217,7 +1719,6 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_uverbs_destroy_srq_resp resp;
struct ib_srq *srq;
struct ib_uevent_object *uobj;
- struct ib_uverbs_event *evt, *tmp;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1243,12 +1744,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
list_del(&uobj->uobject.list);
up(&file->mutex);
- spin_lock_irq(&file->async_file.lock);
- list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->async_file.lock);
+ ib_uverbs_release_uevent(file, uobj);
resp.events_reported = uobj->events_reported;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 1251180..0eb38f4 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -3,6 +3,7 @@
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -43,6 +44,7 @@
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/mount.h>
+#include <linux/cdev.h>
#include <asm/uaccess.h>
@@ -62,6 +64,8 @@ enum {
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
+static struct class *uverbs_class;
+
DECLARE_MUTEX(ib_uverbs_idr_mutex);
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
@@ -72,31 +76,37 @@ DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
static spinlock_t map_lock;
+static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len) = {
- [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params,
- [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
- [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
- [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
- [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid,
- [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey,
- [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
- [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
- [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
- [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
- [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
- [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
- [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
- [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
- [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
- [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
- [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
- [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
- [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
- [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
+ [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
+ [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
+ [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
+ [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
+ [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
+ [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
+ [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
+ [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
+ [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
+ [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
+ [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
+ [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
+ [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
+ [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
+ [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
+ [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
+ [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
+ [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
+ [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
+ [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
+ [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
+ [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
+ [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
};
static struct vfsmount *uverbs_event_mnt;
@@ -104,7 +114,54 @@ static struct vfsmount *uverbs_event_mnt;
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);
-static int ib_dealloc_ucontext(struct ib_ucontext *context)
+static void ib_uverbs_release_dev(struct kref *ref)
+{
+ struct ib_uverbs_device *dev =
+ container_of(ref, struct ib_uverbs_device, ref);
+
+ kfree(dev);
+}
+
+void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
+ struct ib_uverbs_event_file *ev_file,
+ struct ib_ucq_object *uobj)
+{
+ struct ib_uverbs_event *evt, *tmp;
+
+ if (ev_file) {
+ spin_lock_irq(&ev_file->lock);
+ list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
+ list_del(&evt->list);
+ kfree(evt);
+ }
+ spin_unlock_irq(&ev_file->lock);
+
+ kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ }
+
+ spin_lock_irq(&file->async_file->lock);
+ list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
+ list_del(&evt->list);
+ kfree(evt);
+ }
+ spin_unlock_irq(&file->async_file->lock);
+}
+
+void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
+ struct ib_uevent_object *uobj)
+{
+ struct ib_uverbs_event *evt, *tmp;
+
+ spin_lock_irq(&file->async_file->lock);
+ list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
+ list_del(&evt->list);
+ kfree(evt);
+ }
+ spin_unlock_irq(&file->async_file->lock);
+}
+
+static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
+ struct ib_ucontext *context)
{
struct ib_uobject *uobj, *tmp;
@@ -113,30 +170,46 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context)
down(&ib_uverbs_idr_mutex);
- /* XXX Free AHs */
+ list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
+ struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id);
+ idr_remove(&ib_uverbs_ah_idr, uobj->id);
+ ib_destroy_ah(ah);
+ list_del(&uobj->list);
+ kfree(uobj);
+ }
list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
+ struct ib_uevent_object *uevent =
+ container_of(uobj, struct ib_uevent_object, uobject);
idr_remove(&ib_uverbs_qp_idr, uobj->id);
ib_destroy_qp(qp);
list_del(&uobj->list);
- kfree(container_of(uobj, struct ib_uevent_object, uobject));
+ ib_uverbs_release_uevent(file, uevent);
+ kfree(uevent);
}
list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id);
+ struct ib_uverbs_event_file *ev_file = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobj, struct ib_ucq_object, uobject);
idr_remove(&ib_uverbs_cq_idr, uobj->id);
ib_destroy_cq(cq);
list_del(&uobj->list);
- kfree(container_of(uobj, struct ib_ucq_object, uobject));
+ ib_uverbs_release_ucq(file, ev_file, ucq);
+ kfree(ucq);
}
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id);
+ struct ib_uevent_object *uevent =
+ container_of(uobj, struct ib_uevent_object, uobject);
idr_remove(&ib_uverbs_srq_idr, uobj->id);
ib_destroy_srq(srq);
list_del(&uobj->list);
- kfree(container_of(uobj, struct ib_uevent_object, uobject));
+ ib_uverbs_release_uevent(file, uevent);
+ kfree(uevent);
}
/* XXX Free MWs */
@@ -175,6 +248,8 @@ static void ib_uverbs_release_file(struct kref *ref)
container_of(ref, struct ib_uverbs_file, ref);
module_put(file->device->ib_dev->owner);
+ kref_put(&file->device->ref, ib_uverbs_release_dev);
+
kfree(file);
}
@@ -188,25 +263,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
spin_lock_irq(&file->lock);
- while (list_empty(&file->event_list) && file->fd >= 0) {
+ while (list_empty(&file->event_list)) {
spin_unlock_irq(&file->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
if (wait_event_interruptible(file->poll_wait,
- !list_empty(&file->event_list) ||
- file->fd < 0))
+ !list_empty(&file->event_list)))
return -ERESTARTSYS;
spin_lock_irq(&file->lock);
}
- if (file->fd < 0) {
- spin_unlock_irq(&file->lock);
- return -ENODEV;
- }
-
event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
if (file->is_async)
@@ -248,26 +317,19 @@ static unsigned int ib_uverbs_event_poll(struct file *filp,
poll_wait(filp, &file->poll_wait, wait);
spin_lock_irq(&file->lock);
- if (file->fd < 0)
- pollflags = POLLERR;
- else if (!list_empty(&file->event_list))
+ if (!list_empty(&file->event_list))
pollflags = POLLIN | POLLRDNORM;
spin_unlock_irq(&file->lock);
return pollflags;
}
-static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
+void ib_uverbs_release_event_file(struct kref *ref)
{
- struct ib_uverbs_event *entry, *tmp;
+ struct ib_uverbs_event_file *file =
+ container_of(ref, struct ib_uverbs_event_file, ref);
- spin_lock_irq(&file->lock);
- if (file->fd != -1) {
- file->fd = -1;
- list_for_each_entry_safe(entry, tmp, &file->event_list, list)
- kfree(entry);
- }
- spin_unlock_irq(&file->lock);
+ kfree(file);
}
static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
@@ -280,21 +342,30 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_event *entry, *tmp;
+
+ spin_lock_irq(&file->lock);
+ file->file = NULL;
+ list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ kfree(entry);
+ }
+ spin_unlock_irq(&file->lock);
- ib_uverbs_event_release(file);
ib_uverbs_event_fasync(-1, filp, 0);
- kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+
+ if (file->is_async) {
+ ib_unregister_event_handler(&file->uverbs_file->event_handler);
+ kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+ }
+ kref_put(&file->ref, ib_uverbs_release_event_file);
return 0;
}
static struct file_operations uverbs_event_fops = {
- /*
- * No .owner field since we artificially create event files,
- * so there is no increment to the module reference count in
- * the open path. All event files come from a uverbs command
- * file, which already takes a module reference, so this is OK.
- */
+ .owner = THIS_MODULE,
.read = ib_uverbs_event_read,
.poll = ib_uverbs_event_poll,
.release = ib_uverbs_event_close,
@@ -303,27 +374,37 @@ static struct file_operations uverbs_event_fops = {
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
- struct ib_uverbs_file *file = cq_context;
- struct ib_ucq_object *uobj;
- struct ib_uverbs_event *entry;
- unsigned long flags;
+ struct ib_uverbs_event_file *file = cq_context;
+ struct ib_ucq_object *uobj;
+ struct ib_uverbs_event *entry;
+ unsigned long flags;
+
+ if (!file)
+ return;
+
+ spin_lock_irqsave(&file->lock, flags);
+ if (!file->file) {
+ spin_unlock_irqrestore(&file->lock, flags);
+ return;
+ }
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
- if (!entry)
+ if (!entry) {
+ spin_unlock_irqrestore(&file->lock, flags);
return;
+ }
uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
entry->desc.comp.cq_handle = cq->uobject->user_handle;
entry->counter = &uobj->comp_events_reported;
- spin_lock_irqsave(&file->comp_file[0].lock, flags);
- list_add_tail(&entry->list, &file->comp_file[0].event_list);
+ list_add_tail(&entry->list, &file->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
- spin_unlock_irqrestore(&file->comp_file[0].lock, flags);
+ spin_unlock_irqrestore(&file->lock, flags);
- wake_up_interruptible(&file->comp_file[0].poll_wait);
- kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&file->poll_wait);
+ kill_fasync(&file->async_queue, SIGIO, POLL_IN);
}
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -334,32 +415,40 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
struct ib_uverbs_event *entry;
unsigned long flags;
+ spin_lock_irqsave(&file->async_file->lock, flags);
+ if (!file->async_file->file) {
+ spin_unlock_irqrestore(&file->async_file->lock, flags);
+ return;
+ }
+
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
- if (!entry)
+ if (!entry) {
+ spin_unlock_irqrestore(&file->async_file->lock, flags);
return;
+ }
entry->desc.async.element = element;
entry->desc.async.event_type = event;
entry->counter = counter;
- spin_lock_irqsave(&file->async_file.lock, flags);
- list_add_tail(&entry->list, &file->async_file.event_list);
+ list_add_tail(&entry->list, &file->async_file->event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file.lock, flags);
+ spin_unlock_irqrestore(&file->async_file->lock, flags);
- wake_up_interruptible(&file->async_file.poll_wait);
- kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&file->async_file->poll_wait);
+ kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
}
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
+ struct ib_uverbs_event_file *ev_file = context_ptr;
struct ib_ucq_object *uobj;
uobj = container_of(event->element.cq->uobject,
struct ib_ucq_object, uobject);
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
+ ib_uverbs_async_handler(ev_file->uverbs_file, uobj->uobject.user_handle,
event->event, &uobj->async_list,
&uobj->async_events_reported);
@@ -389,8 +478,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
&uobj->events_reported);
}
-static void ib_uverbs_event_handler(struct ib_event_handler *handler,
- struct ib_event *event)
+void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
{
struct ib_uverbs_file *file =
container_of(handler, struct ib_uverbs_file, event_handler);
@@ -399,38 +488,90 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler,
NULL, NULL);
}
-static int ib_uverbs_event_init(struct ib_uverbs_event_file *file,
- struct ib_uverbs_file *uverbs_file)
+struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
+ int is_async, int *fd)
{
+ struct ib_uverbs_event_file *ev_file;
struct file *filp;
+ int ret;
- spin_lock_init(&file->lock);
- INIT_LIST_HEAD(&file->event_list);
- init_waitqueue_head(&file->poll_wait);
- file->uverbs_file = uverbs_file;
- file->async_queue = NULL;
-
- file->fd = get_unused_fd();
- if (file->fd < 0)
- return file->fd;
+ ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
+ if (!ev_file)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&ev_file->ref);
+ spin_lock_init(&ev_file->lock);
+ INIT_LIST_HEAD(&ev_file->event_list);
+ init_waitqueue_head(&ev_file->poll_wait);
+ ev_file->uverbs_file = uverbs_file;
+ ev_file->async_queue = NULL;
+ ev_file->is_async = is_async;
+
+ *fd = get_unused_fd();
+ if (*fd < 0) {
+ ret = *fd;
+ goto err;
+ }
filp = get_empty_filp();
if (!filp) {
- put_unused_fd(file->fd);
- return -ENFILE;
+ ret = -ENFILE;
+ goto err_fd;
}
- filp->f_op = &uverbs_event_fops;
+ ev_file->file = filp;
+
+ /*
+ * fops_get() can't fail here, because we're coming from a
+ * system call on a uverbs file, which will already have a
+ * module reference.
+ */
+ filp->f_op = fops_get(&uverbs_event_fops);
filp->f_vfsmnt = mntget(uverbs_event_mnt);
filp->f_dentry = dget(uverbs_event_mnt->mnt_root);
filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
filp->f_flags = O_RDONLY;
filp->f_mode = FMODE_READ;
- filp->private_data = file;
+ filp->private_data = ev_file;
- fd_install(file->fd, filp);
+ return filp;
- return 0;
+err_fd:
+ put_unused_fd(*fd);
+
+err:
+ kfree(ev_file);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Look up a completion event file by FD. If lookup is successful,
+ * takes a ref to the event file struct that it returns; if
+ * unsuccessful, returns NULL.
+ */
+struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
+{
+ struct ib_uverbs_event_file *ev_file = NULL;
+ struct file *filp;
+
+ filp = fget(fd);
+ if (!filp)
+ return NULL;
+
+ if (filp->f_op != &uverbs_event_fops)
+ goto out;
+
+ ev_file = filp->private_data;
+ if (ev_file->is_async) {
+ ev_file = NULL;
+ goto out;
+ }
+
+ kref_get(&ev_file->ref);
+
+out:
+ fput(filp);
+ return ev_file;
}
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
@@ -450,11 +591,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (hdr.command < 0 ||
hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[hdr.command])
+ !uverbs_cmd_table[hdr.command] ||
+ !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
return -EINVAL;
- if (!file->ucontext &&
- hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS &&
+ if (!file->ucontext &&
hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
return -EINVAL;
@@ -474,84 +615,57 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
- struct ib_uverbs_device *dev =
- container_of(inode->i_cdev, struct ib_uverbs_device, dev);
+ struct ib_uverbs_device *dev;
struct ib_uverbs_file *file;
- int i = 0;
int ret;
- if (!try_module_get(dev->ib_dev->owner))
- return -ENODEV;
+ spin_lock(&map_lock);
+ dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
+ if (dev)
+ kref_get(&dev->ref);
+ spin_unlock(&map_lock);
+
+ if (!dev)
+ return -ENXIO;
+
+ if (!try_module_get(dev->ib_dev->owner)) {
+ ret = -ENODEV;
+ goto err;
+ }
- file = kmalloc(sizeof *file +
- (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file),
- GFP_KERNEL);
+ file = kmalloc(sizeof *file, GFP_KERNEL);
if (!file) {
ret = -ENOMEM;
- goto err;
+ goto err_module;
}
- file->device = dev;
+ file->device = dev;
+ file->ucontext = NULL;
+ file->async_file = NULL;
kref_init(&file->ref);
init_MUTEX(&file->mutex);
- file->ucontext = NULL;
-
- kref_get(&file->ref);
- ret = ib_uverbs_event_init(&file->async_file, file);
- if (ret)
- goto err_kref;
-
- file->async_file.is_async = 1;
-
- for (i = 0; i < dev->num_comp; ++i) {
- kref_get(&file->ref);
- ret = ib_uverbs_event_init(&file->comp_file[i], file);
- if (ret)
- goto err_async;
- file->comp_file[i].is_async = 0;
- }
-
-
filp->private_data = file;
- INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev,
- ib_uverbs_event_handler);
- if (ib_register_event_handler(&file->event_handler))
- goto err_async;
-
return 0;
-err_async:
- while (i--)
- ib_uverbs_event_release(&file->comp_file[i]);
-
- ib_uverbs_event_release(&file->async_file);
-
-err_kref:
- /*
- * One extra kref_put() because we took a reference before the
- * event file creation that failed and got us here.
- */
- kref_put(&file->ref, ib_uverbs_release_file);
- kref_put(&file->ref, ib_uverbs_release_file);
+err_module:
+ module_put(dev->ib_dev->owner);
err:
- module_put(dev->ib_dev->owner);
+ kref_put(&dev->ref, ib_uverbs_release_dev);
+
return ret;
}
static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
- int i;
- ib_unregister_event_handler(&file->event_handler);
- ib_uverbs_event_release(&file->async_file);
- ib_dealloc_ucontext(file->ucontext);
+ ib_uverbs_cleanup_ucontext(file, file->ucontext);
- for (i = 0; i < file->device->num_comp; ++i)
- ib_uverbs_event_release(&file->comp_file[i]);
+ if (file->async_file)
+ kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
@@ -581,27 +695,25 @@ static struct ib_client uverbs_client = {
static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
{
- struct ib_uverbs_device *dev =
- container_of(class_dev, struct ib_uverbs_device, class_dev);
+ struct ib_uverbs_device *dev = class_get_devdata(class_dev);
+
+ if (!dev)
+ return -ENODEV;
return sprintf(buf, "%s\n", dev->ib_dev->name);
}
static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
-static void ib_uverbs_release_class_dev(struct class_device *class_dev)
+static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf)
{
- struct ib_uverbs_device *dev =
- container_of(class_dev, struct ib_uverbs_device, class_dev);
+ struct ib_uverbs_device *dev = class_get_devdata(class_dev);
- cdev_del(&dev->dev);
- clear_bit(dev->devnum, dev_map);
- kfree(dev);
-}
+ if (!dev)
+ return -ENODEV;
-static struct class uverbs_class = {
- .name = "infiniband_verbs",
- .release = ib_uverbs_release_class_dev
-};
+ return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
+}
+static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
static ssize_t show_abi_version(struct class *class, char *buf)
{
@@ -622,6 +734,8 @@ static void ib_uverbs_add_one(struct ib_device *device)
memset(uverbs_dev, 0, sizeof *uverbs_dev);
+ kref_init(&uverbs_dev->ref);
+
spin_lock(&map_lock);
uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
@@ -631,41 +745,49 @@ static void ib_uverbs_add_one(struct ib_device *device)
set_bit(uverbs_dev->devnum, dev_map);
spin_unlock(&map_lock);
- uverbs_dev->ib_dev = device;
- uverbs_dev->num_comp = 1;
+ uverbs_dev->ib_dev = device;
+ uverbs_dev->num_comp_vectors = 1;
- if (device->mmap)
- cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops);
- else
- cdev_init(&uverbs_dev->dev, &uverbs_fops);
- uverbs_dev->dev.owner = THIS_MODULE;
- kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum);
- if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+ uverbs_dev->dev = cdev_alloc();
+ if (!uverbs_dev->dev)
goto err;
+ uverbs_dev->dev->owner = THIS_MODULE;
+ uverbs_dev->dev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
+ kobject_set_name(&uverbs_dev->dev->kobj, "uverbs%d", uverbs_dev->devnum);
+ if (cdev_add(uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+ goto err_cdev;
- uverbs_dev->class_dev.class = &uverbs_class;
- uverbs_dev->class_dev.dev = device->dma_device;
- uverbs_dev->class_dev.devt = uverbs_dev->dev.dev;
- snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum);
- if (class_device_register(&uverbs_dev->class_dev))
+ uverbs_dev->class_dev = class_device_create(uverbs_class, NULL,
+ uverbs_dev->dev->dev,
+ device->dma_device,
+ "uverbs%d", uverbs_dev->devnum);
+ if (IS_ERR(uverbs_dev->class_dev))
goto err_cdev;
- if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev))
+ class_set_devdata(uverbs_dev->class_dev, uverbs_dev);
+
+ if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_ibdev))
goto err_class;
+ if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_abi_version))
+ goto err_class;
+
+ spin_lock(&map_lock);
+ dev_table[uverbs_dev->devnum] = uverbs_dev;
+ spin_unlock(&map_lock);
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
err_class:
- class_device_unregister(&uverbs_dev->class_dev);
+ class_device_destroy(uverbs_class, uverbs_dev->dev->dev);
err_cdev:
- cdev_del(&uverbs_dev->dev);
+ cdev_del(uverbs_dev->dev);
clear_bit(uverbs_dev->devnum, dev_map);
err:
- kfree(uverbs_dev);
+ kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
return;
}
@@ -676,7 +798,16 @@ static void ib_uverbs_remove_one(struct ib_device *device)
if (!uverbs_dev)
return;
- class_device_unregister(&uverbs_dev->class_dev);
+ class_set_devdata(uverbs_dev->class_dev, NULL);
+ class_device_destroy(uverbs_class, uverbs_dev->dev->dev);
+ cdev_del(uverbs_dev->dev);
+
+ spin_lock(&map_lock);
+ dev_table[uverbs_dev->devnum] = NULL;
+ spin_unlock(&map_lock);
+
+ clear_bit(uverbs_dev->devnum, dev_map);
+ kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
}
static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
@@ -706,13 +837,14 @@ static int __init ib_uverbs_init(void)
goto out;
}
- ret = class_register(&uverbs_class);
- if (ret) {
+ uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
+ if (IS_ERR(uverbs_class)) {
+ ret = PTR_ERR(uverbs_class);
printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
goto out_chrdev;
}
- ret = class_create_file(&uverbs_class, &class_attr_abi_version);
+ ret = class_create_file(uverbs_class, &class_attr_abi_version);
if (ret) {
printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
goto out_class;
@@ -746,7 +878,7 @@ out_fs:
unregister_filesystem(&uverbs_event_fs);
out_class:
- class_unregister(&uverbs_class);
+ class_destroy(uverbs_class);
out_chrdev:
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
@@ -760,8 +892,15 @@ static void __exit ib_uverbs_cleanup(void)
ib_unregister_client(&uverbs_client);
mntput(uverbs_event_mnt);
unregister_filesystem(&uverbs_event_fs);
- class_unregister(&uverbs_class);
+ class_destroy(uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ idr_destroy(&ib_uverbs_pd_idr);
+ idr_destroy(&ib_uverbs_mr_idr);
+ idr_destroy(&ib_uverbs_mw_idr);
+ idr_destroy(&ib_uverbs_ah_idr);
+ idr_destroy(&ib_uverbs_cq_idr);
+ idr_destroy(&ib_uverbs_qp_idr);
+ idr_destroy(&ib_uverbs_srq_idr);
}
module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5081d90..72d3ef7 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -523,16 +523,22 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
- return qp->device->attach_mcast ?
- qp->device->attach_mcast(qp, gid, lid) :
- -ENOSYS;
+ if (!qp->device->attach_mcast)
+ return -ENOSYS;
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ return qp->device->attach_mcast(qp, gid, lid);
}
EXPORT_SYMBOL(ib_attach_mcast);
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
- return qp->device->detach_mcast ?
- qp->device->detach_mcast(qp, gid, lid) :
- -ENOSYS;
+ if (!qp->device->detach_mcast)
+ return -ENOSYS;
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ return qp->device->detach_mcast(qp, gid, lid);
}
EXPORT_SYMBOL(ib_detach_mcast);
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
index c44f7ba..47ec5a7 100644
--- a/drivers/infiniband/hw/mthca/Makefile
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -7,4 +7,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
- mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o
+ mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \
+ mthca_catas.o
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
new file mode 100644
index 0000000..7ac52af
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "mthca_dev.h"
+
+enum {
+ MTHCA_CATAS_POLL_INTERVAL = 5 * HZ,
+
+ MTHCA_CATAS_TYPE_INTERNAL = 0,
+ MTHCA_CATAS_TYPE_UPLINK = 3,
+ MTHCA_CATAS_TYPE_DDR = 4,
+ MTHCA_CATAS_TYPE_PARITY = 5,
+};
+
+static DEFINE_SPINLOCK(catas_lock);
+
+static void handle_catas(struct mthca_dev *dev)
+{
+ struct ib_event event;
+ const char *type;
+ int i;
+
+ event.device = &dev->ib_dev;
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.element.port_num = 0;
+
+ ib_dispatch_event(&event);
+
+ switch (swab32(readl(dev->catas_err.map)) >> 24) {
+ case MTHCA_CATAS_TYPE_INTERNAL:
+ type = "internal error";
+ break;
+ case MTHCA_CATAS_TYPE_UPLINK:
+ type = "uplink bus error";
+ break;
+ case MTHCA_CATAS_TYPE_DDR:
+ type = "DDR data error";
+ break;
+ case MTHCA_CATAS_TYPE_PARITY:
+ type = "internal parity error";
+ break;
+ default:
+ type = "unknown error";
+ break;
+ }
+
+ mthca_err(dev, "Catastrophic error detected: %s\n", type);
+ for (i = 0; i < dev->catas_err.size; ++i)
+ mthca_err(dev, " buf[%02x]: %08x\n",
+ i, swab32(readl(dev->catas_err.map + i)));
+}
+
+static void poll_catas(unsigned long dev_ptr)
+{
+ struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < dev->catas_err.size; ++i)
+ if (readl(dev->catas_err.map + i)) {
+ handle_catas(dev);
+ return;
+ }
+
+ spin_lock_irqsave(&catas_lock, flags);
+ if (dev->catas_err.stop)
+ mod_timer(&dev->catas_err.timer,
+ jiffies + MTHCA_CATAS_POLL_INTERVAL);
+ spin_unlock_irqrestore(&catas_lock, flags);
+
+ return;
+}
+
+void mthca_start_catas_poll(struct mthca_dev *dev)
+{
+ unsigned long addr;
+
+ init_timer(&dev->catas_err.timer);
+ dev->catas_err.stop = 0;
+ dev->catas_err.map = NULL;
+
+ addr = pci_resource_start(dev->pdev, 0) +
+ ((pci_resource_len(dev->pdev, 0) - 1) &
+ dev->catas_err.addr);
+
+ if (!request_mem_region(addr, dev->catas_err.size * 4,
+ DRV_NAME)) {
+ mthca_warn(dev, "couldn't request catastrophic error region "
+ "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
+ return;
+ }
+
+ dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
+ if (!dev->catas_err.map) {
+ mthca_warn(dev, "couldn't map catastrophic error region "
+ "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
+ release_mem_region(addr, dev->catas_err.size * 4);
+ return;
+ }
+
+ dev->catas_err.timer.data = (unsigned long) dev;
+ dev->catas_err.timer.function = poll_catas;
+ dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL;
+ add_timer(&dev->catas_err.timer);
+}
+
+void mthca_stop_catas_poll(struct mthca_dev *dev)
+{
+ spin_lock_irq(&catas_lock);
+ dev->catas_err.stop = 1;
+ spin_unlock_irq(&catas_lock);
+
+ del_timer_sync(&dev->catas_err.timer);
+
+ if (dev->catas_err.map) {
+ iounmap(dev->catas_err.map);
+ release_mem_region(pci_resource_start(dev->pdev, 0) +
+ ((pci_resource_len(dev->pdev, 0) - 1) &
+ dev->catas_err.addr),
+ dev->catas_err.size * 4);
+ }
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 378646b..49f211d 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -706,9 +707,13 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
dev->cmd.max_cmds = 1 << lg;
+ MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET);
+ MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
mthca_dbg(dev, "FW version %012llx, max commands %d\n",
(unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
+ mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n",
+ (unsigned long long) dev->catas_err.addr, dev->catas_err.size);
if (mthca_is_memfree(dev)) {
MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
@@ -933,9 +938,9 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
goto out;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
- dev_lim->max_srq_sz = 1 << field;
+ dev_lim->max_srq_sz = (1 << field) - 1;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
- dev_lim->max_qp_sz = 1 << field;
+ dev_lim->max_qp_sz = (1 << field) - 1;
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
dev_lim->reserved_qps = 1 << (field & 0xf);
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
@@ -1045,6 +1050,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
dev_lim->max_pds, dev_lim->reserved_mgms);
+ mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
+ dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz);
mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 7bff5a8..7e68bd4 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -83,6 +83,8 @@ enum {
/* Arbel FW gives us these, but we need them for Tavor */
MTHCA_MPT_ENTRY_SIZE = 0x40,
MTHCA_MTT_SEG_SIZE = 0x40,
+
+ MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
};
enum {
@@ -128,12 +130,16 @@ struct mthca_limits {
int num_uars;
int max_sg;
int num_qps;
+ int max_wqes;
+ int max_qp_init_rdma;
int reserved_qps;
int num_srqs;
+ int max_srq_wqes;
int reserved_srqs;
int num_eecs;
int reserved_eecs;
int num_cqs;
+ int max_cqes;
int reserved_cqs;
int num_eqs;
int reserved_eqs;
@@ -148,6 +154,7 @@ struct mthca_limits {
int reserved_mcgs;
int num_pds;
int reserved_pds;
+ u32 flags;
u8 port_width_cap;
};
@@ -251,6 +258,14 @@ struct mthca_mcg_table {
struct mthca_icm_table *table;
};
+struct mthca_catas_err {
+ u64 addr;
+ u32 __iomem *map;
+ unsigned long stop;
+ u32 size;
+ struct timer_list timer;
+};
+
struct mthca_dev {
struct ib_device ib_dev;
struct pci_dev *pdev;
@@ -311,6 +326,8 @@ struct mthca_dev {
struct mthca_av_table av_table;
struct mthca_mcg_table mcg_table;
+ struct mthca_catas_err catas_err;
+
struct mthca_uar driver_uar;
struct mthca_db_table *db_tab;
struct mthca_pd driver_pd;
@@ -398,6 +415,9 @@ void mthca_cleanup_mcg_table(struct mthca_dev *dev);
int mthca_register_device(struct mthca_dev *dev);
void mthca_unregister_device(struct mthca_dev *dev);
+void mthca_start_catas_poll(struct mthca_dev *dev);
+void mthca_stop_catas_poll(struct mthca_dev *dev);
+
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
@@ -447,6 +467,8 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
struct ib_srq_attr *attr, struct mthca_srq *srq);
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask);
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
enum ib_event_type event_type);
void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8dfafda..e5a047a 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -83,7 +83,8 @@ enum {
MTHCA_EVENT_TYPE_PATH_MIG = 0x01,
MTHCA_EVENT_TYPE_COMM_EST = 0x02,
MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03,
- MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13,
+ MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13,
+ MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14,
MTHCA_EVENT_TYPE_CQ_ERROR = 0x04,
MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06,
@@ -110,8 +111,9 @@ enum {
(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \
(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
-#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
- (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE)
+#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
+ (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD)
#define MTHCA_EQ_DB_INC_CI (1 << 24)
@@ -142,6 +144,9 @@ struct mthca_eqe {
__be32 qpn;
} __attribute__((packed)) qp;
struct {
+ __be32 srqn;
+ } __attribute__((packed)) srq;
+ struct {
__be32 cqn;
u32 reserved1;
u8 reserved2[3];
@@ -305,6 +310,16 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
IB_EVENT_SQ_DRAINED);
break;
+ case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_QP_LAST_WQE_REACHED);
+ break;
+
+ case MTHCA_EVENT_TYPE_SRQ_LIMIT:
+ mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
+ IB_EVENT_SRQ_LIMIT_REACHED);
+ break;
+
case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
IB_EVENT_QP_FATAL);
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 9804174..8561b29 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -46,11 +46,6 @@ enum {
MTHCA_VENDOR_CLASS2 = 0xa
};
-struct mthca_trap_mad {
- struct ib_mad *mad;
- DECLARE_PCI_UNMAP_ADDR(mapping)
-};
-
static void update_sm_ah(struct mthca_dev *dev,
u8 port_num, u16 lid, u8 sl)
{
@@ -116,49 +111,14 @@ static void forward_trap(struct mthca_dev *dev,
struct ib_mad *mad)
{
int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
- struct mthca_trap_mad *tmad;
- struct ib_sge gather_list;
- struct ib_send_wr *bad_wr, wr = {
- .opcode = IB_WR_SEND,
- .sg_list = &gather_list,
- .num_sge = 1,
- .send_flags = IB_SEND_SIGNALED,
- .wr = {
- .ud = {
- .remote_qpn = qpn,
- .remote_qkey = qpn ? IB_QP1_QKEY : 0,
- .timeout_ms = 0
- }
- }
- };
+ struct ib_mad_send_buf *send_buf;
struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
int ret;
unsigned long flags;
if (agent) {
- tmad = kmalloc(sizeof *tmad, GFP_KERNEL);
- if (!tmad)
- return;
-
- tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL);
- if (!tmad->mad) {
- kfree(tmad);
- return;
- }
-
- memcpy(tmad->mad, mad, sizeof *mad);
-
- wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr;
- wr.wr_id = (unsigned long) tmad;
-
- gather_list.addr = dma_map_single(agent->device->dma_device,
- tmad->mad,
- sizeof *tmad->mad,
- DMA_TO_DEVICE);
- gather_list.length = sizeof *tmad->mad;
- gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey;
- pci_unmap_addr_set(tmad, mapping, gather_list.addr);
-
+ send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
+ IB_MGMT_MAD_DATA, GFP_ATOMIC);
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (this is
@@ -166,21 +126,15 @@ static void forward_trap(struct mthca_dev *dev,
* it's OK for our devices).
*/
spin_lock_irqsave(&dev->sm_lock, flags);
- wr.wr.ud.ah = dev->sm_ah[port_num - 1];
- if (wr.wr.ud.ah)
- ret = ib_post_send_mad(agent, &wr, &bad_wr);
+ memcpy(send_buf->mad, mad, sizeof *mad);
+ if ((send_buf->ah = dev->sm_ah[port_num - 1]))
+ ret = ib_post_send_mad(send_buf, NULL);
else
ret = -EINVAL;
spin_unlock_irqrestore(&dev->sm_lock, flags);
- if (ret) {
- dma_unmap_single(agent->device->dma_device,
- pci_unmap_addr(tmad, mapping),
- sizeof *tmad->mad,
- DMA_TO_DEVICE);
- kfree(tmad->mad);
- kfree(tmad);
- }
+ if (ret)
+ ib_free_send_mad(send_buf);
}
}
@@ -267,15 +221,7 @@ int mthca_process_mad(struct ib_device *ibdev,
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
- struct mthca_trap_mad *tmad =
- (void *) (unsigned long) mad_send_wc->wr_id;
-
- dma_unmap_single(agent->device->dma_device,
- pci_unmap_addr(tmad, mapping),
- sizeof *tmad->mad,
- DMA_TO_DEVICE);
- kfree(tmad->mad);
- kfree(tmad);
+ ib_free_send_mad(mad_send_wc->send_buf);
}
int mthca_create_agents(struct mthca_dev *dev)
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 23a3f56..883d1e5 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -162,9 +162,18 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
mdev->limits.pkey_table_len = dev_lim->max_pkeys;
mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
mdev->limits.max_sg = dev_lim->max_sg;
+ mdev->limits.max_wqes = dev_lim->max_qp_sz;
+ mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp;
mdev->limits.reserved_qps = dev_lim->reserved_qps;
+ mdev->limits.max_srq_wqes = dev_lim->max_srq_sz;
mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
+ /*
+ * Subtract 1 from the limit because we need to allocate a
+ * spare CQE so the HCA HW can tell the difference between an
+ * empty CQ and a full CQ.
+ */
+ mdev->limits.max_cqes = dev_lim->max_cq_sz - 1;
mdev->limits.reserved_cqs = dev_lim->reserved_cqs;
mdev->limits.reserved_eqs = dev_lim->reserved_eqs;
mdev->limits.reserved_mtts = dev_lim->reserved_mtts;
@@ -172,6 +181,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
mdev->limits.reserved_uars = dev_lim->reserved_uars;
mdev->limits.reserved_pds = dev_lim->reserved_pds;
mdev->limits.port_width_cap = dev_lim->max_port_width;
+ mdev->limits.flags = dev_lim->flags;
/* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
May be doable since hardware supports it for SRQ.
@@ -1186,6 +1196,7 @@ MODULE_DEVICE_TABLE(pci, mthca_pci_table);
static struct pci_driver mthca_driver = {
.name = DRV_NAME,
+ .owner = THIS_MODULE,
.id_table = mthca_pci_table,
.probe = mthca_init_one,
.remove = __devexit_p(mthca_remove_one)
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index a270760..b47ea7d 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -37,10 +37,6 @@
#include "mthca_dev.h"
#include "mthca_cmd.h"
-enum {
- MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
-};
-
struct mthca_mgm {
__be32 next_gid_index;
u32 reserved[3];
@@ -189,7 +185,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
- if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
+ if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
+ mthca_dbg(dev, "QP %06x already a member of MGM\n",
+ ibqp->qp_num);
+ err = 0;
+ goto out;
+ } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
break;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 9ad8b3b..d72fe95 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -487,7 +487,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
}
}
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db)
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+ u32 qn, __be32 **db)
{
int group;
int start, end, dir;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index 29433f2..4fdca26 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -173,7 +173,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
int mthca_init_db_tab(struct mthca_dev *dev);
void mthca_cleanup_db_tab(struct mthca_dev *dev);
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db);
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+ u32 qn, __be32 **db);
void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
#endif /* MTHCA_MEMFREE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 3f5319a..1b9477e 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
*/
#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
#include <linux/mm.h>
#include "mthca_dev.h"
@@ -90,15 +91,26 @@ static int mthca_query_device(struct ib_device *ibdev,
props->max_mr_size = ~0ull;
props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps;
- props->max_qp_wr = 0xffff;
+ props->max_qp_wr = mdev->limits.max_wqes;
props->max_sge = mdev->limits.max_sg;
props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
- props->max_cqe = 0xffff;
+ props->max_cqe = mdev->limits.max_cqes;
props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds;
props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift;
- props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift;
+ props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
+ props->max_srq_wr = mdev->limits.max_srq_wqes;
+ props->max_srq_sge = mdev->limits.max_sg;
props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay;
+ props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+ props->max_pkeys = mdev->limits.pkey_table_len;
+ props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms;
+ props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_mcast_grp;
err = 0;
out:
@@ -150,9 +162,13 @@ static int mthca_query_port(struct ib_device *ibdev,
props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len;
props->max_msg_sz = 0x80000000;
props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len;
+ props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
props->active_speed = out_mad->data[35] >> 4;
+ props->max_mtu = out_mad->data[41] & 0xf;
+ props->active_mtu = out_mad->data[36] >> 4;
+ props->subnet_timeout = out_mad->data[51] & 0x1f;
out:
kfree(in_mad);
@@ -634,6 +650,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
int nent;
int err;
+ if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
+ return ERR_PTR(-EINVAL);
+
if (context) {
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
return ERR_PTR(-EFAULT);
@@ -1058,6 +1077,26 @@ int mthca_register_device(struct mthca_dev *dev)
strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
+ dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
+ dev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
dev->ib_dev.node_type = IB_NODE_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.dma_device = &dev->pdev->dev;
@@ -1077,6 +1116,7 @@ int mthca_register_device(struct mthca_dev *dev)
if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
dev->ib_dev.create_srq = mthca_create_srq;
+ dev->ib_dev.modify_srq = mthca_modify_srq;
dev->ib_dev.destroy_srq = mthca_destroy_srq;
if (mthca_is_memfree(dev))
@@ -1135,10 +1175,13 @@ int mthca_register_device(struct mthca_dev *dev)
}
}
+ mthca_start_catas_poll(dev);
+
return 0;
}
void mthca_unregister_device(struct mthca_dev *dev)
{
+ mthca_stop_catas_poll(dev);
ib_unregister_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 5fa0066..62ff091 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -338,8 +338,7 @@ static const struct {
[UC] = (IB_QP_AV |
IB_QP_PATH_MTU |
IB_QP_DEST_QPN |
- IB_QP_RQ_PSN |
- IB_QP_MAX_DEST_RD_ATOMIC),
+ IB_QP_RQ_PSN),
[RC] = (IB_QP_AV |
IB_QP_PATH_MTU |
IB_QP_DEST_QPN |
@@ -368,8 +367,7 @@ static const struct {
.trans = MTHCA_TRANS_RTR2RTS,
.req_param = {
[UD] = IB_QP_SQ_PSN,
- [UC] = (IB_QP_SQ_PSN |
- IB_QP_MAX_QP_RD_ATOMIC),
+ [UC] = IB_QP_SQ_PSN,
[RC] = (IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
@@ -446,8 +444,6 @@ static const struct {
[UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[UC] = (IB_QP_AV |
- IB_QP_MAX_QP_RD_ATOMIC |
- IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
@@ -478,7 +474,7 @@ static const struct {
.opt_param = {
[UD] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
- [UC] = (IB_QP_CUR_STATE),
+ [UC] = IB_QP_CUR_STATE,
[RC] = (IB_QP_CUR_STATE |
IB_QP_MIN_RNR_TIMER),
[MLX] = (IB_QP_CUR_STATE |
@@ -1112,8 +1108,10 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
struct mthca_qp *qp)
{
/* Sanity check QP size before proceeding */
- if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 ||
- cap->max_send_sge > 64 || cap->max_recv_sge > 64)
+ if (cap->max_send_wr > dev->limits.max_wqes ||
+ cap->max_recv_wr > dev->limits.max_wqes ||
+ cap->max_send_sge > dev->limits.max_sg ||
+ cap->max_recv_sge > dev->limits.max_sg)
return -EINVAL;
if (mthca_is_memfree(dev)) {
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 18998d4..64f70aa 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -186,7 +186,8 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
int err;
/* Sanity check SRQ size before proceeding */
- if (attr->max_wr > 16 << 20 || attr->max_sge > 64)
+ if (attr->max_wr > dev->limits.max_srq_wqes ||
+ attr->max_sge > dev->limits.max_sg)
return -EINVAL;
srq->max = attr->max_wr;
@@ -332,6 +333,29 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
mthca_free_mailbox(dev, mailbox);
}
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ int ret;
+ u8 status;
+
+ /* We don't support resizing SRQs (yet?) */
+ if (attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
+ if (attr_mask & IB_SRQ_LIMIT) {
+ ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
+ if (ret)
+ return ret;
+ if (status)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
enum ib_event_type event_type)
{
@@ -354,7 +378,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
event.device = &dev->ib_dev;
event.event = event_type;
- event.element.srq = &srq->ibsrq;
+ event.element.srq = &srq->ibsrq;
srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
out:
@@ -415,6 +439,14 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
wqe = get_wqe(srq, ind);
next_ind = *wqe_to_link(wqe);
+
+ if (next_ind < 0) {
+ mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
prev_wqe = srq->last;
srq->last = wqe;
@@ -506,6 +538,13 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
wqe = get_wqe(srq, ind);
next_ind = *wqe_to_link(wqe);
+ if (next_ind < 0) {
+ mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
((struct mthca_next_seg *) wqe)->nda_op =
cpu_to_be32((next_ind << srq->wqe_shift) | 1);
((struct mthca_next_seg *) wqe)->ee_nds = 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index 41613ec..bb015c6 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -38,6 +38,12 @@
#include <linux/types.h>
/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MTHCA_UVERBS_ABI_VERSION 1
+
+/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 4ea1c1c..c994a91 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -100,7 +100,12 @@ struct ipoib_pseudoheader {
struct ipoib_mcast;
-struct ipoib_buf {
+struct ipoib_rx_buf {
+ struct sk_buff *skb;
+ dma_addr_t mapping;
+};
+
+struct ipoib_tx_buf {
struct sk_buff *skb;
DECLARE_PCI_UNMAP_ADDR(mapping)
};
@@ -150,14 +155,14 @@ struct ipoib_dev_priv {
unsigned int admin_mtu;
unsigned int mcast_mtu;
- struct ipoib_buf *rx_ring;
+ struct ipoib_rx_buf *rx_ring;
- spinlock_t tx_lock;
- struct ipoib_buf *tx_ring;
- unsigned tx_head;
- unsigned tx_tail;
- struct ib_sge tx_sge;
- struct ib_send_wr tx_wr;
+ spinlock_t tx_lock;
+ struct ipoib_tx_buf *tx_ring;
+ unsigned tx_head;
+ unsigned tx_tail;
+ struct ib_sge tx_sge;
+ struct ib_send_wr tx_wr;
struct ib_wc ibwc[IPOIB_NUM_WC];
@@ -277,7 +282,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
int ipoib_mcast_detach(struct net_device *dev, u16 mlid,
union ib_gid *mgid);
-int ipoib_qp_create(struct net_device *dev);
+int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
void ipoib_transport_dev_cleanup(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f744009..192fef8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -95,57 +95,65 @@ void ipoib_free_ah(struct kref *kref)
}
}
-static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv,
- unsigned int wr_id,
- dma_addr_t addr)
+static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
- struct ib_sge list = {
- .addr = addr,
- .length = IPOIB_BUF_SIZE,
- .lkey = priv->mr->lkey,
- };
- struct ib_recv_wr param = {
- .wr_id = wr_id | IPOIB_OP_RECV,
- .sg_list = &list,
- .num_sge = 1,
- };
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ib_sge list;
+ struct ib_recv_wr param;
struct ib_recv_wr *bad_wr;
+ int ret;
+
+ list.addr = priv->rx_ring[id].mapping;
+ list.length = IPOIB_BUF_SIZE;
+ list.lkey = priv->mr->lkey;
+
+ param.next = NULL;
+ param.wr_id = id | IPOIB_OP_RECV;
+ param.sg_list = &list;
+ param.num_sge = 1;
+
+ ret = ib_post_recv(priv->qp, &param, &bad_wr);
+ if (unlikely(ret)) {
+ ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
+ dma_unmap_single(priv->ca->dma_device,
+ priv->rx_ring[id].mapping,
+ IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ dev_kfree_skb_any(priv->rx_ring[id].skb);
+ priv->rx_ring[id].skb = NULL;
+ }
- return ib_post_recv(priv->qp, &param, &bad_wr);
+ return ret;
}
-static int ipoib_ib_post_receive(struct net_device *dev, int id)
+static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
dma_addr_t addr;
- int ret;
skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
- if (!skb) {
- ipoib_warn(priv, "failed to allocate receive buffer\n");
-
- priv->rx_ring[id].skb = NULL;
+ if (!skb)
return -ENOMEM;
- }
- skb_reserve(skb, 4); /* 16 byte align IP header */
- priv->rx_ring[id].skb = skb;
+
+ /*
+ * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
+ * header. So we need 4 more bytes to get to 48 and align the
+ * IP header to a multiple of 16.
+ */
+ skb_reserve(skb, 4);
+
addr = dma_map_single(priv->ca->dma_device,
skb->data, IPOIB_BUF_SIZE,
DMA_FROM_DEVICE);
- pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr);
-
- ret = ipoib_ib_receive(priv, id, addr);
- if (ret) {
- ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n",
- id, ret);
- dma_unmap_single(priv->ca->dma_device, addr,
- IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(addr))) {
dev_kfree_skb_any(skb);
- priv->rx_ring[id].skb = NULL;
+ return -EIO;
}
- return ret;
+ priv->rx_ring[id].skb = skb;
+ priv->rx_ring[id].mapping = addr;
+
+ return 0;
}
static int ipoib_ib_post_receives(struct net_device *dev)
@@ -154,6 +162,10 @@ static int ipoib_ib_post_receives(struct net_device *dev)
int i;
for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
+ if (ipoib_alloc_rx_skb(dev, i)) {
+ ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
+ return -ENOMEM;
+ }
if (ipoib_ib_post_receive(dev, i)) {
ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
return -EIO;
@@ -176,28 +188,36 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
wr_id &= ~IPOIB_OP_RECV;
if (wr_id < IPOIB_RX_RING_SIZE) {
- struct sk_buff *skb = priv->rx_ring[wr_id].skb;
-
- priv->rx_ring[wr_id].skb = NULL;
+ struct sk_buff *skb = priv->rx_ring[wr_id].skb;
+ dma_addr_t addr = priv->rx_ring[wr_id].mapping;
- dma_unmap_single(priv->ca->dma_device,
- pci_unmap_addr(&priv->rx_ring[wr_id],
- mapping),
- IPOIB_BUF_SIZE,
- DMA_FROM_DEVICE);
-
- if (wc->status != IB_WC_SUCCESS) {
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
if (wc->status != IB_WC_WR_FLUSH_ERR)
ipoib_warn(priv, "failed recv event "
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
+ dma_unmap_single(priv->ca->dma_device, addr,
+ IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
dev_kfree_skb_any(skb);
+ priv->rx_ring[wr_id].skb = NULL;
return;
}
+ /*
+ * If we can't allocate a new RX buffer, dump
+ * this packet and reuse the old buffer.
+ */
+ if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+ ++priv->stats.rx_dropped;
+ goto repost;
+ }
+
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
+ dma_unmap_single(priv->ca->dma_device, addr,
+ IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+
skb_put(skb, wc->byte_len);
skb_pull(skb, IB_GRH_BYTES);
@@ -220,8 +240,8 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
dev_kfree_skb_any(skb);
}
- /* repost receive */
- if (ipoib_ib_post_receive(dev, wr_id))
+ repost:
+ if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
ipoib_warn(priv, "ipoib_ib_post_receive failed "
"for buf %d\n", wr_id);
} else
@@ -229,7 +249,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
wr_id);
} else {
- struct ipoib_buf *tx_req;
+ struct ipoib_tx_buf *tx_req;
unsigned long flags;
if (wr_id >= IPOIB_TX_RING_SIZE) {
@@ -302,7 +322,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_ah *address, u32 qpn)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_buf *tx_req;
+ struct ipoib_tx_buf *tx_req;
dma_addr_t addr;
if (skb->len > dev->mtu + INFINIBAND_ALEN) {
@@ -387,9 +407,9 @@ int ipoib_ib_dev_open(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
- ret = ipoib_qp_create(dev);
+ ret = ipoib_init_qp(dev);
if (ret) {
- ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret);
+ ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
return -1;
}
@@ -468,7 +488,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
struct ib_qp_attr qp_attr;
int attr_mask;
unsigned long begin;
- struct ipoib_buf *tx_req;
+ struct ipoib_tx_buf *tx_req;
int i;
/* Kill the existing QP and allocate a new one */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 6c5bf07..cd4f423 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -637,8 +637,11 @@ static void ipoib_timeout(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- ipoib_warn(priv, "transmit timeout: latency %ld\n",
- jiffies - dev->trans_start);
+ ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
+ jiffies_to_msecs(jiffies - dev->trans_start));
+ ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
+ netif_queue_stopped(dev),
+ priv->tx_head, priv->tx_tail);
/* XXX reset QP, etc. */
}
@@ -729,7 +732,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
/* Allocate RX/TX "rings" to hold queued skbs */
- priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf),
+ priv->rx_ring = kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
GFP_KERNEL);
if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
@@ -737,9 +740,9 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
goto out;
}
memset(priv->rx_ring, 0,
- IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf));
+ IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf));
- priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf),
+ priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
GFP_KERNEL);
if (!priv->tx_ring) {
printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
@@ -747,7 +750,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
goto out_rx_ring_cleanup;
}
memset(priv->tx_ring, 0,
- IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf));
+ IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf));
/* priv->tx_head & tx_tail are already 0 */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 79f59d0..b5902a7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -92,7 +92,7 @@ int ipoib_mcast_detach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
return ret;
}
-int ipoib_qp_create(struct net_device *dev)
+int ipoib_init_qp(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
@@ -149,10 +149,11 @@ int ipoib_qp_create(struct net_device *dev)
return 0;
out_fail:
- ib_destroy_qp(priv->qp);
- priv->qp = NULL;
+ qp_attr.qp_state = IB_QPS_RESET;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ ipoib_warn(priv, "Failed to modify QP to RESET state\n");
- return -EINVAL;
+ return ret;
}
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 1c5f19f..f1c3bc5 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -171,10 +171,10 @@ struct sctp_sndrcvinfo {
*/
enum sctp_sinfo_flags {
- MSG_UNORDERED = 1, /* Send/receive message unordered. */
- MSG_ADDR_OVER = 2, /* Override the primary destination. */
- MSG_ABORT=4, /* Send an ABORT message to the peer. */
- /* MSG_EOF is already defined per socket.h */
+ SCTP_UNORDERED = 1, /* Send/receive message unordered. */
+ SCTP_ADDR_OVER = 2, /* Override the primary destination. */
+ SCTP_ABORT=4, /* Send an ABORT message to the peer. */
+ SCTP_EOF=MSG_FIN, /* Initiate graceful shutdown process. */
};
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 5308683..0a9fcd5 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -109,7 +109,6 @@ struct ib_cm_id;
struct ib_cm_req_event_param {
struct ib_cm_id *listen_id;
- struct ib_device *device;
u8 port;
struct ib_sa_path_rec *primary_path;
@@ -220,7 +219,6 @@ struct ib_cm_apr_event_param {
struct ib_cm_sidr_req_event_param {
struct ib_cm_id *listen_id;
- struct ib_device *device;
u8 port;
u16 pkey;
};
@@ -284,6 +282,7 @@ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
struct ib_cm_id {
ib_cm_handler cm_handler;
void *context;
+ struct ib_device *device;
__be64 service_id;
__be64 service_mask;
enum ib_cm_state state; /* internal CM/debug use */
@@ -295,6 +294,8 @@ struct ib_cm_id {
/**
* ib_create_cm_id - Allocate a communication identifier.
+ * @device: Device associated with the cm_id. All related communication will
+ * be associated with the specified device.
* @cm_handler: Callback invoked to notify the user of CM events.
* @context: User specified context associated with the communication
* identifier.
@@ -302,7 +303,8 @@ struct ib_cm_id {
* Communication identifiers are used to track connection states, service
* ID resolution requests, and listen requests.
*/
-struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+ ib_cm_handler cm_handler,
void *context);
/**
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 4172e68..2c13350 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -109,10 +109,14 @@
#define IB_QP_SET_QKEY 0x80000000
enum {
+ IB_MGMT_MAD_HDR = 24,
IB_MGMT_MAD_DATA = 232,
+ IB_MGMT_RMPP_HDR = 36,
IB_MGMT_RMPP_DATA = 220,
+ IB_MGMT_VENDOR_HDR = 40,
IB_MGMT_VENDOR_DATA = 216,
- IB_MGMT_SA_DATA = 200
+ IB_MGMT_SA_HDR = 56,
+ IB_MGMT_SA_DATA = 200,
};
struct ib_mad_hdr {
@@ -203,26 +207,25 @@ struct ib_class_port_info
/**
* ib_mad_send_buf - MAD data buffer and work request for sends.
- * @mad: References an allocated MAD data buffer. The size of the data
- * buffer is specified in the @send_wr.length field.
- * @mapping: DMA mapping information.
+ * @next: A pointer used to chain together MADs for posting.
+ * @mad: References an allocated MAD data buffer.
* @mad_agent: MAD agent that allocated the buffer.
+ * @ah: The address handle to use when sending the MAD.
* @context: User-controlled context fields.
- * @send_wr: An initialized work request structure used when sending the MAD.
- * The wr_id field of the work request is initialized to reference this
- * data structure.
- * @sge: A scatter-gather list referenced by the work request.
+ * @timeout_ms: Time to wait for a response.
+ * @retries: Number of times to retry a request for a response.
*
* Users are responsible for initializing the MAD buffer itself, with the
* exception of specifying the payload length field in any RMPP MAD.
*/
struct ib_mad_send_buf {
- struct ib_mad *mad;
- DECLARE_PCI_UNMAP_ADDR(mapping)
+ struct ib_mad_send_buf *next;
+ void *mad;
struct ib_mad_agent *mad_agent;
+ struct ib_ah *ah;
void *context[2];
- struct ib_send_wr send_wr;
- struct ib_sge sge;
+ int timeout_ms;
+ int retries;
};
/**
@@ -287,7 +290,7 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
* or @mad_send_wc.
*/
typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
- struct ib_send_wr *send_wr,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_send_wc *mad_send_wc);
/**
@@ -334,13 +337,13 @@ struct ib_mad_agent {
/**
* ib_mad_send_wc - MAD send completion information.
- * @wr_id: Work request identifier associated with the send MAD request.
+ * @send_buf: Send MAD data buffer associated with the send MAD request.
* @status: Completion status.
* @vendor_err: Optional vendor error information returned with a failed
* request.
*/
struct ib_mad_send_wc {
- u64 wr_id;
+ struct ib_mad_send_buf *send_buf;
enum ib_wc_status status;
u32 vendor_err;
};
@@ -366,7 +369,7 @@ struct ib_mad_recv_buf {
* @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers.
* @mad_len: The length of the received MAD, without duplicated headers.
*
- * For received response, the wr_id field of the wc is set to the wr_id
+ * For received response, the wr_id contains a pointer to the ib_mad_send_buf
* for the corresponding send request.
*/
struct ib_mad_recv_wc {
@@ -463,9 +466,9 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
/**
* ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
* with the registered client.
- * @mad_agent: Specifies the associated registration to post the send to.
- * @send_wr: Specifies the information needed to send the MAD(s).
- * @bad_send_wr: Specifies the MAD on which an error was encountered.
+ * @send_buf: Specifies the information needed to send the MAD(s).
+ * @bad_send_buf: Specifies the MAD on which an error was encountered. This
+ * parameter is optional if only a single MAD is posted.
*
* Sent MADs are not guaranteed to complete in the order that they were posted.
*
@@ -479,9 +482,8 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
* defined data being transferred. The paylen_newwin field should be
* specified in network-byte order.
*/
-int ib_post_send_mad(struct ib_mad_agent *mad_agent,
- struct ib_send_wr *send_wr,
- struct ib_send_wr **bad_send_wr);
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+ struct ib_mad_send_buf **bad_send_buf);
/**
* ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer.
@@ -507,23 +509,25 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc);
/**
* ib_cancel_mad - Cancels an outstanding send MAD operation.
* @mad_agent: Specifies the registration associated with sent MAD.
- * @wr_id: Indicates the work request identifier of the MAD to cancel.
+ * @send_buf: Indicates the MAD to cancel.
*
* MADs will be returned to the user through the corresponding
* ib_mad_send_handler.
*/
-void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id);
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf);
/**
* ib_modify_mad - Modifies an outstanding send MAD operation.
* @mad_agent: Specifies the registration associated with sent MAD.
- * @wr_id: Indicates the work request identifier of the MAD to modify.
+ * @send_buf: Indicates the MAD to modify.
* @timeout_ms: New timeout value for sent MAD.
*
* This call will reset the timeout value for a sent MAD to the specified
* value.
*/
-int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms);
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf, u32 timeout_ms);
/**
* ib_redirect_mad_qp - Registers a QP for MAD services.
@@ -572,7 +576,6 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
* @remote_qpn: Specifies the QPN of the receiving node.
* @pkey_index: Specifies which PKey the MAD will be sent using. This field
* is valid only if the remote_qpn is QP 1.
- * @ah: References the address handle used to transfer to the remote node.
* @rmpp_active: Indicates if the send will enable RMPP.
* @hdr_len: Indicates the size of the data header of the MAD. This length
* should include the common MAD header, RMPP header, plus any class
@@ -582,11 +585,10 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
* additional padding that may be necessary.
* @gfp_mask: GFP mask used for the memory allocation.
*
- * This is a helper routine that may be used to allocate a MAD. Users are
- * not required to allocate outbound MADs using this call. The returned
- * MAD send buffer will reference a data buffer usable for sending a MAD, along
+ * This routine allocates a MAD for sending. The returned MAD send buffer
+ * will reference a data buffer usable for sending a MAD, along
* with an initialized work request structure. Users may modify the returned
- * MAD data buffer or work request before posting the send.
+ * MAD data buffer before posting the send.
*
* The returned data buffer will be cleared. Users are responsible for
* initializing the common MAD and any class specific headers. If @rmpp_active
@@ -594,7 +596,7 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
*/
struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
u32 remote_qpn, u16 pkey_index,
- struct ib_ah *ah, int rmpp_active,
+ int rmpp_active,
int hdr_len, int data_len,
gfp_t gfp_mask);
diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h
index e4d1654..3037588 100644
--- a/include/rdma/ib_user_cm.h
+++ b/include/rdma/ib_user_cm.h
@@ -38,7 +38,7 @@
#include <linux/types.h>
-#define IB_USER_CM_ABI_VERSION 2
+#define IB_USER_CM_ABI_VERSION 3
enum {
IB_USER_CM_CMD_CREATE_ID,
@@ -299,8 +299,6 @@ struct ib_ucm_event_get {
};
struct ib_ucm_req_event_resp {
- /* device */
- /* port */
struct ib_ucm_path_rec primary_path;
struct ib_ucm_path_rec alternate_path;
__be64 remote_ca_guid;
@@ -316,6 +314,7 @@ struct ib_ucm_req_event_resp {
__u8 retry_count;
__u8 rnr_retry_count;
__u8 srq;
+ __u8 port;
};
struct ib_ucm_rep_event_resp {
@@ -353,10 +352,9 @@ struct ib_ucm_apr_event_resp {
};
struct ib_ucm_sidr_req_event_resp {
- /* device */
- /* port */
__u16 pkey;
- __u8 reserved[2];
+ __u8 port;
+ __u8 reserved;
};
struct ib_ucm_sidr_rep_event_resp {
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index fd85725..072f3a2 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -42,15 +43,12 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
-#define IB_USER_VERBS_ABI_VERSION 2
+#define IB_USER_VERBS_ABI_VERSION 3
enum {
- IB_USER_VERBS_CMD_QUERY_PARAMS,
IB_USER_VERBS_CMD_GET_CONTEXT,
IB_USER_VERBS_CMD_QUERY_DEVICE,
IB_USER_VERBS_CMD_QUERY_PORT,
- IB_USER_VERBS_CMD_QUERY_GID,
- IB_USER_VERBS_CMD_QUERY_PKEY,
IB_USER_VERBS_CMD_ALLOC_PD,
IB_USER_VERBS_CMD_DEALLOC_PD,
IB_USER_VERBS_CMD_CREATE_AH,
@@ -65,6 +63,7 @@ enum {
IB_USER_VERBS_CMD_ALLOC_MW,
IB_USER_VERBS_CMD_BIND_MW,
IB_USER_VERBS_CMD_DEALLOC_MW,
+ IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
IB_USER_VERBS_CMD_CREATE_CQ,
IB_USER_VERBS_CMD_RESIZE_CQ,
IB_USER_VERBS_CMD_DESTROY_CQ,
@@ -90,8 +89,11 @@ enum {
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
+ * Specifically:
+ * - Do not use pointer types -- pass pointers in __u64 instead.
+ * - Make sure that any structure larger than 4 bytes is padded to a
+ * multiple of 8 bytes. Otherwise the structure size will be
+ * different between 32-bit and 64-bit architectures.
*/
struct ib_uverbs_async_event_desc {
@@ -118,27 +120,14 @@ struct ib_uverbs_cmd_hdr {
__u16 out_words;
};
-/*
- * No driver_data for "query params" command, since this is intended
- * to be a core function with no possible device dependence.
- */
-struct ib_uverbs_query_params {
- __u64 response;
-};
-
-struct ib_uverbs_query_params_resp {
- __u32 num_cq_events;
-};
-
struct ib_uverbs_get_context {
__u64 response;
- __u64 cq_fd_tab;
__u64 driver_data[0];
};
struct ib_uverbs_get_context_resp {
__u32 async_fd;
- __u32 reserved;
+ __u32 num_comp_vectors;
};
struct ib_uverbs_query_device {
@@ -220,31 +209,6 @@ struct ib_uverbs_query_port_resp {
__u8 reserved[3];
};
-struct ib_uverbs_query_gid {
- __u64 response;
- __u8 port_num;
- __u8 index;
- __u8 reserved[6];
- __u64 driver_data[0];
-};
-
-struct ib_uverbs_query_gid_resp {
- __u8 gid[16];
-};
-
-struct ib_uverbs_query_pkey {
- __u64 response;
- __u8 port_num;
- __u8 index;
- __u8 reserved[6];
- __u64 driver_data[0];
-};
-
-struct ib_uverbs_query_pkey_resp {
- __u16 pkey;
- __u16 reserved;
-};
-
struct ib_uverbs_alloc_pd {
__u64 response;
__u64 driver_data[0];
@@ -278,11 +242,21 @@ struct ib_uverbs_dereg_mr {
__u32 mr_handle;
};
+struct ib_uverbs_create_comp_channel {
+ __u64 response;
+};
+
+struct ib_uverbs_create_comp_channel_resp {
+ __u32 fd;
+};
+
struct ib_uverbs_create_cq {
__u64 response;
__u64 user_handle;
__u32 cqe;
- __u32 event_handler;
+ __u32 comp_vector;
+ __s32 comp_channel;
+ __u32 reserved;
__u64 driver_data[0];
};
@@ -291,6 +265,41 @@ struct ib_uverbs_create_cq_resp {
__u32 cqe;
};
+struct ib_uverbs_poll_cq {
+ __u64 response;
+ __u32 cq_handle;
+ __u32 ne;
+};
+
+struct ib_uverbs_wc {
+ __u64 wr_id;
+ __u32 status;
+ __u32 opcode;
+ __u32 vendor_err;
+ __u32 byte_len;
+ __u32 imm_data;
+ __u32 qp_num;
+ __u32 src_qp;
+ __u32 wc_flags;
+ __u16 pkey_index;
+ __u16 slid;
+ __u8 sl;
+ __u8 dlid_path_bits;
+ __u8 port_num;
+ __u8 reserved;
+};
+
+struct ib_uverbs_poll_cq_resp {
+ __u32 count;
+ __u32 reserved;
+ struct ib_uverbs_wc wc[0];
+};
+
+struct ib_uverbs_req_notify_cq {
+ __u32 cq_handle;
+ __u32 solicited_only;
+};
+
struct ib_uverbs_destroy_cq {
__u64 response;
__u32 cq_handle;
@@ -388,6 +397,127 @@ struct ib_uverbs_destroy_qp_resp {
__u32 events_reported;
};
+/*
+ * The ib_uverbs_sge structure isn't used anywhere, since we assume
+ * the ib_sge structure is packed the same way on 32-bit and 64-bit
+ * architectures in both kernel and user space. It's just here to
+ * document the ABI.
+ */
+struct ib_uverbs_sge {
+ __u64 addr;
+ __u32 length;
+ __u32 lkey;
+};
+
+struct ib_uverbs_send_wr {
+ __u64 wr_id;
+ __u32 num_sge;
+ __u32 opcode;
+ __u32 send_flags;
+ __u32 imm_data;
+ union {
+ struct {
+ __u64 remote_addr;
+ __u32 rkey;
+ __u32 reserved;
+ } rdma;
+ struct {
+ __u64 remote_addr;
+ __u64 compare_add;
+ __u64 swap;
+ __u32 rkey;
+ __u32 reserved;
+ } atomic;
+ struct {
+ __u32 ah;
+ __u32 remote_qpn;
+ __u32 remote_qkey;
+ __u32 reserved;
+ } ud;
+ } wr;
+};
+
+struct ib_uverbs_post_send {
+ __u64 response;
+ __u32 qp_handle;
+ __u32 wr_count;
+ __u32 sge_count;
+ __u32 wqe_size;
+ struct ib_uverbs_send_wr send_wr[0];
+};
+
+struct ib_uverbs_post_send_resp {
+ __u32 bad_wr;
+};
+
+struct ib_uverbs_recv_wr {
+ __u64 wr_id;
+ __u32 num_sge;
+ __u32 reserved;
+};
+
+struct ib_uverbs_post_recv {
+ __u64 response;
+ __u32 qp_handle;
+ __u32 wr_count;
+ __u32 sge_count;
+ __u32 wqe_size;
+ struct ib_uverbs_recv_wr recv_wr[0];
+};
+
+struct ib_uverbs_post_recv_resp {
+ __u32 bad_wr;
+};
+
+struct ib_uverbs_post_srq_recv {
+ __u64 response;
+ __u32 srq_handle;
+ __u32 wr_count;
+ __u32 sge_count;
+ __u32 wqe_size;
+ struct ib_uverbs_recv_wr recv[0];
+};
+
+struct ib_uverbs_post_srq_recv_resp {
+ __u32 bad_wr;
+};
+
+struct ib_uverbs_global_route {
+ __u8 dgid[16];
+ __u32 flow_label;
+ __u8 sgid_index;
+ __u8 hop_limit;
+ __u8 traffic_class;
+ __u8 reserved;
+};
+
+struct ib_uverbs_ah_attr {
+ struct ib_uverbs_global_route grh;
+ __u16 dlid;
+ __u8 sl;
+ __u8 src_path_bits;
+ __u8 static_rate;
+ __u8 is_global;
+ __u8 port_num;
+ __u8 reserved;
+};
+
+struct ib_uverbs_create_ah {
+ __u64 response;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 reserved;
+ struct ib_uverbs_ah_attr attr;
+};
+
+struct ib_uverbs_create_ah_resp {
+ __u32 ah_handle;
+};
+
+struct ib_uverbs_destroy_ah {
+ __u32 ah_handle;
+};
+
struct ib_uverbs_attach_mcast {
__u8 gid[16];
__u32 qp_handle;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e6f4c9e..f72d46d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -595,11 +595,8 @@ struct ib_send_wr {
} atomic;
struct {
struct ib_ah *ah;
- struct ib_mad_hdr *mad_hdr;
u32 remote_qpn;
u32 remote_qkey;
- int timeout_ms; /* valid for MADs only */
- int retries; /* valid for MADs only */
u16 pkey_index; /* valid for GSI only */
u8 port_num; /* valid for DR SMPs on switch only */
} ud;
@@ -951,6 +948,9 @@ struct ib_device {
IB_DEV_UNREGISTERED
} reg_state;
+ u64 uverbs_cmd_mask;
+ int uverbs_abi_ver;
+
u8 node_type;
u8 phys_port_cnt;
};
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 10e82ec..660c61b 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -554,7 +554,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
dp.ppid = sinfo->sinfo_ppid;
/* Set the flags for an unordered send. */
- if (sinfo->sinfo_flags & MSG_UNORDERED) {
+ if (sinfo->sinfo_flags & SCTP_UNORDERED) {
flags |= SCTP_DATA_UNORDERED;
dp.ssn = 0;
} else
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 02e068d..b529af5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1010,6 +1010,19 @@ static int __sctp_connect(struct sock* sk,
err = -EAGAIN;
goto out_free;
}
+ } else {
+ /*
+ * If an unprivileged user inherits a 1-many
+ * style socket with open associations on a
+ * privileged port, it MAY be permitted to
+ * accept new associations, but it SHOULD NOT
+ * be permitted to open new associations.
+ */
+ if (ep->base.bind_addr.port < PROT_SOCK &&
+ !capable(CAP_NET_BIND_SERVICE)) {
+ err = -EACCES;
+ goto out_free;
+ }
}
scope = sctp_scope(&to);
@@ -1389,27 +1402,27 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
SCTP_DEBUG_PRINTK("msg_len: %zu, sinfo_flags: 0x%x\n",
msg_len, sinfo_flags);
- /* MSG_EOF or MSG_ABORT cannot be set on a TCP-style socket. */
- if (sctp_style(sk, TCP) && (sinfo_flags & (MSG_EOF | MSG_ABORT))) {
+ /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
+ if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
err = -EINVAL;
goto out_nounlock;
}
- /* If MSG_EOF is set, no data can be sent. Disallow sending zero
- * length messages when MSG_EOF|MSG_ABORT is not set.
- * If MSG_ABORT is set, the message length could be non zero with
+ /* If SCTP_EOF is set, no data can be sent. Disallow sending zero
+ * length messages when SCTP_EOF|SCTP_ABORT is not set.
+ * If SCTP_ABORT is set, the message length could be non zero with
* the msg_iov set to the user abort reason.
*/
- if (((sinfo_flags & MSG_EOF) && (msg_len > 0)) ||
- (!(sinfo_flags & (MSG_EOF|MSG_ABORT)) && (msg_len == 0))) {
+ if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) ||
+ (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) {
err = -EINVAL;
goto out_nounlock;
}
- /* If MSG_ADDR_OVER is set, there must be an address
+ /* If SCTP_ADDR_OVER is set, there must be an address
* specified in msg_name.
*/
- if ((sinfo_flags & MSG_ADDR_OVER) && (!msg->msg_name)) {
+ if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) {
err = -EINVAL;
goto out_nounlock;
}
@@ -1458,14 +1471,14 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
goto out_unlock;
}
- if (sinfo_flags & MSG_EOF) {
+ if (sinfo_flags & SCTP_EOF) {
SCTP_DEBUG_PRINTK("Shutting down association: %p\n",
asoc);
sctp_primitive_SHUTDOWN(asoc, NULL);
err = 0;
goto out_unlock;
}
- if (sinfo_flags & MSG_ABORT) {
+ if (sinfo_flags & SCTP_ABORT) {
SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
sctp_primitive_ABORT(asoc, msg);
err = 0;
@@ -1477,7 +1490,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
if (!asoc) {
SCTP_DEBUG_PRINTK("There is no association yet.\n");
- if (sinfo_flags & (MSG_EOF | MSG_ABORT)) {
+ if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
err = -EINVAL;
goto out_unlock;
}
@@ -1515,6 +1528,19 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
err = -EAGAIN;
goto out_unlock;
}
+ } else {
+ /*
+ * If an unprivileged user inherits a one-to-many
+ * style socket with open associations on a privileged
+ * port, it MAY be permitted to accept new associations,
+ * but it SHOULD NOT be permitted to open new
+ * associations.
+ */
+ if (ep->base.bind_addr.port < PROT_SOCK &&
+ !capable(CAP_NET_BIND_SERVICE)) {
+ err = -EACCES;
+ goto out_unlock;
+ }
}
scope = sctp_scope(&to);
@@ -1611,10 +1637,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
/* If an address is passed with the sendto/sendmsg call, it is used
* to override the primary destination address in the TCP model, or
- * when MSG_ADDR_OVER flag is set in the UDP model.
+ * when SCTP_ADDR_OVER flag is set in the UDP model.
*/
if ((sctp_style(sk, TCP) && msg_name) ||
- (sinfo_flags & MSG_ADDR_OVER)) {
+ (sinfo_flags & SCTP_ADDR_OVER)) {
chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
if (!chunk_tp) {
err = -EINVAL;
@@ -2306,16 +2332,14 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl
return -EINVAL;
if (get_user(val, (int __user *)optval))
return -EFAULT;
- if ((val < 8) || (val > SCTP_MAX_CHUNK_LEN))
+ if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN)))
return -EINVAL;
sp->user_frag = val;
- if (val) {
- /* Update the frag_point of the existing associations. */
- list_for_each(pos, &(sp->ep->asocs)) {
- asoc = list_entry(pos, struct sctp_association, asocs);
- asoc->frag_point = sctp_frag_point(sp, asoc->pmtu);
- }
+ /* Update the frag_point of the existing associations. */
+ list_for_each(pos, &(sp->ep->asocs)) {
+ asoc = list_entry(pos, struct sctp_association, asocs);
+ asoc->frag_point = sctp_frag_point(sp, asoc->pmtu);
}
return 0;
@@ -2384,14 +2408,14 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
static int sctp_setsockopt_adaption_layer(struct sock *sk, char __user *optval,
int optlen)
{
- __u32 val;
+ struct sctp_setadaption adaption;
- if (optlen < sizeof(__u32))
+ if (optlen != sizeof(struct sctp_setadaption))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(__u32)))
+ if (copy_from_user(&adaption, optval, optlen))
return -EFAULT;
- sctp_sk(sk)->adaption_ind = val;
+ sctp_sk(sk)->adaption_ind = adaption.ssb_adaption_ind;
return 0;
}
@@ -3672,17 +3696,15 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len,
static int sctp_getsockopt_adaption_layer(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
- __u32 val;
+ struct sctp_setadaption adaption;
- if (len < sizeof(__u32))
+ if (len != sizeof(struct sctp_setadaption))
return -EINVAL;
- len = sizeof(__u32);
- val = sctp_sk(sk)->adaption_ind;
- if (put_user(len, optlen))
- return -EFAULT;
- if (copy_to_user(optval, &val, len))
+ adaption.ssb_adaption_ind = sctp_sk(sk)->adaption_ind;
+ if (copy_to_user(optval, &adaption, len))
return -EFAULT;
+
return 0;
}
@@ -4640,8 +4662,8 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
/* Minimally, validate the sinfo_flags. */
if (cmsgs->info->sinfo_flags &
- ~(MSG_UNORDERED | MSG_ADDR_OVER |
- MSG_ABORT | MSG_EOF))
+ ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+ SCTP_ABORT | SCTP_EOF))
return -EINVAL;
break;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 057e7fa..e049f41 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -698,7 +698,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
event->ssn = ntohs(chunk->subh.data_hdr->ssn);
event->ppid = chunk->subh.data_hdr->ppid;
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) {
- event->flags |= MSG_UNORDERED;
+ event->flags |= SCTP_UNORDERED;
event->cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
}
event->tsn = ntohl(chunk->subh.data_hdr->tsn);
@@ -824,7 +824,7 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
*
* recvmsg() flags:
*
- * MSG_UNORDERED - This flag is present when the message was sent
+ * SCTP_UNORDERED - This flag is present when the message was sent
* non-ordered.
*/
sinfo.sinfo_flags = event->flags;
@@ -839,7 +839,7 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
* This field will hold the current cumulative TSN as
* known by the underlying SCTP layer. Note this field is
* ignored when sending and only valid for a receive
- * operation when sinfo_flags are set to MSG_UNORDERED.
+ * operation when sinfo_flags are set to SCTP_UNORDERED.
*/
sinfo.sinfo_cumtsn = event->cumtsn;
/* sinfo_assoc_id: sizeof (sctp_assoc_t)
OpenPOWER on IntegriCloud