Diffstat (limited to 'drivers/infiniband')
 drivers/infiniband/Makefile                             |   19
 drivers/infiniband/core/sa_query.c                      |    2
 drivers/infiniband/core/sysfs.c                         |   85
 drivers/infiniband/core/user_mad.c                      |   75
 drivers/infiniband/core/verbs.c                         |    8
 drivers/infiniband/hw/Makefile                          |   12
 drivers/infiniband/hw/cxgb3/cxio_hal.c                  |    6
 drivers/infiniband/hw/cxgb3/iwch_cm.c                   |    1
 drivers/infiniband/hw/cxgb4/cm.c                        |   17
 drivers/infiniband/hw/cxgb4/cq.c                        |    4
 drivers/infiniband/hw/cxgb4/device.c                    |    2
 drivers/infiniband/hw/cxgb4/provider.c                  |    5
 drivers/infiniband/hw/cxgb4/user.h                      |    2
 drivers/infiniband/hw/ipath/ipath_diag.c                |    4
 drivers/infiniband/hw/ipath/ipath_intr.c                |    4
 drivers/infiniband/hw/ipath/ipath_sdma.c                |    4
 drivers/infiniband/hw/mlx4/ah.c                         |    2
 drivers/infiniband/hw/mlx4/cq.c                         |    6
 drivers/infiniband/hw/mlx4/mad.c                        |   40
 drivers/infiniband/hw/mlx4/main.c                       |   26
 drivers/infiniband/hw/mlx4/mlx4_ib.h                    |    1
 drivers/infiniband/hw/mlx4/qp.c                         |  102
 drivers/infiniband/hw/mlx4/srq.c                        |    7
 drivers/infiniband/hw/mlx4/sysfs.c                      |  105
 drivers/infiniband/hw/mlx5/cq.c                         |   13
 drivers/infiniband/hw/mlx5/mlx5_ib.h                    |   13
 drivers/infiniband/hw/mlx5/mr.c                         |   76
 drivers/infiniband/hw/mlx5/qp.c                         |   42
 drivers/infiniband/hw/mlx5/srq.c                        |   14
 drivers/infiniband/hw/mlx5/user.h                       |    2
 drivers/infiniband/hw/ocrdma/ocrdma_stats.c             |    9
 drivers/infiniband/hw/qib/qib_init.c                    |    8
 drivers/infiniband/hw/qib/qib_mad.c                     |    2
 drivers/infiniband/hw/qib/qib_qp.c                      |    3
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c            |    3
 drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c  |   18
 drivers/infiniband/ulp/Makefile                         |    5
 drivers/infiniband/ulp/ipoib/ipoib_cm.c                 |   18
 drivers/infiniband/ulp/iser/iscsi_iser.c                |  105
 drivers/infiniband/ulp/iser/iscsi_iser.h                |   10
 drivers/infiniband/ulp/iser/iser_verbs.c                |   89
 drivers/infiniband/ulp/srp/ib_srp.c                     |  672
 drivers/infiniband/ulp/srp/ib_srp.h                     |   94
 43 files changed, 1209 insertions(+), 526 deletions(-)
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index bf508b5..dc21836 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,18 +1,3 @@
obj-$(CONFIG_INFINIBAND) += core/
-obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
-obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
-obj-$(CONFIG_INFINIBAND_QIB) += hw/qib/
-obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/
-obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
-obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
-obj-$(CONFIG_INFINIBAND_CXGB4) += hw/cxgb4/
-obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/
-obj-$(CONFIG_MLX5_INFINIBAND) += hw/mlx5/
-obj-$(CONFIG_INFINIBAND_NES) += hw/nes/
-obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/
-obj-$(CONFIG_INFINIBAND_USNIC) += hw/usnic/
-obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
-obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
-obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/
-obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
-obj-$(CONFIG_INFINIBAND_ISERT) += ulp/isert/
+obj-$(CONFIG_INFINIBAND) += hw/
+obj-$(CONFIG_INFINIBAND) += ulp/
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index f820958..233eaf5 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -618,7 +618,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
{
- bool preload = gfp_mask & __GFP_WAIT;
+ bool preload = !!(gfp_mask & __GFP_WAIT);
unsigned long flags;
int ret, id;
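
Annotation: the sa_query.c hunk above normalizes the bitmask test with the !! idiom. A minimal hedged sketch of why that idiom is preferred (DEMO_FLAG_WAIT is an illustrative flag, not the kernel's gfp bits): double negation collapses any set bit to exactly 1, so the value survives assignment to a narrower type.

#define DEMO_FLAG_WAIT 0x100u                            /* hypothetical flag, bit 8 */

static unsigned char demo_preload(unsigned int flags)
{
        unsigned char raw  = flags & DEMO_FLAG_WAIT;     /* 0x100 truncates to 0  */
        unsigned char norm = !!(flags & DEMO_FLAG_WAIT); /* always exactly 0 or 1 */

        return norm | raw;   /* for flags == 0x100: raw is 0, norm is 1 */
}
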
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 7d3292c..cbd0383 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -429,15 +429,19 @@ static void ib_port_release(struct kobject *kobj)
struct attribute *a;
int i;
- for (i = 0; (a = p->gid_group.attrs[i]); ++i)
- kfree(a);
+ if (p->gid_group.attrs) {
+ for (i = 0; (a = p->gid_group.attrs[i]); ++i)
+ kfree(a);
- kfree(p->gid_group.attrs);
+ kfree(p->gid_group.attrs);
+ }
- for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
- kfree(a);
+ if (p->pkey_group.attrs) {
+ for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
+ kfree(a);
- kfree(p->pkey_group.attrs);
+ kfree(p->pkey_group.attrs);
+ }
kfree(p);
}
@@ -534,10 +538,12 @@ static int add_port(struct ib_device *device, int port_num,
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
- kobject_get(device->ports_parent),
+ device->ports_parent,
"%d", port_num);
- if (ret)
- goto err_put;
+ if (ret) {
+ kfree(p);
+ return ret;
+ }
ret = sysfs_create_group(&p->kobj, &pma_group);
if (ret)
@@ -585,6 +591,7 @@ err_free_pkey:
kfree(p->pkey_group.attrs[i]);
kfree(p->pkey_group.attrs);
+ p->pkey_group.attrs = NULL;
err_remove_gid:
sysfs_remove_group(&p->kobj, &p->gid_group);
@@ -594,13 +601,13 @@ err_free_gid:
kfree(p->gid_group.attrs[i]);
kfree(p->gid_group.attrs);
+ p->gid_group.attrs = NULL;
err_remove_pma:
sysfs_remove_group(&p->kobj, &pma_group);
err_put:
- kobject_put(device->ports_parent);
- kfree(p);
+ kobject_put(&p->kobj);
return ret;
}
@@ -809,6 +816,22 @@ static struct attribute_group iw_stats_group = {
.attrs = iw_proto_stats_attrs,
};
+static void free_port_list_attributes(struct ib_device *device)
+{
+ struct kobject *p, *t;
+
+ list_for_each_entry_safe(p, t, &device->port_list, entry) {
+ struct ib_port *port = container_of(p, struct ib_port, kobj);
+ list_del(&p->entry);
+ sysfs_remove_group(p, &pma_group);
+ sysfs_remove_group(p, &port->pkey_group);
+ sysfs_remove_group(p, &port->gid_group);
+ kobject_put(p);
+ }
+
+ kobject_put(device->ports_parent);
+}
+
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
@@ -835,7 +858,7 @@ int ib_device_register_sysfs(struct ib_device *device,
}
device->ports_parent = kobject_create_and_add("ports",
- kobject_get(&class_dev->kobj));
+ &class_dev->kobj);
if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
@@ -862,21 +885,7 @@ int ib_device_register_sysfs(struct ib_device *device,
return 0;
err_put:
- {
- struct kobject *p, *t;
- struct ib_port *port;
-
- list_for_each_entry_safe(p, t, &device->port_list, entry) {
- list_del(&p->entry);
- port = container_of(p, struct ib_port, kobj);
- sysfs_remove_group(p, &pma_group);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- kobject_put(p);
- }
- }
-
- kobject_put(&class_dev->kobj);
+ free_port_list_attributes(device);
err_unregister:
device_unregister(class_dev);
@@ -887,22 +896,18 @@ err:
void ib_device_unregister_sysfs(struct ib_device *device)
{
- struct kobject *p, *t;
- struct ib_port *port;
-
/* Hold kobject until ib_dealloc_device() */
- kobject_get(&device->dev.kobj);
+ struct kobject *kobj_dev = kobject_get(&device->dev.kobj);
+ int i;
- list_for_each_entry_safe(p, t, &device->port_list, entry) {
- list_del(&p->entry);
- port = container_of(p, struct ib_port, kobj);
- sysfs_remove_group(p, &pma_group);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- kobject_put(p);
- }
+ if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats)
+ sysfs_remove_group(kobj_dev, &iw_stats_group);
+
+ free_port_list_attributes(device);
+
+ for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
+ device_remove_file(&device->dev, ib_class_attributes[i]);
- kobject_put(device->ports_parent);
device_unregister(&device->dev);
}
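
Annotation: the new free_port_list_attributes() helper above factors out a common kernel cleanup pattern: walk a list of embedded kobjects, recover the container with container_of(), remove the sysfs groups, and drop the reference so the ktype release frees the structure. A minimal sketch under assumed names (demo_port and demo_free_ports are illustrative, not from this patch):

#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/sysfs.h>

struct demo_port {
        struct kobject kobj;            /* kobj.entry sits on the parent's list */
        struct attribute_group grp;
};

static void demo_free_ports(struct list_head *ports)
{
        struct kobject *p, *t;

        list_for_each_entry_safe(p, t, ports, entry) {
                struct demo_port *port = container_of(p, struct demo_port, kobj);

                list_del(&p->entry);
                sysfs_remove_group(p, &port->grp);
                kobject_put(p);         /* last ref: ktype ->release frees 'port' */
        }
}
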
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index f0d588f..1acb991 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -98,7 +98,7 @@ struct ib_umad_port {
struct ib_umad_device {
int start_port, end_port;
- struct kref ref;
+ struct kobject kobj;
struct ib_umad_port port[0];
};
@@ -134,14 +134,18 @@ static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
-static void ib_umad_release_dev(struct kref *ref)
+static void ib_umad_release_dev(struct kobject *kobj)
{
struct ib_umad_device *dev =
- container_of(ref, struct ib_umad_device, ref);
+ container_of(kobj, struct ib_umad_device, kobj);
kfree(dev);
}
+static struct kobj_type ib_umad_dev_ktype = {
+ .release = ib_umad_release_dev,
+};
+
static int hdr_size(struct ib_umad_file *file)
{
return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
@@ -780,27 +784,19 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
{
struct ib_umad_port *port;
struct ib_umad_file *file;
- int ret;
+ int ret = -ENXIO;
port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
- if (port)
- kref_get(&port->umad_dev->ref);
- else
- return -ENXIO;
mutex_lock(&port->file_mutex);
- if (!port->ib_dev) {
- ret = -ENXIO;
+ if (!port->ib_dev)
goto out;
- }
+ ret = -ENOMEM;
file = kzalloc(sizeof *file, GFP_KERNEL);
- if (!file) {
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
- ret = -ENOMEM;
+ if (!file)
goto out;
- }
mutex_init(&file->mutex);
spin_lock_init(&file->send_lock);
@@ -814,6 +810,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
list_add_tail(&file->port_list, &port->file_list);
ret = nonseekable_open(inode, filp);
+ if (ret) {
+ list_del(&file->port_list);
+ kfree(file);
+ goto out;
+ }
+
+ kobject_get(&port->umad_dev->kobj);
out:
mutex_unlock(&port->file_mutex);
@@ -852,7 +855,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->port->file_mutex);
kfree(file);
- kref_put(&dev->ref, ib_umad_release_dev);
+ kobject_put(&dev->kobj);
return 0;
}
@@ -880,10 +883,6 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
int ret;
port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev);
- if (port)
- kref_get(&port->umad_dev->ref);
- else
- return -ENXIO;
if (filp->f_flags & O_NONBLOCK) {
if (down_trylock(&port->sm_sem)) {
@@ -898,17 +897,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
}
ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
- if (ret) {
- up(&port->sm_sem);
- goto fail;
- }
+ if (ret)
+ goto err_up_sem;
filp->private_data = port;
- return nonseekable_open(inode, filp);
+ ret = nonseekable_open(inode, filp);
+ if (ret)
+ goto err_clr_sm_cap;
+
+ kobject_get(&port->umad_dev->kobj);
+
+ return 0;
+
+err_clr_sm_cap:
+ swap(props.set_port_cap_mask, props.clr_port_cap_mask);
+ ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+
+err_up_sem:
+ up(&port->sm_sem);
fail:
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
return ret;
}
@@ -927,7 +936,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
up(&port->sm_sem);
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ kobject_put(&port->umad_dev->kobj);
return ret;
}
@@ -995,6 +1004,7 @@ static int find_overflow_devnum(void)
}
static int ib_umad_init_port(struct ib_device *device, int port_num,
+ struct ib_umad_device *umad_dev,
struct ib_umad_port *port)
{
int devnum;
@@ -1027,6 +1037,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
cdev_init(&port->cdev, &umad_fops);
port->cdev.owner = THIS_MODULE;
+ port->cdev.kobj.parent = &umad_dev->kobj;
kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
@@ -1045,6 +1056,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
base += IB_UMAD_MAX_PORTS;
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
+ port->sm_cdev.kobj.parent = &umad_dev->kobj;
kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
@@ -1138,7 +1150,7 @@ static void ib_umad_add_one(struct ib_device *device)
if (!umad_dev)
return;
- kref_init(&umad_dev->ref);
+ kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);
umad_dev->start_port = s;
umad_dev->end_port = e;
@@ -1146,7 +1158,8 @@ static void ib_umad_add_one(struct ib_device *device)
for (i = s; i <= e; ++i) {
umad_dev->port[i - s].umad_dev = umad_dev;
- if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
+ if (ib_umad_init_port(device, i, umad_dev,
+ &umad_dev->port[i - s]))
goto err;
}
@@ -1158,7 +1171,7 @@ err:
while (--i >= s)
ib_umad_kill_port(&umad_dev->port[i - s]);
- kref_put(&umad_dev->ref, ib_umad_release_dev);
+ kobject_put(&umad_dev->kobj);
}
static void ib_umad_remove_one(struct ib_device *device)
@@ -1172,7 +1185,7 @@ static void ib_umad_remove_one(struct ib_device *device)
for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
ib_umad_kill_port(&umad_dev->port[i]);
- kref_put(&umad_dev->ref, ib_umad_release_dev);
+ kobject_put(&umad_dev->kobj);
}
static char *umad_devnode(struct device *dev, umode_t *mode)
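
Annotation: the user_mad.c conversion above replaces a bare kref with an embedded kobject, so the character devices can set it as their cdev kobj.parent and keep the containing structure alive. A hedged sketch of the pattern (demo_* names are illustrative):

#include <linux/kobject.h>
#include <linux/slab.h>

struct demo_dev {
        struct kobject kobj;            /* the refcount lives here */
        /* ... per-device state ... */
};

static void demo_release(struct kobject *kobj)
{
        kfree(container_of(kobj, struct demo_dev, kobj));
}

static struct kobj_type demo_ktype = {
        .release = demo_release,
};

static struct demo_dev *demo_alloc(void)
{
        struct demo_dev *d = kzalloc(sizeof(*d), GFP_KERNEL);

        if (d)
                kobject_init(&d->kobj, &demo_ktype);    /* refcount starts at 1 */
        return d;
}

/* users pair kobject_get(&d->kobj) / kobject_put(&d->kobj); the final put
 * invokes demo_release() and frees the device */
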
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 92525f8..c2b89cc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -48,7 +48,7 @@
#include "core_priv.h"
-int ib_rate_to_mult(enum ib_rate rate)
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
switch (rate) {
case IB_RATE_2_5_GBPS: return 1;
@@ -65,7 +65,7 @@ int ib_rate_to_mult(enum ib_rate rate)
}
EXPORT_SYMBOL(ib_rate_to_mult);
-enum ib_rate mult_to_ib_rate(int mult)
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
switch (mult) {
case 1: return IB_RATE_2_5_GBPS;
@@ -82,7 +82,7 @@ enum ib_rate mult_to_ib_rate(int mult)
}
EXPORT_SYMBOL(mult_to_ib_rate);
-int ib_rate_to_mbps(enum ib_rate rate)
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
{
switch (rate) {
case IB_RATE_2_5_GBPS: return 2500;
@@ -107,7 +107,7 @@ int ib_rate_to_mbps(enum ib_rate rate)
}
EXPORT_SYMBOL(ib_rate_to_mbps);
-enum rdma_transport_type
+__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)
{
switch (node_type) {
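
Annotation: __attribute_const__ is the kernel wrapper for the compiler's const function attribute; it tells the optimizer the result depends only on the arguments, so repeated calls to these pure rate-mapping helpers can be folded. A tiny hedged sketch:

#include <linux/compiler.h>

/* pure mapping: no side effects, no reads of mutable global state */
static __attribute_const__ int demo_rate_to_mult(int rate)
{
        return rate * 4;
}
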
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
new file mode 100644
index 0000000..e900b03
--- /dev/null
+++ b/drivers/infiniband/hw/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/
+obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
+obj-$(CONFIG_INFINIBAND_QIB) += qib/
+obj-$(CONFIG_INFINIBAND_EHCA) += ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
+obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
+obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
+obj-$(CONFIG_INFINIBAND_NES) += nes/
+obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
+obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index c3f5aca..de1c61b4 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -735,14 +735,12 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
V_TPT_PAGE_SIZE(page_size));
- tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
+ tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
tpt.len = cpu_to_be32(len);
tpt.va_hi = cpu_to_be32((u32) (to >> 32));
tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
tpt.rsvd_bind_cnt_or_pstag = 0;
- tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
+ tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
}
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
stag_idx +
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 095bb04..cb78b1e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -418,6 +418,7 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
skb->priority = CPL_PRIORITY_DATA;
set_arp_failure_handler(skb, abort_arp_failure);
req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
+ memset(req, 0, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 7de6c9f..96d7131 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -47,6 +47,8 @@
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <rdma/ib_addr.h>
+
#include "iw_cxgb4.h"
static char *states[] = {
@@ -347,10 +349,7 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
static struct net_device *get_real_dev(struct net_device *egress_dev)
{
- struct net_device *phys_dev = egress_dev;
- if (egress_dev->priv_flags & IFF_802_1Q_VLAN)
- phys_dev = vlan_dev_real_dev(egress_dev);
- return phys_dev;
+ return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
}
static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
@@ -1788,16 +1787,16 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
if (!ep->l2t)
goto out;
ep->mtu = dst_mtu(dst);
- ep->tx_chan = cxgb4_port_chan(n->dev);
- ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
+ ep->tx_chan = cxgb4_port_chan(pdev);
+ ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
- ep->txq_idx = cxgb4_port_idx(n->dev) * step;
- ep->ctrlq_idx = cxgb4_port_idx(n->dev);
+ ep->txq_idx = cxgb4_port_idx(pdev) * step;
+ ep->ctrlq_idx = cxgb4_port_idx(pdev);
step = cdev->rdev.lldi.nrxq /
cdev->rdev.lldi.nchan;
ep->rss_qid = cdev->rdev.lldi.rxq_ids[
- cxgb4_port_idx(n->dev) * step];
+ cxgb4_port_idx(pdev) * step];
if (clear_mpa_v1) {
ep->retry_with_mpa_v1 = 0;
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index cfaa56a..7151a02 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -940,7 +940,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
if (!mm2)
goto err4;
- memset(&uresp, 0, sizeof(uresp));
uresp.qid_mask = rhp->rdev.cqmask;
uresp.cqid = chp->cq.cqid;
uresp.size = chp->cq.size;
@@ -951,7 +950,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
uresp.gts_key = ucontext->key;
ucontext->key += PAGE_SIZE;
spin_unlock(&ucontext->mmap_lock);
- ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+ ret = ib_copy_to_udata(udata, &uresp,
+ sizeof(uresp) - sizeof(uresp.reserved));
if (ret)
goto err5;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index bd749e7..dd93aad 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -779,6 +779,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
pci_resource_len(devp->rdev.lldi.pdev, 2));
if (!devp->rdev.bar2_kva) {
pr_err(MOD "Unable to ioremap BAR2\n");
+ ib_dealloc_device(&devp->ibdev);
return ERR_PTR(-EINVAL);
}
} else if (ocqp_supported(infop)) {
@@ -790,6 +791,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.lldi.vr->ocq.size);
if (!devp->rdev.oc_mw_kva) {
pr_err(MOD "Unable to ioremap onchip mem\n");
+ ib_dealloc_device(&devp->ibdev);
return ERR_PTR(-EINVAL);
}
}
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index a94a3e1..c777e22 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -122,7 +122,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
- if (udata->outlen < sizeof(uresp)) {
+ if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
if (!warned++)
pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.");
rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
@@ -140,7 +140,8 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
context->key += PAGE_SIZE;
spin_unlock(&context->mmap_lock);
- ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ ret = ib_copy_to_udata(udata, &uresp,
+ sizeof(uresp) - sizeof(uresp.reserved));
if (ret)
goto err_mm;
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index 11ccd27..cbd0ce1 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -48,6 +48,7 @@ struct c4iw_create_cq_resp {
__u32 cqid;
__u32 size;
__u32 qid_mask;
+ __u32 reserved; /* explicit padding (optional for i386) */
};
@@ -74,5 +75,6 @@ struct c4iw_create_qp_resp {
struct c4iw_alloc_ucontext_resp {
__u64 status_page_key;
__u32 status_page_size;
+ __u32 reserved; /* explicit padding (optional for i386) */
};
#endif
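
Annotation: the reserved fields added to the cxgb4 response structures above keep the user/kernel ABI layout identical for 32- and 64-bit userspace. A hedged sketch of the hazard (demo_uresp is illustrative):

#include <linux/types.h>

struct demo_uresp {
        __u64 key;       /* forces 8-byte struct alignment on x86-64 ...        */
        __u32 size;      /* ... but only 4-byte alignment on i386               */
        __u32 reserved;  /* explicit pad: sizeof() now matches on both ABIs     */
};
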
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index e2f9a51..45802e9 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -346,6 +346,10 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
ret = -EFAULT;
goto bail;
}
+ dp.len = odp.len;
+ dp.unit = odp.unit;
+ dp.data = odp.data;
+ dp.pbc_wd = 0;
} else {
ret = -EINVAL;
goto bail;
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 26dfbc8..01ba792 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -70,7 +70,7 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
int i;
if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
- dd->ipath_lastcancel > jiffies) {
+ time_after(dd->ipath_lastcancel, jiffies)) {
__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
"SendbufErrs %lx %lx", sbuf[0],
sbuf[1]);
@@ -755,7 +755,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
/* likely due to cancel; so suppress message unless verbose */
if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
- dd->ipath_lastcancel > jiffies) {
+ time_after(dd->ipath_lastcancel, jiffies)) {
/* armlaunch takes precedence; it often causes both. */
ipath_cdbg(VERBOSE,
"Suppressed %s error (%llx) after sendbuf cancel\n",
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 98ac18e..17a5177 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -247,7 +247,7 @@ static void sdma_abort_task(unsigned long opaque)
/* ipath_sdma_abort() is done, waiting for interrupt */
if (status == IPATH_SDMA_ABORT_DISARMED) {
- if (jiffies < dd->ipath_sdma_abort_intr_timeout)
+ if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
goto resched_noprint;
/* give up, intr got lost somewhere */
ipath_dbg("give up waiting for SDMADISABLED intr\n");
@@ -341,7 +341,7 @@ resched:
* JAG - this is bad to just have default be a loop without
* state change
*/
- if (jiffies > dd->ipath_sdma_abort_jiffies) {
+ if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
ipath_dbg("looping with status 0x%08lx\n",
dd->ipath_sdma_status);
dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
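
Annotation: the ipath hunks above switch raw jiffies comparisons to the wrap-safe helpers from <linux/jiffies.h>. A small hedged sketch (demo_timed_out is illustrative):

#include <linux/jiffies.h>
#include <linux/types.h>

static bool demo_timed_out(unsigned long deadline)
{
        /* a direct '>' breaks when the jiffies counter wraps; time_after()
         * compares via a signed difference, so it stays correct across wrap */
        return time_after(jiffies, deadline);
}

/* typical setup: unsigned long deadline = jiffies + 5 * HZ; */
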
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 170dca6..2d8c339 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -73,7 +73,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
- int is_mcast;
+ int is_mcast = 0;
struct in6_addr in6;
u16 vlan_tag;
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 5f64081..1066eec 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
int err;
err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
- PAGE_SIZE * 2, &buf->buf);
+ PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
if (err)
goto out;
@@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
if (err)
goto err_mtt;
@@ -209,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
uar = &to_mucontext(context)->uar;
} else {
- err = mlx4_db_alloc(dev->dev, &cq->db, 1);
+ err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
if (err)
goto err_cq;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index fd36ec6..287ad05 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -478,10 +478,6 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
return -EAGAIN;
- /* QP0 forwarding only for Dom0 */
- if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
- return -EINVAL;
-
if (!dest_qpt)
tun_qp = &tun_ctx->qp[0];
else
@@ -667,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
}
/* Class-specific handling */
switch (mad->mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ /* 255 indicates the dom0 */
+ if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
+ if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
+ return -EPERM;
+ /* for a VF. drop unsolicited MADs */
+ if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
+ mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
+ slave, mad->mad_hdr.mgmt_class,
+ mad->mad_hdr.method);
+ return -EINVAL;
+ }
+ }
+ break;
case IB_MGMT_CLASS_SUBN_ADM:
if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
(struct ib_sa_mad *) mad))
@@ -1165,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
return -EAGAIN;
- /* QP0 forwarding only for Dom0 */
- if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
- return -EINVAL;
-
if (dest_qpt == IB_QPT_SMI) {
src_qpnum = 0;
sqp = &sqp_ctx->qp[0];
@@ -1285,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
"belongs to another slave\n", wc->src_qp);
return;
}
- if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
- mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
- "non-master trying to send QP0 packets\n", wc->src_qp);
- return;
- }
/* Map transaction ID */
ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
@@ -1317,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
/* Class-specific handling */
switch (tunnel->mad.mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ if (slave != mlx4_master_func_num(dev->dev) &&
+ !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
+ return;
+ break;
case IB_MGMT_CLASS_SUBN_ADM:
if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
(struct ib_sa_mad *) &tunnel->mad))
@@ -1749,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
return -EEXIST;
ctx->state = DEMUX_PV_STATE_STARTING;
- /* have QP0 only on port owner, and only if link layer is IB */
- if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
- rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+ /* have QP0 only if link layer is IB */
+ if (rdma_port_get_link_layer(ibdev, ctx->port) ==
+ IB_LINK_LAYER_INFINIBAND)
ctx->has_smi = 1;
if (ctx->has_smi) {
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 1b6dbe15..3c3806a 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -544,12 +544,11 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
return 0;
}
-static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
- u32 cap_mask)
+static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
+ u32 cap_mask)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
- u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
if (IS_ERR(mailbox))
@@ -563,8 +562,8 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
}
- err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
@@ -573,11 +572,20 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+ u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
struct ib_port_attr attr;
u32 cap_mask;
int err;
- mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
+ /* return OK if this is RoCE. CM calls ib_modify_port() regardless
+ * of whether port link layer is ETH or IB. For ETH ports, qkey
+ * violations and port capabilities are not meaningful.
+ */
+ if (is_eth)
+ return 0;
+
+ mutex_lock(&mdev->cap_mask_mutex);
err = mlx4_ib_query_port(ibdev, port, &attr);
if (err)
@@ -586,9 +594,9 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
- err = mlx4_SET_PORT(to_mdev(ibdev), port,
- !!(mask & IB_PORT_RESET_QKEY_CNTR),
- cap_mask);
+ err = mlx4_ib_SET_PORT(mdev, port,
+ !!(mask & IB_PORT_RESET_QKEY_CNTR),
+ cap_mask);
out:
mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index f589522..bb8c9dd 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -156,6 +156,7 @@ enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 41308af..5b0cb8e 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -608,9 +608,20 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
return !attr->srq;
}
+static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
+{
+ int i;
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (qpn == dev->caps.qp0_proxy[i])
+ return !!dev->caps.qp0_qkey[i];
+ }
+ return 0;
+}
+
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
+ struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
+ gfp_t gfp)
{
int qpn;
int err;
@@ -625,10 +636,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
!(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
if (init_attr->qp_type == IB_QPT_GSI)
qp_type = MLX4_IB_QPT_PROXY_GSI;
- else if (mlx4_is_master(dev->dev))
- qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
- else
- qp_type = MLX4_IB_QPT_PROXY_SMI;
+ else {
+ if (mlx4_is_master(dev->dev) ||
+ qp0_enabled_vf(dev->dev, sqpn))
+ qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
+ else
+ qp_type = MLX4_IB_QPT_PROXY_SMI;
+ }
}
qpn = sqpn;
/* add extra sg entry for tunneling */
@@ -643,7 +657,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
return -EINVAL;
if (tnl_init->proxy_qp_type == IB_QPT_GSI)
qp_type = MLX4_IB_QPT_TUN_GSI;
- else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
+ else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
+ mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
+ tnl_init->port))
qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
else
qp_type = MLX4_IB_QPT_TUN_SMI;
@@ -658,14 +674,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
(qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
- sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
+ sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
if (!sqp)
return -ENOMEM;
qp = &sqp->qp;
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
} else {
- qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
+ qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
if (!qp)
return -ENOMEM;
qp->pri.vid = 0xFFFF;
@@ -748,14 +764,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err;
if (qp_has_rq(init_attr)) {
- err = mlx4_db_alloc(dev->dev, &qp->db, 0);
+ err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
if (err)
goto err;
*qp->db.db = 0;
}
- if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
+ if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
err = -ENOMEM;
goto err_db;
}
@@ -765,13 +781,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-
+ qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
+ qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
@@ -801,7 +816,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_proxy;
}
- err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
if (err)
goto err_qpn;
@@ -1040,7 +1055,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct mlx4_ib_qp *qp = NULL;
int err;
u16 xrcdn = 0;
+ gfp_t gfp;
+ gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
+ GFP_NOIO : GFP_KERNEL;
/*
* We only support LSO, vendor flag1, and multicast loopback blocking,
* and only for kernel UD QPs.
@@ -1049,7 +1067,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
MLX4_IB_SRIOV_TUNNEL_QP |
MLX4_IB_SRIOV_SQP |
- MLX4_IB_QP_NETIF))
+ MLX4_IB_QP_NETIF |
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO))
return ERR_PTR(-EINVAL);
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
@@ -1059,7 +1078,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
if (init_attr->create_flags &&
(udata ||
- ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
+ ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
init_attr->qp_type != IB_QPT_UD) ||
((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
init_attr->qp_type > IB_QPT_GSI)))
@@ -1079,7 +1098,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_RAW_PACKET:
- qp = kzalloc(sizeof *qp, GFP_KERNEL);
+ qp = kzalloc(sizeof *qp, gfp);
if (!qp)
return ERR_PTR(-ENOMEM);
qp->pri.vid = 0xFFFF;
@@ -1088,7 +1107,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_UD:
{
err = create_qp_common(to_mdev(pd->device), pd, init_attr,
- udata, 0, &qp);
+ udata, 0, &qp, gfp);
if (err)
return ERR_PTR(err);
@@ -1106,7 +1125,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
get_sqp_num(to_mdev(pd->device), init_attr),
- &qp);
+ &qp, gfp);
if (err)
return ERR_PTR(err);
@@ -1930,6 +1949,19 @@ out:
return err;
}
+static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
+{
+ int i;
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (qpn == dev->caps.qp0_proxy[i] ||
+ qpn == dev->caps.qp0_tunnel[i]) {
+ *qkey = dev->caps.qp0_qkey[i];
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
@@ -1987,8 +2019,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
- if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
- return -EINVAL;
+ if (mlx4_is_master(mdev->dev)) {
+ if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ return -EINVAL;
+ } else {
+ if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ return -EINVAL;
+ }
sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
@@ -2370,7 +2407,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
struct mlx4_wqe_datagram_seg *dseg,
- struct ib_send_wr *wr, enum ib_qp_type qpt)
+ struct ib_send_wr *wr,
+ enum mlx4_ib_qp_type qpt)
{
union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
struct mlx4_av sqp_av = {0};
@@ -2383,8 +2421,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
cpu_to_be32(0xf0000000);
memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
- /* This function used only for sending on QP1 proxies */
- dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ if (qpt == MLX4_IB_QPT_PROXY_GSI)
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ else
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
/* Use QKEY from the QP context, which is set by master */
dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
}
@@ -2679,11 +2719,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
- if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
- err = -ENOSYS;
- *bad_wr = wr;
- goto out;
- }
err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
@@ -2700,16 +2735,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += seglen / 16;
break;
case MLX4_IB_QPT_PROXY_SMI:
- /* don't allow QP0 sends on guests */
- err = -ENOSYS;
- *bad_wr = wr;
- goto out;
case MLX4_IB_QPT_PROXY_GSI:
/* If we are tunneling special qps, this is a UD qp.
* In this case we first add a UD segment targeting
* the tunnel qp, and then add a header with address
* information */
- set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
+ set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+ qp->mlx4_ib_qp_type);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
build_tunnel_header(wr, wqe, &seglen);
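
Annotation: the mlx4 QP changes above thread a gfp_t through the allocation paths so callers that sit under an I/O path can request GFP_NOIO and avoid recursing into reclaim-driven I/O. A hedged sketch of the plumbing (DEMO_* and demo_* names are illustrative):

#include <linux/slab.h>

#define DEMO_CREATE_USE_GFP_NOIO 0x1            /* hypothetical create flag */

static void *demo_alloc_qp_state(size_t len, unsigned int create_flags)
{
        /* GFP_NOIO forbids the allocator from issuing new I/O to reclaim
         * memory, which matters when QP creation itself serves an I/O path */
        gfp_t gfp = (create_flags & DEMO_CREATE_USE_GFP_NOIO) ?
                    GFP_NOIO : GFP_KERNEL;

        return kzalloc(len, gfp);
}
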
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 60c5fb0..62d9285 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
} else {
- err = mlx4_db_alloc(dev->dev, &srq->db, 0);
+ err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
if (err)
goto err_srq;
*srq->db.db = 0;
- if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+ if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
+ GFP_KERNEL)) {
err = -ENOMEM;
goto err_db;
}
@@ -165,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
if (err)
goto err_mtt;
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 5a38e43..cb4c66e 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -389,8 +389,10 @@ struct mlx4_port {
struct mlx4_ib_dev *dev;
struct attribute_group pkey_group;
struct attribute_group gid_group;
- u8 port_num;
+ struct device_attribute enable_smi_admin;
+ struct device_attribute smi_enabled;
int slave;
+ u8 port_num;
};
@@ -558,6 +560,101 @@ err:
return NULL;
}
+static ssize_t sysfs_show_smi_enabled(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, smi_enabled);
+ ssize_t len = 0;
+
+ if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num))
+ len = sprintf(buf, "%d\n", 1);
+ else
+ len = sprintf(buf, "%d\n", 0);
+
+ return len;
+}
+
+static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, enable_smi_admin);
+ ssize_t len = 0;
+
+ if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num))
+ len = sprintf(buf, "%d\n", 1);
+ else
+ len = sprintf(buf, "%d\n", 0);
+
+ return len;
+}
+
+static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, enable_smi_admin);
+ int enable;
+
+ if (sscanf(buf, "%i", &enable) != 1 ||
+ enable < 0 || enable > 1)
+ return -EINVAL;
+
+ if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable))
+ return -EINVAL;
+ return count;
+}
+
+static int add_vf_smi_entries(struct mlx4_port *p)
+{
+ int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+ int ret;
+
+ /* do not display entries if eth transport, or if master */
+ if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+ return 0;
+
+ sysfs_attr_init(&p->smi_enabled.attr);
+ p->smi_enabled.show = sysfs_show_smi_enabled;
+ p->smi_enabled.store = NULL;
+ p->smi_enabled.attr.name = "smi_enabled";
+ p->smi_enabled.attr.mode = 0444;
+ ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr);
+ if (ret) {
+ pr_err("failed to create smi_enabled\n");
+ return ret;
+ }
+
+ sysfs_attr_init(&p->enable_smi_admin.attr);
+ p->enable_smi_admin.show = sysfs_show_enable_smi_admin;
+ p->enable_smi_admin.store = sysfs_store_enable_smi_admin;
+ p->enable_smi_admin.attr.name = "enable_smi_admin";
+ p->enable_smi_admin.attr.mode = 0644;
+ ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr);
+ if (ret) {
+ pr_err("failed to create enable_smi_admin\n");
+ sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+ return ret;
+ }
+ return 0;
+}
+
+static void remove_vf_smi_entries(struct mlx4_port *p)
+{
+ int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+
+ if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+ return;
+
+ sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+ sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr);
+}
+
static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
{
struct mlx4_port *p;
@@ -602,6 +699,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
if (ret)
goto err_free_gid;
+ ret = add_vf_smi_entries(p);
+ if (ret)
+ goto err_free_gid;
+
list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
return 0;
@@ -669,6 +770,7 @@ err_add:
mport = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &mport->pkey_group);
sysfs_remove_group(p, &mport->gid_group);
+ remove_vf_smi_entries(mport);
kobject_put(p);
}
kobject_put(dev->dev_ports_parent[slave]);
@@ -713,6 +815,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
port = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
+ remove_vf_smi_entries(port);
kobject_put(p);
kobject_put(device->dev_ports_parent[slave]);
}
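
Annotation: the mlx4 sysfs hunks above create per-port VF attribute files by hand rather than through DEVICE_ATTR(), because each struct mlx4_port carries its own struct device_attribute and the show callback recovers the port with container_of(). A hedged sketch of that pattern (demo_* names are illustrative):

#include <linux/device.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>

struct demo_port {
        struct kobject kobj;
        struct device_attribute enabled;
        bool smi_allowed;
};

static ssize_t demo_show(struct device *dev, struct device_attribute *attr,
                         char *buf)
{
        struct demo_port *p = container_of(attr, struct demo_port, enabled);

        return sprintf(buf, "%d\n", p->smi_allowed ? 1 : 0);
}

static int demo_add_attr(struct demo_port *p)
{
        sysfs_attr_init(&p->enabled.attr);      /* required for lockdep */
        p->enabled.attr.name = "enabled";
        p->enabled.attr.mode = 0444;
        p->enabled.show = demo_show;
        p->enabled.store = NULL;
        return sysfs_create_file(&p->kobj, &p->enabled.attr);
}
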
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 62bb6b4..8ae4f89 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -32,6 +32,7 @@
#include <linux/kref.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -602,14 +603,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
int *cqe_size, int *index, int *inlen)
{
struct mlx5_ib_create_cq ucmd;
+ size_t ucmdlen;
int page_shift;
int npages;
int ncont;
int err;
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ ucmdlen =
+ (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+ sizeof(ucmd)) ? (sizeof(ucmd) -
+ sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+ if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
return -EFAULT;
+ if (ucmdlen == sizeof(ucmd) &&
+ ucmd.reserved != 0)
+ return -EINVAL;
+
if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
return -EINVAL;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 5054158..f2ccf1a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -264,8 +264,6 @@ struct mlx5_ib_mr {
__be64 *pas;
dma_addr_t dma;
int npages;
- struct completion done;
- enum ib_wc_status status;
struct mlx5_ib_dev *dev;
struct mlx5_create_mkey_mbox_out out;
struct mlx5_core_sig_ctx *sig;
@@ -277,6 +275,17 @@ struct mlx5_ib_fast_reg_page_list {
dma_addr_t map;
};
+struct mlx5_ib_umr_context {
+ enum ib_wc_status status;
+ struct completion done;
+};
+
+static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
+{
+ context->status = -1;
+ init_completion(&context->done);
+}
+
struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 81392b2..afa873b 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -73,6 +73,8 @@ static void reg_mr_callback(int status, void *context)
struct mlx5_cache_ent *ent = &cache->ent[c];
u8 key;
unsigned long flags;
+ struct mlx5_mr_table *table = &dev->mdev.priv.mr_table;
+ int err;
spin_lock_irqsave(&ent->lock, flags);
ent->pending--;
@@ -107,6 +109,13 @@ static void reg_mr_callback(int status, void *context)
ent->cur++;
ent->size++;
spin_unlock_irqrestore(&ent->lock, flags);
+
+ write_lock_irqsave(&table->lock, flags);
+ err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
+ &mr->mmr);
+ if (err)
+ pr_err("Error inserting to mr tree. 0x%x\n", -err);
+ write_unlock_irqrestore(&table->lock, flags);
}
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@@ -699,7 +708,7 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
- struct mlx5_ib_mr *mr;
+ struct mlx5_ib_umr_context *context;
struct ib_wc wc;
int err;
@@ -712,9 +721,9 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
if (err == 0)
break;
- mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
- mr->status = wc.status;
- complete(&mr->done);
+ context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
+ context->status = wc.status;
+ complete(&context->done);
}
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}
@@ -726,11 +735,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct device *ddev = dev->ib_dev.dma_device;
struct umr_common *umrc = &dev->umrc;
+ struct mlx5_ib_umr_context umr_context;
struct ib_send_wr wr, *bad;
struct mlx5_ib_mr *mr;
struct ib_sge sg;
int size = sizeof(u64) * npages;
- int err;
+ int err = 0;
int i;
for (i = 0; i < 1; i++) {
@@ -751,7 +761,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
if (!mr->pas) {
err = -ENOMEM;
- goto error;
+ goto free_mr;
}
mlx5_ib_populate_pas(dev, umem, page_shift,
@@ -760,44 +770,46 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
DMA_TO_DEVICE);
if (dma_mapping_error(ddev, mr->dma)) {
- kfree(mr->pas);
err = -ENOMEM;
- goto error;
+ goto free_pas;
}
memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)mr;
+ wr.wr_id = (u64)(unsigned long)&umr_context;
prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
- /* We serialize polls so one process does not kidnap another's
- * completion. This is not a problem since wr is completed in
- * around 1 usec
- */
+ mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- init_completion(&mr->done);
err = ib_post_send(umrc->qp, &wr, &bad);
if (err) {
mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- up(&umrc->sem);
- goto error;
+ goto unmap_dma;
+ } else {
+ wait_for_completion(&umr_context.done);
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_warn(dev, "reg umr failed\n");
+ err = -EFAULT;
+ }
}
- wait_for_completion(&mr->done);
- up(&umrc->sem);
+ mr->mmr.iova = virt_addr;
+ mr->mmr.size = len;
+ mr->mmr.pd = to_mpd(pd)->pdn;
+
+unmap_dma:
+ up(&umrc->sem);
dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+
+free_pas:
kfree(mr->pas);
- if (mr->status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed\n");
- err = -EFAULT;
- goto error;
+free_mr:
+ if (err) {
+ free_cached_mr(dev, mr);
+ return ERR_PTR(err);
}
return mr;
-
-error:
- free_cached_mr(dev, mr);
- return ERR_PTR(err);
}
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
@@ -926,24 +938,26 @@ error:
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct umr_common *umrc = &dev->umrc;
+ struct mlx5_ib_umr_context umr_context;
struct ib_send_wr wr, *bad;
int err;
memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)mr;
+ wr.wr_id = (u64)(unsigned long)&umr_context;
prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
+ mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- init_completion(&mr->done);
err = ib_post_send(umrc->qp, &wr, &bad);
if (err) {
up(&umrc->sem);
mlx5_ib_dbg(dev, "err %d\n", err);
goto error;
+ } else {
+ wait_for_completion(&umr_context.done);
+ up(&umrc->sem);
}
- wait_for_completion(&mr->done);
- up(&umrc->sem);
- if (mr->status != IB_WC_SUCCESS) {
+ if (umr_context.status != IB_WC_SUCCESS) {
mlx5_ib_warn(dev, "unreg umr failed\n");
err = -EFAULT;
goto error;
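
Annotation: the mlx5 MR rework above moves the completion out of struct mlx5_ib_mr onto the caller's stack and passes its address through wr_id, so concurrent UMR operations no longer share per-MR completion state. A hedged sketch of the wait pattern (demo_* names are illustrative):

#include <linux/completion.h>
#include <linux/types.h>

struct demo_wait_ctx {
        int status;
        struct completion done;
};

static int demo_post_and_wait(struct demo_wait_ctx *ctx)
{
        ctx->status = -1;
        init_completion(&ctx->done);
        /* ... post the work request with wr_id = (u64)(unsigned long)ctx ... */
        wait_for_completion(&ctx->done);
        return ctx->status;
}

/* completion-queue side:
 *      ctx = (struct demo_wait_ctx *)(unsigned long)wc.wr_id;
 *      ctx->status = wc.status;
 *      complete(&ctx->done);
 */
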
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index dc930ed..d13ddf1 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -574,6 +574,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
+ qp->rq.offset = 0;
+ qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
+ qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+
err = set_user_buf_size(dev, qp, &ucmd);
if (err)
goto err_uuar;
@@ -2078,6 +2082,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
struct ib_sig_domain *wire = &sig_attrs->wire;
int ret, selector;
+ memset(bsf, 0, sizeof(*bsf));
switch (sig_attrs->mem.sig_type) {
case IB_SIG_TYPE_T10_DIF:
if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
@@ -2090,9 +2095,11 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
/* Same block structure */
basic->bsf_size_sbs = 1 << 4;
if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
- basic->wire.copy_byte_mask = 0xff;
- else
- basic->wire.copy_byte_mask = 0x3f;
+ basic->wire.copy_byte_mask |= 0xc0;
+ if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
+ basic->wire.copy_byte_mask |= 0x30;
+ if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
+ basic->wire.copy_byte_mask |= 0x0f;
} else
basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
@@ -2131,9 +2138,13 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
int ret;
int wqe_size;
- if (!wr->wr.sig_handover.prot) {
+ if (!wr->wr.sig_handover.prot ||
+ (data_key == wr->wr.sig_handover.prot->lkey &&
+ data_va == wr->wr.sig_handover.prot->addr &&
+ data_len == wr->wr.sig_handover.prot->length)) {
/**
* Source domain doesn't contain signature information
+ * or data and protection are interleaved in memory.
* So need construct:
* ------------------
* | data_klm |
@@ -2187,23 +2198,13 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
data_sentry->bcount = cpu_to_be16(block_size);
data_sentry->key = cpu_to_be32(data_key);
data_sentry->va = cpu_to_be64(data_va);
+ data_sentry->stride = cpu_to_be16(block_size);
+
prot_sentry->bcount = cpu_to_be16(prot_size);
prot_sentry->key = cpu_to_be32(prot_key);
+ prot_sentry->va = cpu_to_be64(prot_va);
+ prot_sentry->stride = cpu_to_be16(prot_size);
- if (prot_key == data_key && prot_va == data_va) {
- /**
- * The data and protection are interleaved
- * in a single memory region
- **/
- prot_sentry->va = cpu_to_be64(data_va + block_size);
- prot_sentry->stride = cpu_to_be16(block_size + prot_size);
- data_sentry->stride = prot_sentry->stride;
- } else {
- /* The data and protection are two different buffers */
- prot_sentry->va = cpu_to_be64(prot_va);
- data_sentry->stride = cpu_to_be16(block_size);
- prot_sentry->stride = cpu_to_be16(prot_size);
- }
wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
sizeof(*prot_sentry), 64);
}
@@ -2275,7 +2276,10 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
/* length of the protected region, data + protection */
region_len = wr->sg_list->length;
- if (wr->wr.sig_handover.prot)
+ if (wr->wr.sig_handover.prot &&
+ (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey ||
+ wr->wr.sig_handover.prot->addr != wr->sg_list->addr ||
+ wr->wr.sig_handover.prot->length != wr->sg_list->length))
region_len += wr->wr.sig_handover.prot->length;
/**
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 210b3ea..384af6d 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -35,6 +35,7 @@
#include <linux/mlx5/srq.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -78,16 +79,27 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_create_srq ucmd;
+ size_t ucmdlen;
int err;
int npages;
int page_shift;
int ncont;
u32 offset;
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ ucmdlen =
+ (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+ sizeof(ucmd)) ? (sizeof(ucmd) -
+ sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+ if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
mlx5_ib_dbg(dev, "failed copy udata\n");
return -EFAULT;
}
+
+ if (ucmdlen == sizeof(ucmd) &&
+ ucmd.reserved != 0)
+ return -EINVAL;
+
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index 0f4f8e4..d0ba264 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -91,6 +91,7 @@ struct mlx5_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
__u32 cqe_size;
+ __u32 reserved; /* explicit padding (optional on i386) */
};
struct mlx5_ib_create_cq_resp {
@@ -109,6 +110,7 @@ struct mlx5_ib_create_srq {
__u64 buf_addr;
__u64 db_addr;
__u32 flags;
+ __u32 reserved; /* explicit padding (optional on i386) */
};
struct mlx5_ib_create_srq_resp {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 6c54106..41a9aec 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -510,16 +510,9 @@ exit:
return status;
}
-static int ocrdma_debugfs_open(struct inode *inode, struct file *file)
-{
- if (inode->i_private)
- file->private_data = inode->i_private;
- return 0;
-}
-
static const struct file_operations ocrdma_dbg_ops = {
.owner = THIS_MODULE,
- .open = ocrdma_debugfs_open,
+ .open = simple_open,
.read = ocrdma_dbgfs_ops_read,
};
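
Annotation: simple_open() is the generic libfs helper that copies inode->i_private into file->private_data, which is exactly what the removed ocrdma_debugfs_open() did by hand. A hedged sketch of a debugfs node using it (demo_* names are illustrative):

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/module.h>

static ssize_t demo_read(struct file *file, char __user *buf,
                         size_t count, loff_t *ppos)
{
        void *priv = file->private_data;        /* filled in by simple_open() */

        (void)priv;                             /* format 'priv' into buf here */
        return 0;
}

static const struct file_operations demo_dbg_ops = {
        .owner = THIS_MODULE,
        .open  = simple_open,
        .read  = demo_read,
};

/* registration: debugfs_create_file("demo", 0444, parent, data, &demo_dbg_ops); */
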
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 5b7aeb2..8d3c78dd 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1272,7 +1272,7 @@ static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
* Do all the generic driver unit- and chip-independent memory
* allocation and initialization.
*/
-static int __init qlogic_ib_init(void)
+static int __init qib_ib_init(void)
{
int ret;
@@ -1316,12 +1316,12 @@ bail:
return ret;
}
-module_init(qlogic_ib_init);
+module_init(qib_ib_init);
/*
* Do the non-unit driver cleanup, memory free, etc. at unload.
*/
-static void __exit qlogic_ib_cleanup(void)
+static void __exit qib_ib_cleanup(void)
{
int ret;
@@ -1346,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void)
qib_dev_cleanup();
}
-module_exit(qlogic_ib_cleanup);
+module_exit(qib_ib_cleanup);
/* this can only be called after a successful initialization */
static void cleanup_device_data(struct qib_devdata *dd)
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index edad991..22c720e 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -1028,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
event.event = IB_EVENT_PKEY_CHANGE;
event.device = &dd->verbs_dev.ibdev;
- event.element.port_num = 1;
+ event.element.port_num = port;
ib_dispatch_event(&event);
}
return 0;
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 0cad0c4..7fcc150 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -985,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
struct ib_qp *ret;
if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
- init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) {
+ init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
+ init_attr->create_flags) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index d48d2c0..53bd6a2 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -466,6 +466,9 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
ucontext = to_uucontext(pd->uobject->context);
us_ibdev = to_usdev(pd->device);
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+
err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
if (err) {
usnic_err("%s: cannot copy udata for create_qp\n",
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
index d135ad9..3a4288e 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
@@ -1,3 +1,21 @@
+/*
+ * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
new file mode 100644
index 0000000..f3c7dcf
--- /dev/null
+++ b/drivers/infiniband/ulp/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_INFINIBAND_IPOIB) += ipoib/
+obj-$(CONFIG_INFINIBAND_SRP) += srp/
+obj-$(CONFIG_INFINIBAND_SRPT) += srpt/
+obj-$(CONFIG_INFINIBAND_ISER) += iser/
+obj-$(CONFIG_INFINIBAND_ISERT) += isert/
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 1377f85..933efce 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1030,10 +1030,20 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
.cap.max_send_sge = 1,
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
- .qp_context = tx
+ .qp_context = tx,
+ .create_flags = IB_QP_CREATE_USE_GFP_NOIO
};
- return ib_create_qp(priv->pd, &attr);
+ struct ib_qp *tx_qp;
+
+ tx_qp = ib_create_qp(priv->pd, &attr);
+ if (PTR_ERR(tx_qp) == -EINVAL) {
+ ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
+ priv->ca->name);
+ attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
+ tx_qp = ib_create_qp(priv->pd, &attr);
+ }
+ return tx_qp;
}
static int ipoib_cm_send_req(struct net_device *dev,
@@ -1104,12 +1114,14 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
struct ipoib_dev_priv *priv = netdev_priv(p->dev);
int ret;
- p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring);
+ p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
+ GFP_NOIO, PAGE_KERNEL);
if (!p->tx_ring) {
ipoib_warn(priv, "failed to allocate tx ring\n");
ret = -ENOMEM;
goto err_tx;
}
+ memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
p->qp = ipoib_cm_create_tx_qp(p->dev, p);
if (IS_ERR(p->qp)) {
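
Two things make the IPoIB connected-mode TX setup above safe to run from a memory-reclaim context: the ring is now allocated with __vmalloc(..., GFP_NOIO, PAGE_KERNEL) plus an explicit memset (vzalloc() always implies GFP_KERNEL), and the QP is requested with IB_QP_CREATE_USE_GFP_NOIO, falling back to a plain allocation when the provider rejects the flag. A trimmed sketch of the fallback pattern (kernel context, error handling omitted):

/* Ask for GFP_NOIO-safe allocations; retry without the flag if the HCA
 * driver returns -EINVAL because it does not support it. */
static struct ib_qp *create_tx_qp_noio(struct ib_pd *pd,
				       struct ib_qp_init_attr *attr)
{
	struct ib_qp *qp;

	attr->create_flags |= IB_QP_CREATE_USE_GFP_NOIO;
	qp = ib_create_qp(pd, attr);
	if (PTR_ERR(qp) == -EINVAL) {		/* flag not supported */
		attr->create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
		qp = ib_create_qp(pd, attr);	/* plain GFP_KERNEL path */
	}
	return qp;
}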
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 25f195e..eb79739 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -99,6 +99,7 @@ MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
module_param_named(pi_guard, iser_pi_guard, int, 0644);
MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)");
+static struct workqueue_struct *release_wq;
struct iser_global ig;
void
@@ -337,24 +338,6 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
return cls_conn;
}
-static void
-iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
-{
- struct iscsi_conn *conn = cls_conn->dd_data;
- struct iser_conn *ib_conn = conn->dd_data;
-
- iscsi_conn_teardown(cls_conn);
- /*
- * Userspace will normally call the stop callback and
- * already have freed the ib_conn, but if it goofed up then
- * we free it here.
- */
- if (ib_conn) {
- ib_conn->iscsi_conn = NULL;
- iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
- }
-}
-
static int
iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
@@ -392,29 +375,39 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
conn->dd_data = ib_conn;
ib_conn->iscsi_conn = conn;
- iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */
return 0;
}
+static int
+iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
+{
+ struct iscsi_conn *iscsi_conn;
+ struct iser_conn *ib_conn;
+
+ iscsi_conn = cls_conn->dd_data;
+ ib_conn = iscsi_conn->dd_data;
+ reinit_completion(&ib_conn->stop_completion);
+
+ return iscsi_conn_start(cls_conn);
+}
+
static void
iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
struct iscsi_conn *conn = cls_conn->dd_data;
struct iser_conn *ib_conn = conn->dd_data;
+ iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn);
+ iscsi_conn_stop(cls_conn, flag);
+
/*
* Userspace may have goofed up and not bound the connection or
* might have only partially setup the connection.
*/
if (ib_conn) {
- iscsi_conn_stop(cls_conn, flag);
- /*
- * There is no unbind event so the stop callback
- * must release the ref from the bind.
- */
- iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
+ conn->dd_data = NULL;
+ complete(&ib_conn->stop_completion);
}
- conn->dd_data = NULL;
}
static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
@@ -515,28 +508,28 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
case ISCSI_PARAM_HDRDGST_EN:
sscanf(buf, "%d", &value);
if (value) {
- iser_err("DataDigest wasn't negotiated to None");
+ iser_err("DataDigest wasn't negotiated to None\n");
return -EPROTO;
}
break;
case ISCSI_PARAM_DATADGST_EN:
sscanf(buf, "%d", &value);
if (value) {
- iser_err("DataDigest wasn't negotiated to None");
+ iser_err("DataDigest wasn't negotiated to None\n");
return -EPROTO;
}
break;
case ISCSI_PARAM_IFMARKER_EN:
sscanf(buf, "%d", &value);
if (value) {
- iser_err("IFMarker wasn't negotiated to No");
+ iser_err("IFMarker wasn't negotiated to No\n");
return -EPROTO;
}
break;
case ISCSI_PARAM_OFMARKER_EN:
sscanf(buf, "%d", &value);
if (value) {
- iser_err("OFMarker wasn't negotiated to No");
+ iser_err("OFMarker wasn't negotiated to No\n");
return -EPROTO;
}
break;
@@ -652,19 +645,20 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
struct iser_conn *ib_conn;
ib_conn = ep->dd_data;
- if (ib_conn->iscsi_conn)
- /*
- * Must suspend xmit path if the ep is bound to the
- * iscsi_conn, so we know we are not accessing the ib_conn
- * when we free it.
- *
- * This may not be bound if the ep poll failed.
- */
- iscsi_suspend_tx(ib_conn->iscsi_conn);
-
-
- iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state);
+ iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
iser_conn_terminate(ib_conn);
+
+ /*
+ * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop
+ * call and ISER_CONN_DOWN state before freeing the iser resources.
+ * otherwise we are safe to free resources immediately.
+ */
+ if (ib_conn->iscsi_conn) {
+ INIT_WORK(&ib_conn->release_work, iser_release_work);
+ queue_work(release_wq, &ib_conn->release_work);
+ } else {
+ iser_conn_release(ib_conn);
+ }
}
static umode_t iser_attr_is_visible(int param_type, int param)
@@ -748,13 +742,13 @@ static struct iscsi_transport iscsi_iser_transport = {
/* connection management */
.create_conn = iscsi_iser_conn_create,
.bind_conn = iscsi_iser_conn_bind,
- .destroy_conn = iscsi_iser_conn_destroy,
+ .destroy_conn = iscsi_conn_teardown,
.attr_is_visible = iser_attr_is_visible,
.set_param = iscsi_iser_set_param,
.get_conn_param = iscsi_conn_get_param,
.get_ep_param = iscsi_iser_get_ep_param,
.get_session_param = iscsi_session_get_param,
- .start_conn = iscsi_conn_start,
+ .start_conn = iscsi_iser_conn_start,
.stop_conn = iscsi_iser_conn_stop,
/* iscsi host params */
.get_host_param = iscsi_host_get_param,
@@ -801,6 +795,12 @@ static int __init iser_init(void)
mutex_init(&ig.connlist_mutex);
INIT_LIST_HEAD(&ig.connlist);
+ release_wq = alloc_workqueue("release workqueue", 0, 0);
+ if (!release_wq) {
+ iser_err("failed to allocate release workqueue\n");
+ return -ENOMEM;
+ }
+
iscsi_iser_scsi_transport = iscsi_register_transport(
&iscsi_iser_transport);
if (!iscsi_iser_scsi_transport) {
@@ -819,7 +819,24 @@ register_transport_failure:
static void __exit iser_exit(void)
{
+ struct iser_conn *ib_conn, *n;
+ int connlist_empty;
+
iser_dbg("Removing iSER datamover...\n");
+ destroy_workqueue(release_wq);
+
+ mutex_lock(&ig.connlist_mutex);
+ connlist_empty = list_empty(&ig.connlist);
+ mutex_unlock(&ig.connlist_mutex);
+
+ if (!connlist_empty) {
+ iser_err("Error cleanup stage completed but we still have iser "
+ "connections, destroying them anyway.\n");
+ list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) {
+ iser_conn_release(ib_conn);
+ }
+ }
+
iscsi_unregister_transport(&iscsi_iser_transport);
kmem_cache_destroy(ig.desc_cache);
}
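
The iser changes above replace the iser_conn_get()/iser_conn_put() refcounting with a deterministic teardown: ep_disconnect queues iser_release_work() on release_wq, the work waits for the .stop_conn completion and for the connection to reach ISER_CONN_DOWN, and only then releases the resources. A minimal model of that handshake, assuming the completion was initialized at connection setup (names shortened, kernel context):

struct conn {
	struct work_struct	release_work;
	struct completion	stop_completion;	/* init_completion() at setup */
};

static void conn_release_work(struct work_struct *work)
{
	struct conn *c = container_of(work, struct conn, release_work);

	wait_for_completion(&c->stop_completion);	/* .stop_conn has run */
	/* ... wait for the DOWN state, then tear down QP/CM resources ... */
	kfree(c);
}

/* .ep_disconnect path: hand the final free to the workqueue. */
static void conn_disconnect(struct conn *c, struct workqueue_struct *wq)
{
	INIT_WORK(&c->release_work, conn_release_work);
	queue_work(wq, &c->release_work);
}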
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 324129f..97cd385 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -69,7 +69,7 @@
#define DRV_NAME "iser"
#define PFX DRV_NAME ": "
-#define DRV_VER "1.3"
+#define DRV_VER "1.4"
#define iser_dbg(fmt, arg...) \
do { \
@@ -333,6 +333,8 @@ struct iser_conn {
int post_recv_buf_count; /* posted rx count */
atomic_t post_send_buf_count; /* posted tx count */
char name[ISER_OBJECT_NAME_SIZE];
+ struct work_struct release_work;
+ struct completion stop_completion;
struct list_head conn_list; /* entry in ig conn list */
char *login_buf;
@@ -417,12 +419,12 @@ void iscsi_iser_recv(struct iscsi_conn *conn,
void iser_conn_init(struct iser_conn *ib_conn);
-void iser_conn_get(struct iser_conn *ib_conn);
-
-int iser_conn_put(struct iser_conn *ib_conn, int destroy_cma_id_allowed);
+void iser_conn_release(struct iser_conn *ib_conn);
void iser_conn_terminate(struct iser_conn *ib_conn);
+void iser_release_work(struct work_struct *work);
+
void iser_rcv_completion(struct iser_rx_desc *desc,
unsigned long dto_xfer_len,
struct iser_conn *ib_conn);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 32849f2..ea01075 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -581,14 +581,30 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
return ret;
}
+void iser_release_work(struct work_struct *work)
+{
+ struct iser_conn *ib_conn;
+
+ ib_conn = container_of(work, struct iser_conn, release_work);
+
+ /* wait for .conn_stop callback */
+ wait_for_completion(&ib_conn->stop_completion);
+
+ /* wait for the qp`s post send and post receive buffers to empty */
+ wait_event_interruptible(ib_conn->wait,
+ ib_conn->state == ISER_CONN_DOWN);
+
+ iser_conn_release(ib_conn);
+}
+
/**
* Frees all conn objects and deallocs conn descriptor
*/
-static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
+void iser_conn_release(struct iser_conn *ib_conn)
{
struct iser_device *device = ib_conn->device;
- BUG_ON(ib_conn->state != ISER_CONN_DOWN);
+ BUG_ON(ib_conn->state == ISER_CONN_UP);
mutex_lock(&ig.connlist_mutex);
list_del(&ib_conn->conn_list);
@@ -600,27 +616,13 @@ static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
if (device != NULL)
iser_device_try_release(device);
/* if cma handler context, the caller actually destroy the id */
- if (ib_conn->cma_id != NULL && can_destroy_id) {
+ if (ib_conn->cma_id != NULL) {
rdma_destroy_id(ib_conn->cma_id);
ib_conn->cma_id = NULL;
}
iscsi_destroy_endpoint(ib_conn->ep);
}
-void iser_conn_get(struct iser_conn *ib_conn)
-{
- atomic_inc(&ib_conn->refcount);
-}
-
-int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id)
-{
- if (atomic_dec_and_test(&ib_conn->refcount)) {
- iser_conn_release(ib_conn, can_destroy_id);
- return 1;
- }
- return 0;
-}
-
/**
* triggers start of the disconnect procedures and wait for them to be done
*/
@@ -638,24 +640,19 @@ void iser_conn_terminate(struct iser_conn *ib_conn)
if (err)
iser_err("Failed to disconnect, conn: 0x%p err %d\n",
ib_conn,err);
-
- wait_event_interruptible(ib_conn->wait,
- ib_conn->state == ISER_CONN_DOWN);
-
- iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
}
-static int iser_connect_error(struct rdma_cm_id *cma_id)
+static void iser_connect_error(struct rdma_cm_id *cma_id)
{
struct iser_conn *ib_conn;
+
ib_conn = (struct iser_conn *)cma_id->context;
ib_conn->state = ISER_CONN_DOWN;
wake_up_interruptible(&ib_conn->wait);
- return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
}
-static int iser_addr_handler(struct rdma_cm_id *cma_id)
+static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
struct iser_device *device;
struct iser_conn *ib_conn;
@@ -664,7 +661,8 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id)
device = iser_device_find_by_ib_device(cma_id);
if (!device) {
iser_err("device lookup/creation failed\n");
- return iser_connect_error(cma_id);
+ iser_connect_error(cma_id);
+ return;
}
ib_conn = (struct iser_conn *)cma_id->context;
@@ -686,13 +684,12 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id)
ret = rdma_resolve_route(cma_id, 1000);
if (ret) {
iser_err("resolve route failed: %d\n", ret);
- return iser_connect_error(cma_id);
+ iser_connect_error(cma_id);
+ return;
}
-
- return 0;
}
-static int iser_route_handler(struct rdma_cm_id *cma_id)
+static void iser_route_handler(struct rdma_cm_id *cma_id)
{
struct rdma_conn_param conn_param;
int ret;
@@ -720,9 +717,9 @@ static int iser_route_handler(struct rdma_cm_id *cma_id)
goto failure;
}
- return 0;
+ return;
failure:
- return iser_connect_error(cma_id);
+ iser_connect_error(cma_id);
}
static void iser_connected_handler(struct rdma_cm_id *cma_id)
@@ -735,14 +732,13 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
ib_conn = (struct iser_conn *)cma_id->context;
- ib_conn->state = ISER_CONN_UP;
- wake_up_interruptible(&ib_conn->wait);
+ if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP))
+ wake_up_interruptible(&ib_conn->wait);
}
-static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
+static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
struct iser_conn *ib_conn;
- int ret;
ib_conn = (struct iser_conn *)cma_id->context;
@@ -762,24 +758,19 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
ib_conn->state = ISER_CONN_DOWN;
wake_up_interruptible(&ib_conn->wait);
}
-
- ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
- return ret;
}
static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
- int ret = 0;
-
iser_info("event %d status %d conn %p id %p\n",
event->event, event->status, cma_id->context, cma_id);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
- ret = iser_addr_handler(cma_id);
+ iser_addr_handler(cma_id);
break;
case RDMA_CM_EVENT_ROUTE_RESOLVED:
- ret = iser_route_handler(cma_id);
+ iser_route_handler(cma_id);
break;
case RDMA_CM_EVENT_ESTABLISHED:
iser_connected_handler(cma_id);
@@ -789,18 +780,18 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_REJECTED:
- ret = iser_connect_error(cma_id);
+ iser_connect_error(cma_id);
break;
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_ADDR_CHANGE:
- ret = iser_disconnected_handler(cma_id);
+ iser_disconnected_handler(cma_id);
break;
default:
iser_err("Unexpected RDMA CM event (%d)\n", event->event);
break;
}
- return ret;
+ return 0;
}
void iser_conn_init(struct iser_conn *ib_conn)
@@ -809,7 +800,7 @@ void iser_conn_init(struct iser_conn *ib_conn)
init_waitqueue_head(&ib_conn->wait);
ib_conn->post_recv_buf_count = 0;
atomic_set(&ib_conn->post_send_buf_count, 0);
- atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */
+ init_completion(&ib_conn->stop_completion);
INIT_LIST_HEAD(&ib_conn->conn_list);
spin_lock_init(&ib_conn->lock);
}
@@ -837,7 +828,6 @@ int iser_connect(struct iser_conn *ib_conn,
ib_conn->state = ISER_CONN_PENDING;
- iser_conn_get(ib_conn); /* ref ib conn's cma id */
ib_conn->cma_id = rdma_create_id(iser_cma_handler,
(void *)ib_conn,
RDMA_PS_TCP, IB_QPT_RC);
@@ -874,9 +864,8 @@ id_failure:
ib_conn->cma_id = NULL;
addr_failure:
ib_conn->state = ISER_CONN_DOWN;
- iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
connect_failure:
- iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
+ iser_conn_release(ib_conn);
return err;
}
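
Because the connected handler can now race with an error path that already moved the connection to ISER_CONN_DOWN, it only wakes waiters when the PENDING-to-UP transition actually happened. The guarded transition boils down to a compare-and-exchange under the connection lock, roughly (a sketch, not the driver's exact code):

static int conn_comp_exch(struct conn *c, int expected, int next)
{
	int ret = 0;

	spin_lock_bh(&c->lock);
	if (c->state == expected) {	/* e.g. PENDING -> UP */
		c->state = next;
		ret = 1;		/* caller may wake waiters */
	}
	spin_unlock_bh(&c->lock);
	return ret;
}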
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 66a908b..e3c2c5b 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -30,7 +30,7 @@
* SOFTWARE.
*/
-#define pr_fmt(fmt) PFX fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
@@ -66,6 +66,8 @@ static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
+static bool prefer_fr;
+static bool register_always;
static int topspin_workarounds = 1;
module_param(srp_sg_tablesize, uint, 0444);
@@ -87,6 +89,14 @@ module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
"Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
+module_param(prefer_fr, bool, 0444);
+MODULE_PARM_DESC(prefer_fr,
+"Whether to use fast registration if both FMR and fast registration are supported");
+
+module_param(register_always, bool, 0444);
+MODULE_PARM_DESC(register_always,
+ "Use memory registration even for contiguous memory regions");
+
static struct kernel_param_ops srp_tmo_ops;
static int srp_reconnect_delay = 10;
@@ -288,28 +298,174 @@ static int srp_new_cm_id(struct srp_target_port *target)
return 0;
}
+static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
+{
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_fmr_pool_param fmr_param;
+
+ memset(&fmr_param, 0, sizeof(fmr_param));
+ fmr_param.pool_size = target->scsi_host->can_queue;
+ fmr_param.dirty_watermark = fmr_param.pool_size / 4;
+ fmr_param.cache = 1;
+ fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
+ fmr_param.page_shift = ilog2(dev->mr_page_size);
+ fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+
+ return ib_create_fmr_pool(dev->pd, &fmr_param);
+}
+
+/**
+ * srp_destroy_fr_pool() - free the resources owned by a pool
+ * @pool: Fast registration pool to be destroyed.
+ */
+static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
+{
+ int i;
+ struct srp_fr_desc *d;
+
+ if (!pool)
+ return;
+
+ for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
+ if (d->frpl)
+ ib_free_fast_reg_page_list(d->frpl);
+ if (d->mr)
+ ib_dereg_mr(d->mr);
+ }
+ kfree(pool);
+}
+
+/**
+ * srp_create_fr_pool() - allocate and initialize a pool for fast registration
+ * @device: IB device to allocate fast registration descriptors for.
+ * @pd: Protection domain associated with the FR descriptors.
+ * @pool_size: Number of descriptors to allocate.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ */
+static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
+ struct ib_pd *pd, int pool_size,
+ int max_page_list_len)
+{
+ struct srp_fr_pool *pool;
+ struct srp_fr_desc *d;
+ struct ib_mr *mr;
+ struct ib_fast_reg_page_list *frpl;
+ int i, ret = -EINVAL;
+
+ if (pool_size <= 0)
+ goto err;
+ ret = -ENOMEM;
+ pool = kzalloc(sizeof(struct srp_fr_pool) +
+ pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
+ if (!pool)
+ goto err;
+ pool->size = pool_size;
+ pool->max_page_list_len = max_page_list_len;
+ spin_lock_init(&pool->lock);
+ INIT_LIST_HEAD(&pool->free_list);
+
+ for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
+ mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
+ if (IS_ERR(mr)) {
+ ret = PTR_ERR(mr);
+ goto destroy_pool;
+ }
+ d->mr = mr;
+ frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
+ if (IS_ERR(frpl)) {
+ ret = PTR_ERR(frpl);
+ goto destroy_pool;
+ }
+ d->frpl = frpl;
+ list_add_tail(&d->entry, &pool->free_list);
+ }
+
+out:
+ return pool;
+
+destroy_pool:
+ srp_destroy_fr_pool(pool);
+
+err:
+ pool = ERR_PTR(ret);
+ goto out;
+}
+
+/**
+ * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
+ * @pool: Pool to obtain descriptor from.
+ */
+static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
+{
+ struct srp_fr_desc *d = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ if (!list_empty(&pool->free_list)) {
+ d = list_first_entry(&pool->free_list, typeof(*d), entry);
+ list_del(&d->entry);
+ }
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ return d;
+}
+
+/**
+ * srp_fr_pool_put() - put an FR descriptor back in the free list
+ * @pool: Pool the descriptor was allocated from.
+ * @desc: Pointer to an array of fast registration descriptor pointers.
+ * @n: Number of descriptors to put back.
+ *
+ * Note: The caller must already have queued an invalidation request for
+ * desc->mr->rkey before calling this function.
+ */
+static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
+ int n)
+{
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&pool->lock, flags);
+ for (i = 0; i < n; i++)
+ list_add(&desc[i]->entry, &pool->free_list);
+ spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
+{
+ struct srp_device *dev = target->srp_host->srp_dev;
+
+ return srp_create_fr_pool(dev->dev, dev->pd,
+ target->scsi_host->can_queue,
+ dev->max_pages_per_mr);
+}
+
static int srp_create_target_ib(struct srp_target_port *target)
{
+ struct srp_device *dev = target->srp_host->srp_dev;
struct ib_qp_init_attr *init_attr;
struct ib_cq *recv_cq, *send_cq;
struct ib_qp *qp;
+ struct ib_fmr_pool *fmr_pool = NULL;
+ struct srp_fr_pool *fr_pool = NULL;
+ const int m = 1 + dev->use_fast_reg;
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
if (!init_attr)
return -ENOMEM;
- recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
- srp_recv_completion, NULL, target,
+ recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target,
target->queue_size, target->comp_vector);
if (IS_ERR(recv_cq)) {
ret = PTR_ERR(recv_cq);
goto err;
}
- send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
- srp_send_completion, NULL, target,
- target->queue_size, target->comp_vector);
+ send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target,
+ m * target->queue_size, target->comp_vector);
if (IS_ERR(send_cq)) {
ret = PTR_ERR(send_cq);
goto err_recv_cq;
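
srp_fr_pool_get() and srp_fr_pool_put() above are a spinlock-protected free list of preallocated MR/page-list pairs: get returns NULL once the pool is drained (the mapping code then falls back to the global rkey), and put makes a descriptor reusable only after its rkey has been invalidated. Stripped of the locking and the IB objects, the free-list behaviour is simply this (runnable userspace model):

#include <stdio.h>

struct desc { struct desc *next; };
struct pool { struct desc *free_list; };

static struct desc *pool_get(struct pool *p)
{
	struct desc *d = p->free_list;

	if (d)
		p->free_list = d->next;
	return d;			/* NULL means the pool is exhausted */
}

static void pool_put(struct pool *p, struct desc *d)
{
	d->next = p->free_list;
	p->free_list = d;
}

int main(void)
{
	struct pool p = { NULL };
	struct desc d[2];

	pool_put(&p, &d[0]);
	pool_put(&p, &d[1]);
	printf("got %p, %p, %p\n", (void *)pool_get(&p),
	       (void *)pool_get(&p), (void *)pool_get(&p));
	return 0;
}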
@@ -318,16 +474,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
init_attr->event_handler = srp_qp_event;
- init_attr->cap.max_send_wr = target->queue_size;
+ init_attr->cap.max_send_wr = m * target->queue_size;
init_attr->cap.max_recv_wr = target->queue_size;
init_attr->cap.max_recv_sge = 1;
init_attr->cap.max_send_sge = 1;
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr->qp_type = IB_QPT_RC;
init_attr->send_cq = send_cq;
init_attr->recv_cq = recv_cq;
- qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
+ qp = ib_create_qp(dev->pd, init_attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_send_cq;
@@ -337,6 +493,30 @@ static int srp_create_target_ib(struct srp_target_port *target)
if (ret)
goto err_qp;
+ if (dev->use_fast_reg && dev->has_fr) {
+ fr_pool = srp_alloc_fr_pool(target);
+ if (IS_ERR(fr_pool)) {
+ ret = PTR_ERR(fr_pool);
+ shost_printk(KERN_WARNING, target->scsi_host, PFX
+ "FR pool allocation failed (%d)\n", ret);
+ goto err_qp;
+ }
+ if (target->fr_pool)
+ srp_destroy_fr_pool(target->fr_pool);
+ target->fr_pool = fr_pool;
+ } else if (!dev->use_fast_reg && dev->has_fmr) {
+ fmr_pool = srp_alloc_fmr_pool(target);
+ if (IS_ERR(fmr_pool)) {
+ ret = PTR_ERR(fmr_pool);
+ shost_printk(KERN_WARNING, target->scsi_host, PFX
+ "FMR pool allocation failed (%d)\n", ret);
+ goto err_qp;
+ }
+ if (target->fmr_pool)
+ ib_destroy_fmr_pool(target->fmr_pool);
+ target->fmr_pool = fmr_pool;
+ }
+
if (target->qp)
ib_destroy_qp(target->qp);
if (target->recv_cq)
@@ -371,8 +551,16 @@ err:
*/
static void srp_free_target_ib(struct srp_target_port *target)
{
+ struct srp_device *dev = target->srp_host->srp_dev;
int i;
+ if (dev->use_fast_reg) {
+ if (target->fr_pool)
+ srp_destroy_fr_pool(target->fr_pool);
+ } else {
+ if (target->fmr_pool)
+ ib_destroy_fmr_pool(target->fmr_pool);
+ }
ib_destroy_qp(target->qp);
ib_destroy_cq(target->send_cq);
ib_destroy_cq(target->recv_cq);
@@ -577,7 +765,8 @@ static void srp_disconnect_target(struct srp_target_port *target)
static void srp_free_req_data(struct srp_target_port *target)
{
- struct ib_device *ibdev = target->srp_host->srp_dev->dev;
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = dev->dev;
struct srp_request *req;
int i;
@@ -586,7 +775,10 @@ static void srp_free_req_data(struct srp_target_port *target)
for (i = 0; i < target->req_ring_size; ++i) {
req = &target->req_ring[i];
- kfree(req->fmr_list);
+ if (dev->use_fast_reg)
+ kfree(req->fr_list);
+ else
+ kfree(req->fmr_list);
kfree(req->map_page);
if (req->indirect_dma_addr) {
ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
@@ -605,6 +797,7 @@ static int srp_alloc_req_data(struct srp_target_port *target)
struct srp_device *srp_dev = target->srp_host->srp_dev;
struct ib_device *ibdev = srp_dev->dev;
struct srp_request *req;
+ void *mr_list;
dma_addr_t dma_addr;
int i, ret = -ENOMEM;
@@ -617,12 +810,20 @@ static int srp_alloc_req_data(struct srp_target_port *target)
for (i = 0; i < target->req_ring_size; ++i) {
req = &target->req_ring[i];
- req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
- GFP_KERNEL);
- req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *),
- GFP_KERNEL);
+ mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
+ GFP_KERNEL);
+ if (!mr_list)
+ goto out;
+ if (srp_dev->use_fast_reg)
+ req->fr_list = mr_list;
+ else
+ req->fmr_list = mr_list;
+ req->map_page = kmalloc(srp_dev->max_pages_per_mr *
+ sizeof(void *), GFP_KERNEL);
+ if (!req->map_page)
+ goto out;
req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
- if (!req->fmr_list || !req->map_page || !req->indirect_desc)
+ if (!req->indirect_desc)
goto out;
dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
@@ -759,21 +960,56 @@ static int srp_connect_target(struct srp_target_port *target)
}
}
+static int srp_inv_rkey(struct srp_target_port *target, u32 rkey)
+{
+ struct ib_send_wr *bad_wr;
+ struct ib_send_wr wr = {
+ .opcode = IB_WR_LOCAL_INV,
+ .wr_id = LOCAL_INV_WR_ID_MASK,
+ .next = NULL,
+ .num_sge = 0,
+ .send_flags = 0,
+ .ex.invalidate_rkey = rkey,
+ };
+
+ return ib_post_send(target->qp, &wr, &bad_wr);
+}
+
static void srp_unmap_data(struct scsi_cmnd *scmnd,
struct srp_target_port *target,
struct srp_request *req)
{
- struct ib_device *ibdev = target->srp_host->srp_dev->dev;
- struct ib_pool_fmr **pfmr;
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = dev->dev;
+ int i, res;
if (!scsi_sglist(scmnd) ||
(scmnd->sc_data_direction != DMA_TO_DEVICE &&
scmnd->sc_data_direction != DMA_FROM_DEVICE))
return;
- pfmr = req->fmr_list;
- while (req->nfmr--)
- ib_fmr_pool_unmap(*pfmr++);
+ if (dev->use_fast_reg) {
+ struct srp_fr_desc **pfr;
+
+ for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
+ res = srp_inv_rkey(target, (*pfr)->mr->rkey);
+ if (res < 0) {
+ shost_printk(KERN_ERR, target->scsi_host, PFX
+ "Queueing INV WR for rkey %#x failed (%d)\n",
+ (*pfr)->mr->rkey, res);
+ queue_work(system_long_wq,
+ &target->tl_err_work);
+ }
+ }
+ if (req->nmdesc)
+ srp_fr_pool_put(target->fr_pool, req->fr_list,
+ req->nmdesc);
+ } else {
+ struct ib_pool_fmr **pfmr;
+
+ for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
+ ib_fmr_pool_unmap(*pfmr);
+ }
ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
scmnd->sc_data_direction);
@@ -813,6 +1049,10 @@ static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
/**
* srp_free_req() - Unmap data and add request to the free request list.
+ * @target: SRP target port.
+ * @req: Request to be freed.
+ * @scmnd: SCSI command associated with @req.
+ * @req_lim_delta: Amount to be added to @target->req_lim.
*/
static void srp_free_req(struct srp_target_port *target,
struct srp_request *req, struct scsi_cmnd *scmnd,
@@ -882,21 +1122,19 @@ static int srp_rport_reconnect(struct srp_rport *rport)
* callbacks will have finished before a new QP is allocated.
*/
ret = srp_new_cm_id(target);
- /*
- * Whether or not creating a new CM ID succeeded, create a new
- * QP. This guarantees that all completion callback function
- * invocations have finished before request resetting starts.
- */
- if (ret == 0)
- ret = srp_create_target_ib(target);
- else
- srp_create_target_ib(target);
for (i = 0; i < target->req_ring_size; ++i) {
struct srp_request *req = &target->req_ring[i];
srp_finish_req(target, req, NULL, DID_RESET << 16);
}
+ /*
+ * Whether or not creating a new CM ID succeeded, create a new
+ * QP. This guarantees that all callback functions for the old QP have
+ * finished before any send requests are posted on the new QP.
+ */
+ ret += srp_create_target_ib(target);
+
INIT_LIST_HEAD(&target->free_tx);
for (i = 0; i < target->queue_size; ++i)
list_add(&target->tx_ring[i]->list, &target->free_tx);
@@ -928,33 +1166,87 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
static int srp_map_finish_fmr(struct srp_map_state *state,
struct srp_target_port *target)
{
- struct srp_device *dev = target->srp_host->srp_dev;
struct ib_pool_fmr *fmr;
u64 io_addr = 0;
- if (!state->npages)
- return 0;
-
- if (state->npages == 1) {
- srp_map_desc(state, state->base_dma_addr, state->fmr_len,
- target->rkey);
- state->npages = state->fmr_len = 0;
- return 0;
- }
-
- fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages,
+ fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages,
state->npages, io_addr);
if (IS_ERR(fmr))
return PTR_ERR(fmr);
*state->next_fmr++ = fmr;
- state->nfmr++;
+ state->nmdesc++;
+
+ srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey);
- srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey);
- state->npages = state->fmr_len = 0;
return 0;
}
+static int srp_map_finish_fr(struct srp_map_state *state,
+ struct srp_target_port *target)
+{
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_send_wr *bad_wr;
+ struct ib_send_wr wr;
+ struct srp_fr_desc *desc;
+ u32 rkey;
+
+ desc = srp_fr_pool_get(target->fr_pool);
+ if (!desc)
+ return -ENOMEM;
+
+ rkey = ib_inc_rkey(desc->mr->rkey);
+ ib_update_fast_reg_key(desc->mr, rkey);
+
+ memcpy(desc->frpl->page_list, state->pages,
+ sizeof(state->pages[0]) * state->npages);
+
+ memset(&wr, 0, sizeof(wr));
+ wr.opcode = IB_WR_FAST_REG_MR;
+ wr.wr_id = FAST_REG_WR_ID_MASK;
+ wr.wr.fast_reg.iova_start = state->base_dma_addr;
+ wr.wr.fast_reg.page_list = desc->frpl;
+ wr.wr.fast_reg.page_list_len = state->npages;
+ wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
+ wr.wr.fast_reg.length = state->dma_len;
+ wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE);
+ wr.wr.fast_reg.rkey = desc->mr->lkey;
+
+ *state->next_fr++ = desc;
+ state->nmdesc++;
+
+ srp_map_desc(state, state->base_dma_addr, state->dma_len,
+ desc->mr->rkey);
+
+ return ib_post_send(target->qp, &wr, &bad_wr);
+}
+
+static int srp_finish_mapping(struct srp_map_state *state,
+ struct srp_target_port *target)
+{
+ int ret = 0;
+
+ if (state->npages == 0)
+ return 0;
+
+ if (state->npages == 1 && !register_always)
+ srp_map_desc(state, state->base_dma_addr, state->dma_len,
+ target->rkey);
+ else
+ ret = target->srp_host->srp_dev->use_fast_reg ?
+ srp_map_finish_fr(state, target) :
+ srp_map_finish_fmr(state, target);
+
+ if (ret == 0) {
+ state->npages = 0;
+ state->dma_len = 0;
+ }
+
+ return ret;
+}
+
static void srp_map_update_start(struct srp_map_state *state,
struct scatterlist *sg, int sg_index,
dma_addr_t dma_addr)
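
srp_finish_mapping() above is the single decision point for how a run of pages is described to the target: a single page with register_always unset is exposed directly through the global rkey, otherwise it is registered through fast registration or an FMR depending on use_fast_reg. The decision table as a tiny standalone sketch (enum names are illustrative, not kernel symbols):

#include <stdio.h>

enum map_method { MAP_DIRECT_RKEY, MAP_FAST_REG, MAP_FMR };

static enum map_method pick_method(int npages, int register_always,
				   int use_fast_reg)
{
	if (npages == 1 && !register_always)
		return MAP_DIRECT_RKEY;	/* no registration needed */
	return use_fast_reg ? MAP_FAST_REG : MAP_FMR;
}

int main(void)
{
	printf("%d %d %d\n", pick_method(1, 0, 1), pick_method(4, 0, 1),
	       pick_method(4, 0, 0));	/* prints 0 1 2 */
	return 0;
}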
@@ -967,7 +1259,7 @@ static void srp_map_update_start(struct srp_map_state *state,
static int srp_map_sg_entry(struct srp_map_state *state,
struct srp_target_port *target,
struct scatterlist *sg, int sg_index,
- int use_fmr)
+ bool use_mr)
{
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_device *ibdev = dev->dev;
@@ -979,23 +1271,25 @@ static int srp_map_sg_entry(struct srp_map_state *state,
if (!dma_len)
return 0;
- if (use_fmr == SRP_MAP_NO_FMR) {
- /* Once we're in direct map mode for a request, we don't
- * go back to FMR mode, so no need to update anything
+ if (!use_mr) {
+ /*
+ * Once we're in direct map mode for a request, we don't
+ * go back to FMR or FR mode, so no need to update anything
* other than the descriptor.
*/
srp_map_desc(state, dma_addr, dma_len, target->rkey);
return 0;
}
- /* If we start at an offset into the FMR page, don't merge into
- * the current FMR. Finish it out, and use the kernel's MR for this
- * sg entry. This is to avoid potential bugs on some SRP targets
- * that were never quite defined, but went away when the initiator
- * avoided using FMR on such page fragments.
+ /*
+ * Since not all RDMA HW drivers support non-zero page offsets for
+ * FMR, if we start at an offset into a page, don't merge into the
+ * current FMR mapping. Finish it out, and use the kernel's MR for
+ * this sg entry.
*/
- if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) {
- ret = srp_map_finish_fmr(state, target);
+ if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
+ dma_len > dev->mr_max_size) {
+ ret = srp_finish_mapping(state, target);
if (ret)
return ret;
@@ -1004,52 +1298,106 @@ static int srp_map_sg_entry(struct srp_map_state *state,
return 0;
}
- /* If this is the first sg to go into the FMR, save our position.
- * We need to know the first unmapped entry, its index, and the
- * first unmapped address within that entry to be able to restart
- * mapping after an error.
+ /*
+ * If this is the first sg that will be mapped via FMR or via FR, save
+ * our position. We need to know the first unmapped entry, its index,
+ * and the first unmapped address within that entry to be able to
+ * restart mapping after an error.
*/
if (!state->unmapped_sg)
srp_map_update_start(state, sg, sg_index, dma_addr);
while (dma_len) {
- if (state->npages == SRP_FMR_SIZE) {
- ret = srp_map_finish_fmr(state, target);
+ unsigned offset = dma_addr & ~dev->mr_page_mask;
+ if (state->npages == dev->max_pages_per_mr || offset != 0) {
+ ret = srp_finish_mapping(state, target);
if (ret)
return ret;
srp_map_update_start(state, sg, sg_index, dma_addr);
}
- len = min_t(unsigned int, dma_len, dev->fmr_page_size);
+ len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
if (!state->npages)
state->base_dma_addr = dma_addr;
- state->pages[state->npages++] = dma_addr;
- state->fmr_len += len;
+ state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
+ state->dma_len += len;
dma_addr += len;
dma_len -= len;
}
- /* If the last entry of the FMR wasn't a full page, then we need to
+ /*
+ * If the last entry of the MR wasn't a full page, then we need to
* close it out and start a new one -- we can only merge at page
* boundries.
*/
ret = 0;
- if (len != dev->fmr_page_size) {
- ret = srp_map_finish_fmr(state, target);
+ if (len != dev->mr_page_size) {
+ ret = srp_finish_mapping(state, target);
if (!ret)
srp_map_update_start(state, NULL, 0, 0);
}
return ret;
}
+static int srp_map_sg(struct srp_map_state *state,
+ struct srp_target_port *target, struct srp_request *req,
+ struct scatterlist *scat, int count)
+{
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = dev->dev;
+ struct scatterlist *sg;
+ int i;
+ bool use_mr;
+
+ state->desc = req->indirect_desc;
+ state->pages = req->map_page;
+ if (dev->use_fast_reg) {
+ state->next_fr = req->fr_list;
+ use_mr = !!target->fr_pool;
+ } else {
+ state->next_fmr = req->fmr_list;
+ use_mr = !!target->fmr_pool;
+ }
+
+ for_each_sg(scat, sg, count, i) {
+ if (srp_map_sg_entry(state, target, sg, i, use_mr)) {
+ /*
+ * Memory registration failed, so backtrack to the
+ * first unmapped entry and continue on without using
+ * memory registration.
+ */
+ dma_addr_t dma_addr;
+ unsigned int dma_len;
+
+backtrack:
+ sg = state->unmapped_sg;
+ i = state->unmapped_index;
+
+ dma_addr = ib_sg_dma_address(ibdev, sg);
+ dma_len = ib_sg_dma_len(ibdev, sg);
+ dma_len -= (state->unmapped_addr - dma_addr);
+ dma_addr = state->unmapped_addr;
+ use_mr = false;
+ srp_map_desc(state, dma_addr, dma_len, target->rkey);
+ }
+ }
+
+ if (use_mr && srp_finish_mapping(state, target))
+ goto backtrack;
+
+ req->nmdesc = state->nmdesc;
+
+ return 0;
+}
+
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
struct srp_request *req)
{
- struct scatterlist *scat, *sg;
+ struct scatterlist *scat;
struct srp_cmd *cmd = req->cmd->buf;
- int i, len, nents, count, use_fmr;
+ int len, nents, count;
struct srp_device *dev;
struct ib_device *ibdev;
struct srp_map_state state;
@@ -1081,7 +1429,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
fmt = SRP_DATA_DESC_DIRECT;
len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
- if (count == 1) {
+ if (count == 1 && !register_always) {
/*
* The midlayer only generated a single gather/scatter
* entry, or DMA mapping coalesced everything to a
@@ -1094,13 +1442,13 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
buf->key = cpu_to_be32(target->rkey);
buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
- req->nfmr = 0;
+ req->nmdesc = 0;
goto map_complete;
}
- /* We have more than one scatter/gather entry, so build our indirect
- * descriptor table, trying to merge as many entries with FMR as we
- * can.
+ /*
+ * We have more than one scatter/gather entry, so build our indirect
+ * descriptor table, trying to merge as many entries as we can.
*/
indirect_hdr = (void *) cmd->add_data;
@@ -1108,35 +1456,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
target->indirect_size, DMA_TO_DEVICE);
memset(&state, 0, sizeof(state));
- state.desc = req->indirect_desc;
- state.pages = req->map_page;
- state.next_fmr = req->fmr_list;
-
- use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
-
- for_each_sg(scat, sg, count, i) {
- if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) {
- /* FMR mapping failed, so backtrack to the first
- * unmapped entry and continue on without using FMR.
- */
- dma_addr_t dma_addr;
- unsigned int dma_len;
-
-backtrack:
- sg = state.unmapped_sg;
- i = state.unmapped_index;
-
- dma_addr = ib_sg_dma_address(ibdev, sg);
- dma_len = ib_sg_dma_len(ibdev, sg);
- dma_len -= (state.unmapped_addr - dma_addr);
- dma_addr = state.unmapped_addr;
- use_fmr = SRP_MAP_NO_FMR;
- srp_map_desc(&state, dma_addr, dma_len, target->rkey);
- }
- }
-
- if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target))
- goto backtrack;
+ srp_map_sg(&state, target, req, scat, count);
/* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this
@@ -1144,9 +1464,9 @@ backtrack:
* guaranteed to fit into the command, as the SCSI layer won't
* give us more S/G entries than we allow.
*/
- req->nfmr = state.nfmr;
if (state.ndesc == 1) {
- /* FMR mapping was able to collapse this to one entry,
+ /*
+ * Memory registration collapsed the sg-list into one entry,
* so use a direct descriptor.
*/
struct srp_direct_buf *buf = (void *) cmd->add_data;
@@ -1455,6 +1775,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
/**
* srp_tl_err_work() - handle a transport layer error
+ * @work: Work structure embedded in an SRP target port.
*
* Note: This function may get invoked before the rport has been created,
* hence the target->rport test.
@@ -1468,14 +1789,24 @@ static void srp_tl_err_work(struct work_struct *work)
srp_start_tl_fail_timers(target->rport);
}
-static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err,
- struct srp_target_port *target)
+static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
+ bool send_err, struct srp_target_port *target)
{
if (target->connected && !target->qp_in_error) {
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "failed %s status %d\n",
- send_err ? "send" : "receive",
- wc_status);
+ if (wr_id & LOCAL_INV_WR_ID_MASK) {
+ shost_printk(KERN_ERR, target->scsi_host, PFX
+ "LOCAL_INV failed with status %d\n",
+ wc_status);
+ } else if (wr_id & FAST_REG_WR_ID_MASK) {
+ shost_printk(KERN_ERR, target->scsi_host, PFX
+ "FAST_REG_MR failed status %d\n",
+ wc_status);
+ } else {
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "failed %s status %d for iu %p\n",
+ send_err ? "send" : "receive",
+ wc_status, (void *)(uintptr_t)wr_id);
+ }
queue_work(system_long_wq, &target->tl_err_work);
}
target->qp_in_error = true;
@@ -1491,7 +1822,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
if (likely(wc.status == IB_WC_SUCCESS)) {
srp_handle_recv(target, &wc);
} else {
- srp_handle_qp_err(wc.status, false, target);
+ srp_handle_qp_err(wc.wr_id, wc.status, false, target);
}
}
}
@@ -1507,7 +1838,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
list_add(&iu->list, &target->free_tx);
} else {
- srp_handle_qp_err(wc.status, true, target);
+ srp_handle_qp_err(wc.wr_id, wc.status, true, target);
}
}
}
@@ -1521,7 +1852,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
struct srp_cmd *cmd;
struct ib_device *dev;
unsigned long flags;
- int len, result;
+ int len, ret;
const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
/*
@@ -1533,12 +1864,9 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
if (in_scsi_eh)
mutex_lock(&rport->mutex);
- result = srp_chkready(target->rport);
- if (unlikely(result)) {
- scmnd->result = result;
- scmnd->scsi_done(scmnd);
- goto unlock_rport;
- }
+ scmnd->result = srp_chkready(target->rport);
+ if (unlikely(scmnd->result))
+ goto err;
spin_lock_irqsave(&target->lock, flags);
iu = __srp_get_tx_iu(target, SRP_IU_CMD);
@@ -1553,7 +1881,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
DMA_TO_DEVICE);
- scmnd->result = 0;
scmnd->host_scribble = (void *) req;
cmd = iu->buf;
@@ -1570,7 +1897,15 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
len = srp_map_data(scmnd, target, req);
if (len < 0) {
shost_printk(KERN_ERR, target->scsi_host,
- PFX "Failed to map data\n");
+ PFX "Failed to map data (%d)\n", len);
+ /*
+ * If we ran out of memory descriptors (-ENOMEM) because an
+ * application is queuing many requests with more than
+ * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
+ * to reduce queue depth temporarily.
+ */
+ scmnd->result = len == -ENOMEM ?
+ DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
goto err_iu;
}
@@ -1582,11 +1917,13 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
goto err_unmap;
}
+ ret = 0;
+
unlock_rport:
if (in_scsi_eh)
mutex_unlock(&rport->mutex);
- return 0;
+ return ret;
err_unmap:
srp_unmap_data(scmnd, target, req);
@@ -1594,16 +1931,27 @@ err_unmap:
err_iu:
srp_put_tx_iu(target, iu, SRP_IU_CMD);
+ /*
+ * Avoid that the loops that iterate over the request ring can
+ * encounter a dangling SCSI command pointer.
+ */
+ req->scmnd = NULL;
+
spin_lock_irqsave(&target->lock, flags);
list_add(&req->list, &target->free_reqs);
err_unlock:
spin_unlock_irqrestore(&target->lock, flags);
- if (in_scsi_eh)
- mutex_unlock(&rport->mutex);
+err:
+ if (scmnd->result) {
+ scmnd->scsi_done(scmnd);
+ ret = 0;
+ } else {
+ ret = SCSI_MLQUEUE_HOST_BUSY;
+ }
- return SCSI_MLQUEUE_HOST_BUSY;
+ goto unlock_rport;
}
/*
@@ -2310,6 +2658,8 @@ static struct class srp_class = {
/**
* srp_conn_unique() - check whether the connection to a target is unique
+ * @host: SRP host.
+ * @target: SRP target port.
*/
static bool srp_conn_unique(struct srp_host *host,
struct srp_target_port *target)
@@ -2605,7 +2955,8 @@ static ssize_t srp_create_target(struct device *dev,
container_of(dev, struct srp_host, dev);
struct Scsi_Host *target_host;
struct srp_target_port *target;
- struct ib_device *ibdev = host->srp_dev->dev;
+ struct srp_device *srp_dev = host->srp_dev;
+ struct ib_device *ibdev = srp_dev->dev;
int ret;
target_host = scsi_host_alloc(&srp_template,
@@ -2650,9 +3001,9 @@ static ssize_t srp_create_target(struct device *dev,
goto err;
}
- if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
- target->cmd_sg_cnt < target->sg_tablesize) {
- pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
+ if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
+ target->cmd_sg_cnt < target->sg_tablesize) {
+ pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
target->sg_tablesize = target->cmd_sg_cnt;
}
@@ -2790,9 +3141,9 @@ static void srp_add_one(struct ib_device *device)
{
struct srp_device *srp_dev;
struct ib_device_attr *dev_attr;
- struct ib_fmr_pool_param fmr_param;
struct srp_host *host;
- int max_pages_per_fmr, fmr_page_shift, s, e, p;
+ int mr_page_shift, s, e, p;
+ u64 max_pages_per_mr;
dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
if (!dev_attr)
@@ -2807,15 +3158,39 @@ static void srp_add_one(struct ib_device *device)
if (!srp_dev)
goto free_attr;
+ srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+ device->map_phys_fmr && device->unmap_fmr);
+ srp_dev->has_fr = (dev_attr->device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ if (!srp_dev->has_fmr && !srp_dev->has_fr)
+ dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+
+ srp_dev->use_fast_reg = (srp_dev->has_fr &&
+ (!srp_dev->has_fmr || prefer_fr));
+
/*
* Use the smallest page size supported by the HCA, down to a
* minimum of 4096 bytes. We're unlikely to build large sglists
* out of smaller entries.
*/
- fmr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
- srp_dev->fmr_page_size = 1 << fmr_page_shift;
- srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1);
- srp_dev->fmr_max_size = srp_dev->fmr_page_size * SRP_FMR_SIZE;
+ mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
+ srp_dev->mr_page_size = 1 << mr_page_shift;
+ srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
+ max_pages_per_mr = dev_attr->max_mr_size;
+ do_div(max_pages_per_mr, srp_dev->mr_page_size);
+ srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
+ max_pages_per_mr);
+ if (srp_dev->use_fast_reg) {
+ srp_dev->max_pages_per_mr =
+ min_t(u32, srp_dev->max_pages_per_mr,
+ dev_attr->max_fast_reg_page_list_len);
+ }
+ srp_dev->mr_max_size = srp_dev->mr_page_size *
+ srp_dev->max_pages_per_mr;
+ pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+ device->name, mr_page_shift, dev_attr->max_mr_size,
+ dev_attr->max_fast_reg_page_list_len,
+ srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -2831,27 +3206,6 @@ static void srp_add_one(struct ib_device *device)
if (IS_ERR(srp_dev->mr))
goto err_pd;
- for (max_pages_per_fmr = SRP_FMR_SIZE;
- max_pages_per_fmr >= SRP_FMR_MIN_SIZE;
- max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) {
- memset(&fmr_param, 0, sizeof fmr_param);
- fmr_param.pool_size = SRP_FMR_POOL_SIZE;
- fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE;
- fmr_param.cache = 1;
- fmr_param.max_pages_per_fmr = max_pages_per_fmr;
- fmr_param.page_shift = fmr_page_shift;
- fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ);
-
- srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
- if (!IS_ERR(srp_dev->fmr_pool))
- break;
- }
-
- if (IS_ERR(srp_dev->fmr_pool))
- srp_dev->fmr_pool = NULL;
-
if (device->node_type == RDMA_NODE_IB_SWITCH) {
s = 0;
e = 0;
@@ -2914,8 +3268,6 @@ static void srp_remove_one(struct ib_device *device)
kfree(host);
}
- if (srp_dev->fmr_pool)
- ib_destroy_fmr_pool(srp_dev->fmr_pool);
ib_dereg_mr(srp_dev->mr);
ib_dealloc_pd(srp_dev->pd);
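
srp_add_one() now sizes registrations from the device attributes instead of the old fixed SRP_FMR_* constants: max_pages_per_mr is max_mr_size divided by the MR page size, clamped by SRP_MAX_PAGES_PER_MR and, when fast registration is selected, by max_fast_reg_page_list_len; use_fast_reg itself is has_fr && (!has_fmr || prefer_fr). A worked example with made-up device attributes:

#include <stdint.h>
#include <stdio.h>

#define SRP_MAX_PAGES_PER_MR 512

int main(void)
{
	uint64_t max_mr_size = 2ULL << 20;	/* 2 MiB, from ib_device_attr */
	unsigned int mr_page_size = 4096;
	unsigned int max_frpl_len = 256;	/* max_fast_reg_page_list_len */
	int use_fast_reg = 1;

	uint64_t pages = max_mr_size / mr_page_size;	/* 512 */
	if (pages > SRP_MAX_PAGES_PER_MR)
		pages = SRP_MAX_PAGES_PER_MR;
	if (use_fast_reg && pages > max_frpl_len)
		pages = max_frpl_len;			/* 256 */

	printf("max_pages_per_mr = %llu, mr_max_size = %llu bytes\n",
	       (unsigned long long)pages,
	       (unsigned long long)(pages * mr_page_size));
	return 0;
}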
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index aad27b7..e46ecb1 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -66,13 +66,10 @@ enum {
SRP_TAG_NO_REQ = ~0U,
SRP_TAG_TSK_MGMT = 1U << 31,
- SRP_FMR_SIZE = 512,
- SRP_FMR_MIN_SIZE = 128,
- SRP_FMR_POOL_SIZE = 1024,
- SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4,
+ SRP_MAX_PAGES_PER_MR = 512,
- SRP_MAP_ALLOW_FMR = 0,
- SRP_MAP_NO_FMR = 1,
+ LOCAL_INV_WR_ID_MASK = 1,
+ FAST_REG_WR_ID_MASK = 2,
};
enum srp_target_state {
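
LOCAL_INV_WR_ID_MASK and FAST_REG_WR_ID_MASK let srp_handle_qp_err() tell the new unsignaled registration work requests apart from normal completions, whose wr_id is an iu pointer and therefore has its low bits clear (kmalloc'ed pointers are at least word aligned). A small sketch of that classification:

#include <stdint.h>
#include <stdio.h>

#define LOCAL_INV_WR_ID_MASK	1
#define FAST_REG_WR_ID_MASK	2

static const char *classify(uint64_t wr_id)
{
	if (wr_id & LOCAL_INV_WR_ID_MASK)
		return "LOCAL_INV";
	if (wr_id & FAST_REG_WR_ID_MASK)
		return "FAST_REG_MR";
	return "regular send/recv (iu pointer)";
}

int main(void)
{
	printf("%s\n", classify(LOCAL_INV_WR_ID_MASK));
	printf("%s\n", classify(FAST_REG_WR_ID_MASK));
	printf("%s\n", classify(0x7f0000001000ULL));	/* aligned pointer */
	return 0;
}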
@@ -86,15 +83,24 @@ enum srp_iu_type {
SRP_IU_RSP,
};
+/*
+ * @mr_page_mask: HCA memory registration page mask.
+ * @mr_page_size: HCA memory registration page size.
+ * @mr_max_size: Maximum size in bytes of a single FMR / FR registration
+ * request.
+ */
struct srp_device {
struct list_head dev_list;
struct ib_device *dev;
struct ib_pd *pd;
struct ib_mr *mr;
- struct ib_fmr_pool *fmr_pool;
- u64 fmr_page_mask;
- int fmr_page_size;
- int fmr_max_size;
+ u64 mr_page_mask;
+ int mr_page_size;
+ int mr_max_size;
+ int max_pages_per_mr;
+ bool has_fmr;
+ bool has_fr;
+ bool use_fast_reg;
};
struct srp_host {
@@ -112,11 +118,14 @@ struct srp_request {
struct list_head list;
struct scsi_cmnd *scmnd;
struct srp_iu *cmd;
- struct ib_pool_fmr **fmr_list;
+ union {
+ struct ib_pool_fmr **fmr_list;
+ struct srp_fr_desc **fr_list;
+ };
u64 *map_page;
struct srp_direct_buf *indirect_desc;
dma_addr_t indirect_dma_addr;
- short nfmr;
+ short nmdesc;
short index;
};
@@ -131,6 +140,10 @@ struct srp_target_port {
struct ib_cq *send_cq ____cacheline_aligned_in_smp;
struct ib_cq *recv_cq;
struct ib_qp *qp;
+ union {
+ struct ib_fmr_pool *fmr_pool;
+ struct srp_fr_pool *fr_pool;
+ };
u32 lkey;
u32 rkey;
enum srp_target_state state;
@@ -197,15 +210,66 @@ struct srp_iu {
enum dma_data_direction direction;
};
+/**
+ * struct srp_fr_desc - fast registration work request arguments
+ * @entry: Entry in srp_fr_pool.free_list.
+ * @mr: Memory region.
+ * @frpl: Fast registration page list.
+ */
+struct srp_fr_desc {
+ struct list_head entry;
+ struct ib_mr *mr;
+ struct ib_fast_reg_page_list *frpl;
+};
+
+/**
+ * struct srp_fr_pool - pool of fast registration descriptors
+ *
+ * An entry is available for allocation if and only if it occurs in @free_list.
+ *
+ * @size: Number of descriptors in this pool.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ * @lock: Protects free_list.
+ * @free_list: List of free descriptors.
+ * @desc: Fast registration descriptor pool.
+ */
+struct srp_fr_pool {
+ int size;
+ int max_page_list_len;
+ spinlock_t lock;
+ struct list_head free_list;
+ struct srp_fr_desc desc[0];
+};
+
+/**
+ * struct srp_map_state - per-request DMA memory mapping state
+ * @desc: Pointer to the element of the SRP buffer descriptor array
+ * that is being filled in.
+ * @pages: Array with DMA addresses of pages being considered for
+ * memory registration.
+ * @base_dma_addr: DMA address of the first page that has not yet been mapped.
+ * @dma_len: Number of bytes that will be registered with the next
+ * FMR or FR memory registration call.
+ * @total_len: Total number of bytes in the sg-list being mapped.
+ * @npages: Number of page addresses in the pages[] array.
+ * @nmdesc: Number of FMR or FR memory descriptors used for mapping.
+ * @ndesc: Number of SRP buffer descriptors that have been filled in.
+ * @unmapped_sg: First element of the sg-list that is mapped via FMR or FR.
+ * @unmapped_index: Index of the first element mapped via FMR or FR.
+ * @unmapped_addr: DMA address of the first element mapped via FMR or FR.
+ */
struct srp_map_state {
- struct ib_pool_fmr **next_fmr;
+ union {
+ struct ib_pool_fmr **next_fmr;
+ struct srp_fr_desc **next_fr;
+ };
struct srp_direct_buf *desc;
u64 *pages;
dma_addr_t base_dma_addr;
- u32 fmr_len;
+ u32 dma_len;
u32 total_len;
unsigned int npages;
- unsigned int nfmr;
+ unsigned int nmdesc;
unsigned int ndesc;
struct scatterlist *unmapped_sg;
int unmapped_index;