summaryrefslogtreecommitdiffstats
path: root/sys/ofed
diff options
context:
space:
mode:
Diffstat (limited to 'sys/ofed')
-rw-r--r--sys/ofed/drivers/infiniband/core/addr.c2
-rw-r--r--sys/ofed/drivers/infiniband/core/cma.c2
-rw-r--r--sys/ofed/drivers/infiniband/core/core_priv.h3
-rw-r--r--sys/ofed/drivers/infiniband/core/device.c24
-rw-r--r--sys/ofed/drivers/infiniband/core/sa_query.c21
-rw-r--r--sys/ofed/drivers/infiniband/core/sysfs.c291
-rw-r--r--sys/ofed/drivers/infiniband/core/uverbs_cmd.c30
-rw-r--r--sys/ofed/drivers/infiniband/core/uverbs_main.c9
-rw-r--r--sys/ofed/drivers/infiniband/core/verbs.c28
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/Kconfig2
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/Makefile33
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/ah.c52
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c688
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/cm.c440
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/cq.c160
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/mad.c2014
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/main.c1826
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/mcg.c1254
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h489
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/mr.c515
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/qp.c2212
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/srq.c69
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c800
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/user.h12
-rw-r--r--sys/ofed/drivers/infiniband/hw/mlx4/wc.c1
-rw-r--r--sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c2
-rw-r--r--sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c2
-rw-r--r--sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c1
-rw-r--r--sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c4
-rw-r--r--sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h3
-rw-r--r--sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c17
-rw-r--r--sys/ofed/drivers/net/mlx4/Makefile37
-rw-r--r--sys/ofed/drivers/net/mlx4/alloc.c45
-rw-r--r--sys/ofed/drivers/net/mlx4/catas.c50
-rw-r--r--sys/ofed/drivers/net/mlx4/cmd.c1783
-rw-r--r--sys/ofed/drivers/net/mlx4/cq.c220
-rw-r--r--sys/ofed/drivers/net/mlx4/en_cq.c6
-rw-r--r--sys/ofed/drivers/net/mlx4/en_main.c22
-rw-r--r--sys/ofed/drivers/net/mlx4/en_netdev.c10
-rw-r--r--sys/ofed/drivers/net/mlx4/en_port.c33
-rw-r--r--sys/ofed/drivers/net/mlx4/en_port.h7
-rw-r--r--sys/ofed/drivers/net/mlx4/en_rx.c6
-rw-r--r--sys/ofed/drivers/net/mlx4/en_tx.c4
-rw-r--r--sys/ofed/drivers/net/mlx4/eq.c893
-rw-r--r--sys/ofed/drivers/net/mlx4/fw.c1002
-rw-r--r--sys/ofed/drivers/net/mlx4/fw.h50
-rw-r--r--sys/ofed/drivers/net/mlx4/icm.c151
-rw-r--r--sys/ofed/drivers/net/mlx4/icm.h20
-rw-r--r--sys/ofed/drivers/net/mlx4/intf.c53
-rw-r--r--sys/ofed/drivers/net/mlx4/main.c2050
-rw-r--r--sys/ofed/drivers/net/mlx4/mcg.c1188
-rw-r--r--sys/ofed/drivers/net/mlx4/mlx4.h960
-rw-r--r--sys/ofed/drivers/net/mlx4/mlx4_en.h11
-rw-r--r--sys/ofed/drivers/net/mlx4/mr.c520
-rw-r--r--sys/ofed/drivers/net/mlx4/pd.c107
-rw-r--r--sys/ofed/drivers/net/mlx4/port.c809
-rw-r--r--sys/ofed/drivers/net/mlx4/profile.c40
-rw-r--r--sys/ofed/drivers/net/mlx4/qp.c364
-rw-r--r--sys/ofed/drivers/net/mlx4/reset.c7
-rw-r--r--sys/ofed/drivers/net/mlx4/resource_tracker.c4315
-rw-r--r--sys/ofed/drivers/net/mlx4/sense.c46
-rw-r--r--sys/ofed/drivers/net/mlx4/srq.c160
-rw-r--r--sys/ofed/drivers/net/mlx4/sys_tune.c325
-rw-r--r--sys/ofed/include/asm/atomic.h22
-rw-r--r--sys/ofed/include/asm/byteorder.h1
-rw-r--r--sys/ofed/include/linux/atomic.h53
-rw-r--r--sys/ofed/include/linux/bitops.h166
-rw-r--r--sys/ofed/include/linux/clocksource.h17
-rw-r--r--sys/ofed/include/linux/compat.h3
-rw-r--r--sys/ofed/include/linux/device.h6
-rw-r--r--sys/ofed/include/linux/dma-mapping.h7
-rw-r--r--sys/ofed/include/linux/gfp.h4
-rw-r--r--sys/ofed/include/linux/idr.h4
-rw-r--r--sys/ofed/include/linux/if_ether.h5
-rw-r--r--sys/ofed/include/linux/in6.h2
-rw-r--r--sys/ofed/include/linux/kernel.h70
-rw-r--r--sys/ofed/include/linux/linux_compat.c2
-rw-r--r--sys/ofed/include/linux/list.h3
-rw-r--r--sys/ofed/include/linux/log2.h115
-rw-r--r--sys/ofed/include/linux/mlx4/cmd.h70
-rw-r--r--sys/ofed/include/linux/mlx4/cq.h18
-rw-r--r--sys/ofed/include/linux/mlx4/device.h744
-rw-r--r--sys/ofed/include/linux/mlx4/driver.h40
-rw-r--r--sys/ofed/include/linux/mlx4/qp.h71
-rw-r--r--sys/ofed/include/linux/mlx4/srq.h12
-rw-r--r--sys/ofed/include/linux/moduleparam.h3
-rw-r--r--sys/ofed/include/linux/pci.h107
-rw-r--r--sys/ofed/include/linux/sysfs.h6
-rw-r--r--sys/ofed/include/linux/types.h2
-rw-r--r--sys/ofed/include/linux/workqueue.h21
-rw-r--r--sys/ofed/include/rdma/ib_cm.h15
-rw-r--r--sys/ofed/include/rdma/ib_mad.h2
-rw-r--r--sys/ofed/include/rdma/ib_pma.h156
-rw-r--r--sys/ofed/include/rdma/ib_sa.h34
-rw-r--r--sys/ofed/include/rdma/ib_user_verbs.h91
-rw-r--r--sys/ofed/include/rdma/ib_verbs.h253
96 files changed, 24925 insertions, 3500 deletions
diff --git a/sys/ofed/drivers/infiniband/core/addr.c b/sys/ofed/drivers/infiniband/core/addr.c
index a467807..0048c7c 100644
--- a/sys/ofed/drivers/infiniband/core/addr.c
+++ b/sys/ofed/drivers/infiniband/core/addr.c
@@ -356,7 +356,7 @@ static int addr_resolve(struct sockaddr *src_in,
u_char edst[MAX_ADDR_LEN];
int multi;
int bcast;
- int error;
+ int error = 0;
/*
* Determine whether the address is unicast, multicast, or broadcast
diff --git a/sys/ofed/drivers/infiniband/core/cma.c b/sys/ofed/drivers/infiniband/core/cma.c
index 34419f3..318beb1 100644
--- a/sys/ofed/drivers/infiniband/core/cma.c
+++ b/sys/ofed/drivers/infiniband/core/cma.c
@@ -2957,7 +2957,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
- } else if ((addr->sa_family == AF_INET6)) {
+ } else if (addr->sa_family == AF_INET6) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
diff --git a/sys/ofed/drivers/infiniband/core/core_priv.h b/sys/ofed/drivers/infiniband/core/core_priv.h
index 05ac36e..08c4bbb 100644
--- a/sys/ofed/drivers/infiniband/core/core_priv.h
+++ b/sys/ofed/drivers/infiniband/core/core_priv.h
@@ -38,7 +38,8 @@
#include <rdma/ib_verbs.h>
-int ib_device_register_sysfs(struct ib_device *device);
+int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *,
+ u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_sysfs_setup(void);
diff --git a/sys/ofed/drivers/infiniband/core/device.c b/sys/ofed/drivers/infiniband/core/device.c
index 9d34bb6..6c2c2f4 100644
--- a/sys/ofed/drivers/infiniband/core/device.c
+++ b/sys/ofed/drivers/infiniband/core/device.c
@@ -273,7 +273,9 @@ out:
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
-int ib_register_device(struct ib_device *device)
+int ib_register_device(struct ib_device *device,
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *))
{
int ret;
@@ -294,8 +296,6 @@ int ib_register_device(struct ib_device *device)
INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
- device->ib_uverbs_xrcd_table = RB_ROOT;
- mutex_init(&device->xrcd_table_mutex);
ret = read_port_table_lengths(device);
if (ret) {
@@ -304,7 +304,7 @@ int ib_register_device(struct ib_device *device)
goto out;
}
- ret = ib_device_register_sysfs(device);
+ ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
@@ -752,3 +752,19 @@ static void __exit ib_core_cleanup(void)
module_init(ib_core_init);
module_exit(ib_core_cleanup);
+
+#undef MODULE_VERSION
+#include <sys/module.h>
+static int
+ibcore_evhand(module_t mod, int event, void *arg)
+{
+ return (0);
+}
+
+static moduledata_t ibcore_mod = {
+ .name = "ibcore",
+ .evhand = ibcore_evhand,
+};
+
+MODULE_VERSION(ibcore, 1);
+DECLARE_MODULE(ibcore, ibcore_mod, SI_SUB_SMP, SI_ORDER_ANY);
diff --git a/sys/ofed/drivers/infiniband/core/sa_query.c b/sys/ofed/drivers/infiniband/core/sa_query.c
index 0fc1c0e..f36dbd6 100644
--- a/sys/ofed/drivers/infiniband/core/sa_query.c
+++ b/sys/ofed/drivers/infiniband/core/sa_query.c
@@ -1105,6 +1105,27 @@ static void ib_sa_inform_release(struct ib_sa_query *sa_query)
kfree(container_of(sa_query, struct ib_sa_inform_query, sa_query));
}
+int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_guidinfo_rec *rec,
+ ib_sa_comp_mask comp_mask, u8 method,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_guidinfo_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
+{
+ // stub function -
+ // called originally from mad.c under mlx4_ib_init_sriov()
+ // which calls mlx4_ib_init_alias_guid_service() in alias_GUID.c
+ // which goes down to this function
+
+ printk("ERROR: function should be called only in SRIOV flow!!!");
+
+ return 0;
+}
+
/**
* ib_sa_informinfo_query - Start an InformInfo registration.
* @client:SA client
diff --git a/sys/ofed/drivers/infiniband/core/sysfs.c b/sys/ofed/drivers/infiniband/core/sysfs.c
index a406406..7c9b4b2 100644
--- a/sys/ofed/drivers/infiniband/core/sysfs.c
+++ b/sys/ofed/drivers/infiniband/core/sysfs.c
@@ -38,6 +38,7 @@
#include <linux/string.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
struct ib_port {
struct kobject kobj;
@@ -103,7 +104,7 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
return ret;
return sprintf(buf, "%d: %s\n", attr.state,
- attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
+ attr.state < ARRAY_SIZE(state_name) ?
state_name[attr.state] : "UNKNOWN");
}
@@ -292,118 +293,124 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
return sprintf(buf, "0x%04x\n", pkey);
}
-#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
-struct port_table_attribute port_pma_attr_##_name = { \
- .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
- .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
-}
-
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t get_pma_counters(struct ib_port *p, struct port_attribute *attr,
+ char *buf, int c_ext)
{
- struct port_table_attribute *tab_attr =
- container_of(attr, struct port_table_attribute, attr);
- int offset = tab_attr->index & 0xffff;
- int width = (tab_attr->index >> 16) & 0xff;
- struct ib_mad *in_mad = NULL;
- struct ib_mad *out_mad = NULL;
- ssize_t ret;
-
- if (!p->ibdev->process_mad)
- return sprintf(buf, "N/A (no PMA)\n");
-
- in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
- if (!in_mad || !out_mad) {
- ret = -ENOMEM;
- goto out;
- }
-
- in_mad->mad_hdr.base_version = 1;
- in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
- in_mad->mad_hdr.class_version = 1;
- in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
- in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int offset = tab_attr->index & 0xffff;
+ int width = (tab_attr->index >> 16) & 0xff;
+ struct ib_mad *in_mad = NULL;
+ struct ib_mad *out_mad = NULL;
+ ssize_t ret;
+
+ if (!p->ibdev->process_mad)
+ return -ENXIO;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ in_mad->mad_hdr.base_version = 1;
+ in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
+ in_mad->mad_hdr.class_version = 1;
+ in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
+ if (c_ext)
+ in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS_EXT;
+ else
+ in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS;
+
+ in_mad->data[41] = p->port_num; /* PortSelect field */
+
+ if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
+ p->port_num, NULL, NULL, in_mad, out_mad) &
+ (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
+ (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ switch (width) {
+ case 4:
+ ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+ (4 - (offset % 8))) & 0xf);
+ break;
+ case 8:
+ ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+ break;
+ case 16:
+ ret = sprintf(buf, "%u\n",
+ be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ case 32:
+ ret = sprintf(buf, "%u\n",
+ be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ case 64:
+ ret = sprintf(buf, "%llu\n", (unsigned long long)
+ be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8)));
+ break;
+ default:
+ ret = 0;
+ }
- in_mad->data[41] = p->port_num; /* PortSelect field */
-
- if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
- p->port_num, NULL, NULL, in_mad, out_mad) &
- (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
- (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
- ret = -EINVAL;
- goto out;
- }
+out:
+ kfree(in_mad);
+ kfree(out_mad);
- switch (width) {
- case 4:
- ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
- (4 - (offset % 8))) & 0xf);
- break;
- case 8:
- ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
- break;
- case 16:
- ret = sprintf(buf, "%u\n",
- be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
- break;
- case 32:
- ret = sprintf(buf, "%u\n",
- be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
- break;
- default:
- ret = 0;
- }
+ return ret;
+}
-out:
- kfree(in_mad);
- kfree(out_mad);
+#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
+struct port_table_attribute port_pma_attr_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+}
- return ret;
+static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ return get_pma_counters(p, attr, buf, 0);
}
-static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
-static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
-static PORT_PMA_ATTR(link_downed , 2, 8, 56);
-static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
+static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
+static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
+static PORT_PMA_ATTR(link_downed , 2, 8, 56);
+static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80);
static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96);
-static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
+static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128);
-static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
+static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152);
static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156);
-static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
-static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
-static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
-static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
-static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
-/*
- * There is no bit allocated for port_xmit_wait in the CounterSelect field
- * (IB spec). However, since this bit is ignored when reading
- * (show_pma_counter), the _counter field of port_xmit_wait can be set to zero.
- */
-static PORT_PMA_ATTR(port_xmit_wait , 0, 32, 320);
+static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
+static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
+static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
+static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
+static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
static struct attribute *pma_attrs[] = {
- &port_pma_attr_symbol_error.attr.attr,
- &port_pma_attr_link_error_recovery.attr.attr,
- &port_pma_attr_link_downed.attr.attr,
- &port_pma_attr_port_rcv_errors.attr.attr,
- &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
- &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
- &port_pma_attr_port_xmit_discards.attr.attr,
- &port_pma_attr_port_xmit_constraint_errors.attr.attr,
- &port_pma_attr_port_rcv_constraint_errors.attr.attr,
- &port_pma_attr_local_link_integrity_errors.attr.attr,
- &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
- &port_pma_attr_VL15_dropped.attr.attr,
- &port_pma_attr_port_xmit_data.attr.attr,
- &port_pma_attr_port_rcv_data.attr.attr,
- &port_pma_attr_port_xmit_packets.attr.attr,
- &port_pma_attr_port_rcv_packets.attr.attr,
- &port_pma_attr_port_xmit_wait.attr.attr,
- NULL
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_port_xmit_data.attr.attr,
+ &port_pma_attr_port_rcv_data.attr.attr,
+ &port_pma_attr_port_xmit_packets.attr.attr,
+ &port_pma_attr_port_rcv_packets.attr.attr,
+ NULL
};
static struct attribute_group pma_group = {
@@ -411,6 +418,44 @@ static struct attribute_group pma_group = {
.attrs = pma_attrs
};
+#define PORT_PMA_ATTR_EXT(_name, _counter, _width, _offset) \
+struct port_table_attribute port_pma_attr_ext_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter_ext, NULL), \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+}
+
+static ssize_t show_pma_counter_ext(struct ib_port *p,
+ struct port_attribute *attr, char *buf)
+{
+ return get_pma_counters(p, attr, buf, 1);
+}
+
+static PORT_PMA_ATTR_EXT(port_xmit_data_64 , 0, 64, 64);
+static PORT_PMA_ATTR_EXT(port_rcv_data_64 , 0, 64, 128);
+static PORT_PMA_ATTR_EXT(port_xmit_packets_64 , 0, 64, 192);
+static PORT_PMA_ATTR_EXT(port_rcv_packets_64 , 0, 64, 256);
+static PORT_PMA_ATTR_EXT(port_unicast_xmit_packets , 0, 64, 320);
+static PORT_PMA_ATTR_EXT(port_unicast_rcv_packets , 0, 64, 384);
+static PORT_PMA_ATTR_EXT(port_multicast_xmit_packets , 0, 64, 448);
+static PORT_PMA_ATTR_EXT(port_multicast_rcv_packets , 0, 64, 512);
+
+static struct attribute *pma_attrs_ext[] = {
+ &port_pma_attr_ext_port_xmit_data_64.attr.attr,
+ &port_pma_attr_ext_port_rcv_data_64.attr.attr,
+ &port_pma_attr_ext_port_xmit_packets_64.attr.attr,
+ &port_pma_attr_ext_port_rcv_packets_64.attr.attr,
+ &port_pma_attr_ext_port_unicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_unicast_rcv_packets.attr.attr,
+ &port_pma_attr_ext_port_multicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_multicast_rcv_packets.attr.attr,
+ NULL
+};
+
+static struct attribute_group pma_ext_group = {
+ .name = "counters_ext",
+ .attrs = pma_attrs_ext
+};
+
static void ib_port_release(struct kobject *kobj)
{
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
@@ -503,7 +548,9 @@ err:
return NULL;
}
-static int add_port(struct ib_device *device, int port_num)
+static int add_port(struct ib_device *device, int port_num,
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *))
{
struct ib_port *p;
struct ib_port_attr attr;
@@ -522,7 +569,7 @@ static int add_port(struct ib_device *device, int port_num)
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
- device->ports_parent,
+ kobject_get(device->ports_parent),
"%d", port_num);
if (ret)
goto err_put;
@@ -531,10 +578,14 @@ static int add_port(struct ib_device *device, int port_num)
if (ret)
goto err_put;
+ ret = sysfs_create_group(&p->kobj, &pma_ext_group);
+ if (ret)
+ goto err_remove_pma;
+
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
if (!p->gid_group.attrs)
- goto err_remove_pma;
+ goto err_remove_pma_ext;
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
@@ -550,6 +601,12 @@ static int add_port(struct ib_device *device, int port_num)
if (ret)
goto err_free_pkey;
+ if (port_callback) {
+ ret = port_callback(device, port_num, &p->kobj);
+ if (ret)
+ goto err_remove_pkey;
+ }
+
list_add_tail(&p->kobj.entry, &device->port_list);
#ifdef __linux__
@@ -557,6 +614,9 @@ static int add_port(struct ib_device *device, int port_num)
#endif
return 0;
+err_remove_pkey:
+ sysfs_remove_group(&p->kobj, &p->pkey_group);
+
err_free_pkey:
for (i = 0; i < attr.pkey_tbl_len; ++i)
kfree(p->pkey_group.attrs[i]);
@@ -572,6 +632,9 @@ err_free_gid:
kfree(p->gid_group.attrs);
+err_remove_pma_ext:
+ sysfs_remove_group(&p->kobj, &pma_ext_group);
+
err_remove_pma:
sysfs_remove_group(&p->kobj, &pma_group);
@@ -786,16 +849,17 @@ static struct attribute_group iw_stats_group = {
.attrs = iw_proto_stats_attrs,
};
-int ib_device_register_sysfs(struct ib_device *device)
+int ib_device_register_sysfs(struct ib_device *device,
+ int (*port_callback)(struct ib_device *, u8, struct kobject *))
{
struct device *class_dev = &device->dev;
int ret;
int i;
class_dev->class = &ib_class;
- class_dev->driver_data = device;
class_dev->parent = device->dma_device;
- dev_set_name(class_dev, device->name);
+ dev_set_name(class_dev, device->name);
+ dev_set_drvdata(class_dev, device);
INIT_LIST_HEAD(&device->port_list);
@@ -810,19 +874,19 @@ int ib_device_register_sysfs(struct ib_device *device)
}
device->ports_parent = kobject_create_and_add("ports",
- &class_dev->kobj);
- if (!device->ports_parent) {
+ kobject_get(&class_dev->kobj));
+ if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
}
if (device->node_type == RDMA_NODE_IB_SWITCH) {
- ret = add_port(device, 0);
+ ret = add_port(device, 0, port_callback);
if (ret)
goto err_put;
} else {
for (i = 1; i <= device->phys_port_cnt; ++i) {
- ret = add_port(device, i);
+ ret = add_port(device, i, port_callback);
if (ret)
goto err_put;
}
@@ -864,10 +928,15 @@ void ib_device_unregister_sysfs(struct ib_device *device)
{
struct kobject *p, *t;
struct ib_port *port;
+ int i;
/* Hold kobject until ib_dealloc_device() */
kobject_get(&device->dev.kobj);
+ for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
+ device_remove_file(&device->dev, ib_class_attributes[i]);
+ }
+
list_for_each_entry_safe(p, t, &device->port_list, entry) {
list_del(&p->entry);
port = container_of(p, struct ib_port, kobj);
@@ -891,7 +960,7 @@ void ib_sysfs_cleanup(void)
class_unregister(&ib_class);
}
-int ib_sysfs_create_port_files(struct ib_device *device,
+/*int ib_sysfs_create_port_files(struct ib_device *device,
int (*create)(struct ib_device *dev, u8 port_num,
struct kobject *kobj))
{
@@ -908,4 +977,4 @@ int ib_sysfs_create_port_files(struct ib_device *device,
return ret;
}
-EXPORT_SYMBOL(ib_sysfs_create_port_files);
+EXPORT_SYMBOL(ib_sysfs_create_port_files);*/
diff --git a/sys/ofed/drivers/infiniband/core/uverbs_cmd.c b/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
index 3520182..9946c71 100644
--- a/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
+++ b/sys/ofed/drivers/infiniband/core/uverbs_cmd.c
@@ -312,7 +312,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
- INIT_LIST_HEAD(&ucontext->xrc_domain_list);
+ INIT_LIST_HEAD(&ucontext->xrcd_list);
ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -633,7 +633,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
}
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags, &udata);
+ cmd.access_flags, &udata, 0);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@@ -1087,7 +1087,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
attr.srq = srq;
attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
attr.qp_type = cmd.qp_type;
- attr.xrc_domain = xrcd;
+ attr.xrcd = xrcd;
attr.create_flags = 0;
attr.cap.max_send_wr = cmd.max_send_wr;
@@ -1115,14 +1115,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
qp->event_handler = attr.event_handler;
qp->qp_context = attr.qp_context;
qp->qp_type = attr.qp_type;
- qp->xrcd = attr.xrc_domain;
+ qp->xrcd = attr.xrcd;
atomic_inc(&pd->usecnt);
atomic_inc(&attr.send_cq->usecnt);
atomic_inc(&attr.recv_cq->usecnt);
if (attr.srq)
atomic_inc(&attr.srq->usecnt);
- else if (attr.xrc_domain)
- atomic_inc(&attr.xrc_domain->usecnt);
+ else if (attr.xrcd)
+ atomic_inc(&attr.xrcd->usecnt);
obj->uevent.uobject.object = qp;
ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
@@ -2032,8 +2032,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
srq->uobject = &obj->uobject;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
- srq->xrc_cq = NULL;
- srq->xrcd = NULL;
+ srq->ext.xrc.cq = NULL;
+ srq->ext.xrc.xrcd = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
@@ -2083,7 +2083,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
- struct ib_uverbs_create_xrc_srq cmd;
+ struct ib_uverbs_create_xsrq cmd;
struct ib_uverbs_create_srq_resp resp;
struct ib_udata udata;
struct ib_uevent_object *obj;
@@ -2119,7 +2119,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
goto err;
}
- xrc_cq = idr_read_cq(cmd.xrc_cq, file->ucontext, 0);
+ xrc_cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
if (!xrc_cq) {
ret = -EINVAL;
goto err_put_pd;
@@ -2152,8 +2152,8 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
srq->uobject = &obj->uobject;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
- srq->xrc_cq = xrc_cq;
- srq->xrcd = xrcd;
+ srq->ext.xrc.cq = xrc_cq;
+ srq->ext.xrc.xrcd = xrcd;
atomic_inc(&pd->usecnt);
atomic_inc(&xrc_cq->usecnt);
atomic_inc(&xrcd->usecnt);
@@ -2528,7 +2528,7 @@ ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&xrcd_uobj->xrc_reg_qp_list);
mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->xrc_domain_list);
+ list_add_tail(&uobj->list, &file->ucontext->xrcd_list);
mutex_unlock(&file->mutex);
uobj->live = 1;
@@ -2598,7 +2598,7 @@ ssize_t ib_uverbs_close_xrc_domain(struct ib_uverbs_file *file,
if (!ret) {
list_for_each_entry(t_uobj, &file->ucontext->srq_list, list) {
struct ib_srq *srq = t_uobj->object;
- if (srq->xrcd && srq->xrcd == uobj->object) {
+ if (srq->ext.xrc.xrcd && srq->ext.xrc.xrcd == uobj->object) {
ret = -EBUSY;
break;
}
@@ -2702,7 +2702,7 @@ ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file,
init_attr.sq_sig_type =
cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
init_attr.qp_type = IB_QPT_XRC;
- init_attr.xrc_domain = xrcd;
+ init_attr.xrcd = xrcd;
init_attr.cap.max_send_wr = 1;
init_attr.cap.max_recv_wr = 0;
diff --git a/sys/ofed/drivers/infiniband/core/uverbs_main.c b/sys/ofed/drivers/infiniband/core/uverbs_main.c
index 380abd3..a0eb4fe 100644
--- a/sys/ofed/drivers/infiniband/core/uverbs_main.c
+++ b/sys/ofed/drivers/infiniband/core/uverbs_main.c
@@ -110,8 +110,8 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
[IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
[IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq,
- [IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN] = ib_uverbs_open_xrc_domain,
- [IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN] = ib_uverbs_close_xrc_domain,
+ [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrc_domain,
+ [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrc_domain,
[IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp,
[IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp,
[IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp,
@@ -258,7 +258,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
}
mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
- list_for_each_entry_safe(uobj, tmp, &context->xrc_domain_list, list) {
+ list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
struct ib_xrcd *xrcd = uobj->object;
struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1;
struct ib_uxrcd_object *xrcd_uobj =
@@ -629,8 +629,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (hdr.in_words * 4 != count)
return -EINVAL;
- if (hdr.command < 0 ||
- hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
+ if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
!uverbs_cmd_table[hdr.command] ||
!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
return -EINVAL;
diff --git a/sys/ofed/drivers/infiniband/core/verbs.c b/sys/ofed/drivers/infiniband/core/verbs.c
index 90bdeaa..023564f 100644
--- a/sys/ofed/drivers/infiniband/core/verbs.c
+++ b/sys/ofed/drivers/infiniband/core/verbs.c
@@ -250,8 +250,8 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
- srq->xrc_cq = NULL;
- srq->xrcd = NULL;
+ srq->ext.xrc.cq = NULL;
+ srq->ext.xrc.xrcd = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
}
@@ -278,8 +278,8 @@ struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd,
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
- srq->xrc_cq = xrc_cq;
- srq->xrcd = xrcd;
+ srq->ext.xrc.cq = xrc_cq;
+ srq->ext.xrc.xrcd = xrcd;
atomic_inc(&pd->usecnt);
atomic_inc(&xrcd->usecnt);
atomic_inc(&xrc_cq->usecnt);
@@ -319,8 +319,8 @@ int ib_destroy_srq(struct ib_srq *srq)
return -EBUSY;
pd = srq->pd;
- xrc_cq = srq->xrc_cq;
- xrcd = srq->xrcd;
+ xrc_cq = srq->ext.xrc.cq;
+ xrcd = srq->ext.xrc.xrcd;
ret = srq->device->destroy_srq(srq);
if (!ret) {
@@ -355,7 +355,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
qp->qp_context = qp_init_attr->qp_context;
qp->qp_type = qp_init_attr->qp_type;
qp->xrcd = qp->qp_type == IB_QPT_XRC ?
- qp_init_attr->xrc_domain : NULL;
+ qp_init_attr->xrcd : NULL;
atomic_inc(&pd->usecnt);
atomic_inc(&qp_init_attr->send_cq->usecnt);
atomic_inc(&qp_init_attr->recv_cq->usecnt);
@@ -371,8 +371,8 @@ EXPORT_SYMBOL(ib_create_qp);
static const struct {
int valid;
- enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETH + 1];
- enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETH + 1];
+ enum ib_qp_attr_mask req_param[IB_QPT_RAW_PACKET + 1];
+ enum ib_qp_attr_mask opt_param[IB_QPT_RAW_PACKET + 1];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -382,7 +382,7 @@ static const struct {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_QKEY),
- [IB_QPT_RAW_ETH] = IB_QP_PORT,
+ [IB_QPT_RAW_PACKET] = IB_QP_PORT,
[IB_QPT_UC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
@@ -1005,7 +1005,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
switch (rdma_node_get_transport(qp->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (qp->qp_type == IB_QPT_RAW_ETH) {
+ if (qp->qp_type == IB_QPT_RAW_PACKET) {
/* In raw Etherent mgids the 63 msb's should be 0 */
if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
return -EINVAL;
@@ -1013,7 +1013,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
return -EINVAL;
break;
case RDMA_TRANSPORT_IWARP:
- if (qp->qp_type != IB_QPT_RAW_ETH)
+ if (qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
}
@@ -1028,7 +1028,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
switch (rdma_node_get_transport(qp->device->node_type)) {
case RDMA_TRANSPORT_IB:
- if (qp->qp_type == IB_QPT_RAW_ETH) {
+ if (qp->qp_type == IB_QPT_RAW_PACKET) {
/* In raw Etherent mgids the 63 msb's should be 0 */
if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
return -EINVAL;
@@ -1036,7 +1036,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
return -EINVAL;
break;
case RDMA_TRANSPORT_IWARP:
- if (qp->qp_type != IB_QPT_RAW_ETH)
+ if (qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
}
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/Kconfig b/sys/ofed/drivers/infiniband/hw/mlx4/Kconfig
index 4175a4b..24ab11a 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/Kconfig
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,5 +1,7 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
+ depends on NETDEVICES && ETHERNET && PCI
+ select NET_VENDOR_MELLANOX
select MLX4_CORE
---help---
This driver provides low-level InfiniBand support for
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/Makefile b/sys/ofed/drivers/infiniband/hw/mlx4/Makefile
index ce885a8..cbfa7a4 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/Makefile
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/Makefile
@@ -1,4 +1,31 @@
-obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
+# $FreeBSD$
+#.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
+#.PATH: ${.CURDIR}/../../../../include/linux
-mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
-mlx4_ib-y += wc.o
+.include <bsd.own.mk>
+
+KMOD = mlx4ib
+SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
+#SRCS+= linux_compat.c linux_radix.c
+SRCS+= ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c
+SRCS+= opt_inet.h opt_inet6.h
+
+#CFLAGS+= -I${.CURDIR}/../../ofed/include/
+CFLAGS+= -I${.CURDIR}/../../../../include
+CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
+
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
+.include <bsd.kmod.mk>
+
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/ah.c b/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
index 26251b47..47c9aa0 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/ah.c
@@ -30,25 +30,25 @@
* SOFTWARE.
*/
-#include "mlx4_ib.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include <linux/slab.h>
#include <linux/inet.h>
#include <linux/string.h>
-#include <rdma/ib_cache.h>
+
+#include "mlx4_ib.h"
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port)
{
- struct mlx4_ib_iboe *iboe = &dev->iboe;
struct in6_addr in6;
*is_mcast = 0;
- spin_lock(&iboe->lock);
- if (!iboe->netdevs[port - 1]) {
- spin_unlock(&iboe->lock);
- return -EINVAL;
- }
- spin_unlock(&iboe->lock);
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
if (rdma_link_local_addr(&in6))
@@ -92,15 +92,15 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
}
static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct mlx4_ib_ah *ah)
+ struct mlx4_ib_ah *ah)
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
+ union ib_gid sgid;
u8 mac[6];
int err;
int is_mcast;
u16 vlan_tag;
- union ib_gid sgid;
err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
if (err)
@@ -130,7 +130,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
ah->av.ib.dlid = cpu_to_be16(0xc000);
memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
- ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29);
return &ah->ibah;
}
@@ -147,25 +147,24 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
if (!(ah_attr->ah_flags & IB_AH_GRH)) {
ret = ERR_PTR(-EINVAL);
- goto out;
} else {
- /* TBD: need to handle the case when we get called
- in an atomic context and there we might sleep. We
- don't expect this currently since we're working with
- link local addresses which we can translate without
- going to sleep */
+ /*
+ * TBD: need to handle the case when we get
+ * called in an atomic context and there we
+ * might sleep. We don't expect this
+ * currently since we're working with link
+ * local addresses which we can translate
+ * without going to sleep.
+ */
ret = create_iboe_ah(pd, ah_attr, ah);
- if (IS_ERR(ret))
- goto out;
- else
- return ret;
}
+
+ if (IS_ERR(ret))
+ kfree(ah);
+
+ return ret;
} else
return create_ib_ah(pd, ah_attr, ah); /* never fails */
-
-out:
- kfree(ah);
- return ret;
}
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
@@ -202,4 +201,3 @@ int mlx4_ib_destroy_ah(struct ib_ah *ah)
kfree(to_mah(ah));
return 0;
}
-
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c b/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
new file mode 100644
index 0000000..ae7b558
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -0,0 +1,688 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+ /***********************************************************/
+/*This file support the handling of the Alias GUID feature. */
+/***********************************************************/
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_pack.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/delay.h>
+#include "mlx4_ib.h"
+
+/*
+The driver keeps the current state of all guids, as they are in the HW.
+Whenever we receive an smp mad GUIDInfo record, the data will be cached.
+*/
+
+struct mlx4_alias_guid_work_context {
+ u8 port;
+ struct mlx4_ib_dev *dev ;
+ struct ib_sa_query *sa_query;
+ struct completion done;
+ int query_id;
+ struct list_head list;
+ int block_num;
+};
+
+struct mlx4_next_alias_guid_work {
+ u8 port;
+ u8 block_num;
+ struct mlx4_sriov_alias_guid_info_rec_det rec_det;
+};
+
+
+void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
+ u8 port_num, u8 *p_data)
+{
+ int i;
+ u64 guid_indexes;
+ int slave_id;
+ int port_index = port_num - 1;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+
+ guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
+ ports_guid[port_num - 1].
+ all_rec_per_port[block_num].guid_indexes);
+ pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
+
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ /* The location of the specific index starts from bit number 4
+ * until bit num 11 */
+ if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
+ slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
+ if (slave_id >= dev->dev->num_slaves) {
+ pr_debug("The last slave: %d\n", slave_id);
+ return;
+ }
+
+ /* cache the guid: */
+ memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
+ &p_data[i * GUID_REC_SIZE],
+ GUID_REC_SIZE);
+ } else
+ pr_debug("Guid number: %d in block: %d"
+ " was not updated\n", i, block_num);
+ }
+}
+
+static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
+{
+ if (index >= NUM_ALIAS_GUID_PER_PORT) {
+ pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
+ return (__force __be64) -1;
+ }
+ return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
+}
+
+
+ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
+{
+ return IB_SA_COMP_MASK(4 + index);
+}
+
+/*
+ * Whenever new GUID is set/unset (guid table change) create event and
+ * notify the relevant slave (master also should be notified).
+ * If the GUID value is not as we have in the cache the slave will not be
+ * updated; in this case it waits for the smp_snoop or the port management
+ * event to call the function and to update the slave.
+ * block_number - the index of the block (16 blocks available)
+ * port_number - 1 or 2
+ */
+void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
+ int block_num, u8 port_num,
+ u8 *p_data)
+{
+ int i;
+ u64 guid_indexes;
+ int slave_id;
+ enum slave_port_state new_state;
+ enum slave_port_state prev_state;
+ __be64 tmp_cur_ag, form_cache_ag;
+ enum slave_port_gen_event gen_event;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+
+ guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
+ ports_guid[port_num - 1].
+ all_rec_per_port[block_num].guid_indexes);
+ pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
+
+ /*calculate the slaves and notify them*/
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ /* the location of the specific index runs from bits 4..11 */
+ if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
+ continue;
+
+ slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
+ if (slave_id >= dev->dev->num_slaves)
+ return;
+ tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
+ form_cache_ag = get_cached_alias_guid(dev, port_num,
+ (NUM_ALIAS_GUID_IN_REC * block_num) + i);
+ /*
+ * Check if guid is not the same as in the cache,
+ * If it is different, wait for the snoop_smp or the port mgmt
+ * change event to update the slave on its port state change
+ */
+ if (tmp_cur_ag != form_cache_ag)
+ continue;
+ mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
+
+ /*2 cases: Valid GUID, and Invalid Guid*/
+
+ if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
+ prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
+ new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
+ MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
+ &gen_event);
+ pr_debug("slave: %d, port: %d prev_port_state: %d,"
+ " new_port_state: %d, gen_event: %d\n",
+ slave_id, port_num, prev_state, new_state, gen_event);
+ if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
+ pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
+ slave_id, port_num);
+ mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
+ port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
+ }
+ } else { /* request to invalidate GUID */
+ set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
+ MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
+ &gen_event);
+ pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+ slave_id, port_num);
+ mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
+ MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+ }
+ }
+}
+
+static void aliasguid_query_handler(int status,
+ struct ib_sa_guidinfo_rec *guid_rec,
+ void *context)
+{
+ struct mlx4_ib_dev *dev;
+ struct mlx4_alias_guid_work_context *cb_ctx = context;
+ u8 port_index ;
+ int i;
+ struct mlx4_sriov_alias_guid_info_rec_det *rec;
+ unsigned long flags, flags1;
+
+ if (!context)
+ return;
+
+ dev = cb_ctx->dev;
+ port_index = cb_ctx->port - 1;
+ rec = &dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[cb_ctx->block_num];
+
+ if (status) {
+ rec->status = MLX4_GUID_INFO_STATUS_IDLE;
+ pr_debug("(port: %d) failed: status = %d\n",
+ cb_ctx->port, status);
+ goto out;
+ }
+
+ if (guid_rec->block_num != cb_ctx->block_num) {
+ pr_err("block num mismatch: %d != %d\n",
+ cb_ctx->block_num, guid_rec->block_num);
+ goto out;
+ }
+
+ pr_debug("lid/port: %d/%d, block_num: %d\n",
+ be16_to_cpu(guid_rec->lid), cb_ctx->port,
+ guid_rec->block_num);
+
+ rec = &dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[guid_rec->block_num];
+
+ rec->status = MLX4_GUID_INFO_STATUS_SET;
+ rec->method = MLX4_GUID_INFO_RECORD_SET;
+
+ for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ __be64 tmp_cur_ag;
+ tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
+ /* check if the SM didn't assign one of the records.
+ * if it didn't, if it was not sysadmin request:
+ * ask the SM to give a new GUID, (instead of the driver request).
+ */
+ if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
+ mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
+ "block_num: %d was declined by SM, "
+ "ownership by %d (0 = driver, 1=sysAdmin,"
+ " 2=None)\n", __func__, i,
+ guid_rec->block_num, rec->ownership);
+ if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
+ /* if it is driver assign, asks for new GUID from SM*/
+ *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
+ MLX4_NOT_SET_GUID;
+
+ /* Mark the record as not assigned, and let it
+ * be sent again in the next work sched.*/
+ rec->status = MLX4_GUID_INFO_STATUS_IDLE;
+ rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ }
+ } else {
+ /* properly assigned record. */
+ /* We save the GUID we just got from the SM in the
+ * admin_guid in order to be persistent, and in the
+ * request from the sm the process will ask for the same GUID */
+ if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
+ tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
+ /* the sysadmin assignment failed.*/
+ mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
+ " admin guid after SysAdmin "
+ "configuration. "
+ "Record num %d in block_num:%d "
+ "was declined by SM, "
+ "new val(0x%llx) was kept\n",
+ __func__, i,
+ guid_rec->block_num,
+ (long long)be64_to_cpu(*(__be64 *) &
+ rec->all_recs[i * GUID_REC_SIZE]));
+ } else {
+ memcpy(&rec->all_recs[i * GUID_REC_SIZE],
+ &guid_rec->guid_info_list[i * GUID_REC_SIZE],
+ GUID_REC_SIZE);
+ }
+ }
+ }
+ /*
+ The func is call here to close the cases when the
+ sm doesn't send smp, so in the sa response the driver
+ notifies the slave.
+ */
+ mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
+ cb_ctx->port,
+ guid_rec->guid_info_list);
+out:
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ if (!dev->sriov.is_going_down)
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
+ &dev->sriov.alias_guid.ports_guid[port_index].
+ alias_guid_work, 0);
+ if (cb_ctx->sa_query) {
+ list_del(&cb_ctx->list);
+ kfree(cb_ctx);
+ } else
+ complete(&cb_ctx->done);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
+{
+ int i;
+ u64 cur_admin_val;
+ ib_sa_comp_mask comp_mask = 0;
+
+ dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
+ = MLX4_GUID_INFO_STATUS_IDLE;
+ dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
+ = MLX4_GUID_INFO_RECORD_SET;
+
+ /* calculate the comp_mask for that record.*/
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ cur_admin_val =
+ *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
+ /*
+ check the admin value: if it's for delete (~00LL) or
+ it is the first guid of the first record (hw guid) or
+ the records is not in ownership of the sysadmin and the sm doesn't
+ need to assign GUIDs, then don't put it up for assignment.
+ */
+ if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
+ (!index && !i) ||
+ MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
+ ports_guid[port - 1].all_rec_per_port[index].ownership)
+ continue;
+ comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ }
+ dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[index].guid_indexes = comp_mask;
+}
+
+static int set_guid_rec(struct ib_device *ibdev,
+ u8 port, int index,
+ struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+{
+ int err;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_sa_guidinfo_rec guid_info_rec;
+ ib_sa_comp_mask comp_mask;
+ struct ib_port_attr attr;
+ struct mlx4_alias_guid_work_context *callback_context;
+ unsigned long resched_delay, flags, flags1;
+ struct list_head *head =
+ &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
+
+ err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
+ if (err) {
+ pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
+ err, port);
+ return err;
+ }
+ /*check the port was configured by the sm, otherwise no need to send */
+ if (attr.state != IB_PORT_ACTIVE) {
+ pr_debug("port %d not active...rescheduling\n", port);
+ resched_delay = 5 * HZ;
+ err = -EAGAIN;
+ goto new_schedule;
+ }
+
+ callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
+ if (!callback_context) {
+ err = -ENOMEM;
+ resched_delay = HZ * 5;
+ goto new_schedule;
+ }
+ callback_context->port = port;
+ callback_context->dev = dev;
+ callback_context->block_num = index;
+
+ memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
+
+ guid_info_rec.lid = cpu_to_be16(attr.lid);
+ guid_info_rec.block_num = index;
+
+ memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
+ GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
+ comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
+ rec_det->guid_indexes;
+
+ init_completion(&callback_context->done);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ list_add_tail(&callback_context->list, head);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+
+ callback_context->query_id =
+ ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
+ ibdev, port, &guid_info_rec,
+ comp_mask, rec_det->method, 1000,
+ GFP_KERNEL, aliasguid_query_handler,
+ callback_context,
+ &callback_context->sa_query);
+ if (callback_context->query_id < 0) {
+ pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
+ "%d. will reschedule to the next 1 sec.\n",
+ callback_context->query_id);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ list_del(&callback_context->list);
+ kfree(callback_context);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ resched_delay = 1 * HZ;
+ err = -EAGAIN;
+ goto new_schedule;
+ }
+ err = 0;
+ goto out;
+
+new_schedule:
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ invalidate_guid_record(dev, port, index);
+ if (!dev->sriov.is_going_down) {
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
+ &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
+ resched_delay);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+
+out:
+ return err;
+}
+
+void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
+{
+ int i;
+ unsigned long flags, flags1;
+
+ pr_debug("port %d\n", port);
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
+ invalidate_guid_record(dev, port, i);
+
+ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
+ /*
+ make sure no work waits in the queue, if the work is already
+ queued(not on the timer) the cancel will fail. That is not a problem
+ because we just want the work started.
+ */
+ cancel_delayed_work(&dev->sriov.alias_guid.
+ ports_guid[port - 1].alias_guid_work);
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
+ &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
+ 0);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+/* The function returns the next record that was
+ * not configured (or failed to be configured) */
+static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
+ struct mlx4_next_alias_guid_work *rec)
+{
+ int j;
+ unsigned long flags;
+
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+ if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
+ MLX4_GUID_INFO_STATUS_IDLE) {
+ memcpy(&rec->rec_det,
+ &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
+ sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
+ rec->port = port;
+ rec->block_num = j;
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
+ MLX4_GUID_INFO_STATUS_PENDING;
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+ return 0;
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+ }
+ return -ENOENT;
+}
+
+static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
+ int rec_index,
+ struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+{
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
+ rec_det->guid_indexes;
+ memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
+ rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
+ rec_det->status;
+}
+
+static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
+{
+ int j;
+ struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
+
+ for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
+ memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
+ rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
+ IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
+ IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
+ IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
+ IB_SA_GUIDINFO_REC_GID7;
+ rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
+ set_administratively_guid_record(dev, port, j, &rec_det);
+ }
+}
+
+static void alias_guid_work(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ int ret = 0;
+ struct mlx4_next_alias_guid_work *rec;
+ struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
+ container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
+ alias_guid_work);
+ struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
+ struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
+ struct mlx4_ib_sriov,
+ alias_guid);
+ struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
+
+ rec = kzalloc(sizeof *rec, GFP_KERNEL);
+ if (!rec) {
+ pr_err("alias_guid_work: No Memory\n");
+ return;
+ }
+
+ pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
+ ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
+ if (ret) {
+ pr_debug("No more records to update.\n");
+ goto out;
+ }
+
+ set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
+ &rec->rec_det);
+
+out:
+ kfree(rec);
+}
+
+
+void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
+{
+ unsigned long flags, flags1;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ if (!dev->sriov.is_going_down) {
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
+ &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
+{
+ int i;
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct mlx4_alias_guid_work_context *cb_ctx;
+ struct mlx4_sriov_alias_guid_port_rec_det *det;
+ struct ib_sa_query *sa_query;
+ unsigned long flags;
+
+ for (i = 0 ; i < dev->num_ports; i++) {
+ cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
+ det = &sriov->alias_guid.ports_guid[i];
+ spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
+ while (!list_empty(&det->cb_list)) {
+ cb_ctx = list_entry(det->cb_list.next,
+ struct mlx4_alias_guid_work_context,
+ list);
+ sa_query = cb_ctx->sa_query;
+ cb_ctx->sa_query = NULL;
+ list_del(&cb_ctx->list);
+ spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
+ ib_sa_cancel_query(cb_ctx->query_id, sa_query);
+ wait_for_completion(&cb_ctx->done);
+ kfree(cb_ctx);
+ spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
+ }
+ spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
+ }
+ for (i = 0 ; i < dev->num_ports; i++) {
+ flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ }
+ ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
+ kfree(dev->sriov.alias_guid.sa_client);
+}
+
+int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
+{
+ char alias_wq_name[15];
+ int ret = 0;
+ int i, j, k;
+ union ib_gid gid;
+
+ if (!mlx4_is_master(dev->dev))
+ return 0;
+ dev->sriov.alias_guid.sa_client =
+ kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
+ if (!dev->sriov.alias_guid.sa_client)
+ return -ENOMEM;
+
+ ib_sa_register_client(dev->sriov.alias_guid.sa_client);
+
+ spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
+
+ for (i = 1; i <= dev->num_ports; ++i) {
+ if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
+ ret = -EFAULT;
+ goto err_unregister;
+ }
+ }
+
+ for (i = 0 ; i < dev->num_ports; i++) {
+ memset(&dev->sriov.alias_guid.ports_guid[i], 0,
+ sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
+ /*Check if the SM doesn't need to assign the GUIDs*/
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ if (mlx4_ib_sm_guid_assign) {
+ dev->sriov.alias_guid.ports_guid[i].
+ all_rec_per_port[j].
+ ownership = MLX4_GUID_DRIVER_ASSIGN;
+ continue;
+ }
+ dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
+ ownership = MLX4_GUID_NONE_ASSIGN;
+ /*mark each val as it was deleted,
+ till the sysAdmin will give it valid val*/
+ for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
+ *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
+ all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
+ cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
+ }
+ }
+ INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
+ /*prepare the records, set them to be allocated by sm*/
+ for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
+ invalidate_guid_record(dev, i + 1, j);
+
+ dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
+ dev->sriov.alias_guid.ports_guid[i].port = i;
+ if (mlx4_ib_sm_guid_assign)
+ set_all_slaves_guids(dev, i);
+
+ snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
+ dev->sriov.alias_guid.ports_guid[i].wq =
+ create_singlethread_workqueue(alias_wq_name);
+ if (!dev->sriov.alias_guid.ports_guid[i].wq) {
+ ret = -ENOMEM;
+ goto err_thread;
+ }
+ INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
+ alias_guid_work);
+ }
+ return 0;
+
+err_thread:
+ for (--i; i >= 0; i--) {
+ destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ dev->sriov.alias_guid.ports_guid[i].wq = NULL;
+ }
+
+err_unregister:
+ ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
+ kfree(dev->sriov.alias_guid.sa_client);
+ dev->sriov.alias_guid.sa_client = NULL;
+ pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
+ return ret;
+}
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/cm.c b/sys/ofed/drivers/infiniband/hw/mlx4/cm.c
new file mode 100644
index 0000000..3745367
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/cm.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/idr.h>
+#include <rdma/ib_cm.h>
+
+#include "mlx4_ib.h"
+
+#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
+
+struct id_map_entry {
+ struct rb_node node;
+
+ u32 sl_cm_id;
+ u32 pv_cm_id;
+ int slave_id;
+ int scheduled_delete;
+ struct mlx4_ib_dev *dev;
+
+ struct list_head list;
+ struct delayed_work timeout;
+};
+
+struct cm_generic_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+};
+
+struct cm_req_msg {
+ unsigned char unused[0x60];
+ union ib_gid primary_path_sgid;
+};
+
+
+static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->local_comm_id = cpu_to_be32(cm_id);
+}
+
+static u32 get_local_comm_id(struct ib_mad *mad)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+
+ return be32_to_cpu(msg->local_comm_id);
+}
+
+static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->remote_comm_id = cpu_to_be32(cm_id);
+}
+
+static u32 get_remote_comm_id(struct ib_mad *mad)
+{
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+
+ return be32_to_cpu(msg->remote_comm_id);
+}
+
+static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
+{
+ struct cm_req_msg *msg = (struct cm_req_msg *)mad;
+
+ return msg->primary_path_sgid;
+}
+
+/* Lock should be taken before called */
+static struct id_map_entry *
+id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
+{
+ struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
+ struct rb_node *node = sl_id_map->rb_node;
+
+ while (node) {
+ struct id_map_entry *id_map_entry =
+ rb_entry(node, struct id_map_entry, node);
+
+ if (id_map_entry->sl_cm_id > sl_cm_id)
+ node = node->rb_left;
+ else if (id_map_entry->sl_cm_id < sl_cm_id)
+ node = node->rb_right;
+ else if (id_map_entry->slave_id > slave_id)
+ node = node->rb_left;
+ else if (id_map_entry->slave_id < slave_id)
+ node = node->rb_right;
+ else
+ return id_map_entry;
+ }
+ return NULL;
+}
+
+static void id_map_ent_timeout(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
+ struct id_map_entry *db_ent, *found_ent;
+ struct mlx4_ib_dev *dev = ent->dev;
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ int pv_id = (int) ent->pv_cm_id;
+
+ spin_lock(&sriov->id_map_lock);
+ db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
+ if (!db_ent)
+ goto out;
+ found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
+ if (found_ent && found_ent == ent)
+ rb_erase(&found_ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, pv_id);
+
+out:
+ list_del(&ent->list);
+ spin_unlock(&sriov->id_map_lock);
+ kfree(ent);
+}
+
+static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
+{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ struct id_map_entry *ent, *found_ent;
+
+ spin_lock(&sriov->id_map_lock);
+ ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id);
+ if (!ent)
+ goto out;
+ found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
+ if (found_ent && found_ent == ent)
+ rb_erase(&found_ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, pv_cm_id);
+out:
+ spin_unlock(&sriov->id_map_lock);
+}
+
+static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
+{
+ struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
+ struct rb_node **link = &sl_id_map->rb_node, *parent = NULL;
+ struct id_map_entry *ent;
+ int slave_id = new->slave_id;
+ int sl_cm_id = new->sl_cm_id;
+
+ ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
+ if (ent) {
+ pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n",
+ sl_cm_id);
+
+ rb_replace_node(&ent->node, &new->node, sl_id_map);
+ return;
+ }
+
+ /* Go to the bottom of the tree */
+ while (*link) {
+ parent = *link;
+ ent = rb_entry(parent, struct id_map_entry, node);
+
+ if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id))
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ rb_link_node(&new->node, parent, link);
+ rb_insert_color(&new->node, sl_id_map);
+}
+
+static struct id_map_entry *
+id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
+{
+ int ret, id;
+ static int next_id;
+ struct id_map_entry *ent;
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+
+ ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
+ if (!ent) {
+ mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ent->sl_cm_id = sl_cm_id;
+ ent->slave_id = slave_id;
+ ent->scheduled_delete = 0;
+ ent->dev = to_mdev(ibdev);
+ INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
+
+ do {
+ spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
+ ret = idr_get_new_above(&sriov->pv_id_table, ent,
+ next_id, &id);
+ if (!ret) {
+ next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
+ ent->pv_cm_id = (u32)id;
+ sl_id_map_add(ibdev, ent);
+ }
+
+ spin_unlock(&sriov->id_map_lock);
+ } while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL));
+ /*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/
+ if (!ret) {
+ spin_lock(&sriov->id_map_lock);
+ list_add_tail(&ent->list, &sriov->cm_list);
+ spin_unlock(&sriov->id_map_lock);
+ return ent;
+ }
+ /*error flow*/
+ kfree(ent);
+ mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
+ return ERR_PTR(-ENOMEM);
+}
+
+static struct id_map_entry *
+id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
+{
+ struct id_map_entry *ent;
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+
+ spin_lock(&sriov->id_map_lock);
+ if (*pv_cm_id == -1) {
+ ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
+ if (ent)
+ *pv_cm_id = (int) ent->pv_cm_id;
+ } else
+ ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id);
+ spin_unlock(&sriov->id_map_lock);
+
+ return ent;
+}
+
+static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
+{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ unsigned long flags;
+
+ spin_lock(&sriov->id_map_lock);
+ spin_lock_irqsave(&sriov->going_down_lock, flags);
+ /*make sure that there is no schedule inside the scheduled work.*/
+ if (!sriov->is_going_down) {
+ id->scheduled_delete = 1;
+ schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ }
+ spin_unlock_irqrestore(&sriov->going_down_lock, flags);
+ spin_unlock(&sriov->id_map_lock);
+}
+
+int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
+ struct ib_mad *mad)
+{
+ struct id_map_entry *id;
+ u32 sl_cm_id;
+ int pv_cm_id = -1;
+
+ sl_cm_id = get_local_comm_id(mad);
+
+ if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
+ id = id_map_alloc(ibdev, slave_id, sl_cm_id);
+ if (IS_ERR(id)) {
+ mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
+ __func__, slave_id, sl_cm_id);
+ return PTR_ERR(id);
+ }
+ } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
+ return 0;
+ } else {
+ id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
+ }
+
+ if (!id) {
+ pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
+ slave_id, sl_cm_id);
+ return -EINVAL;
+ }
+
+ set_local_comm_id(mad, id->pv_cm_id);
+
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ schedule_delayed(ibdev, id);
+ else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
+ id_map_find_del(ibdev, pv_cm_id);
+
+ return 0;
+}
+
+int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
+ struct ib_mad *mad, int is_eth)
+{
+ u32 pv_cm_id;
+ struct id_map_entry *id;
+
+ if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
+ union ib_gid gid;
+
+ if (is_eth)
+ return 0;
+
+ gid = gid_from_req_msg(ibdev, mad);
+ *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
+ if (*slave < 0) {
+ mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
+ gid.global.interface_id);
+ return -ENOENT;
+ }
+ return 0;
+ }
+
+ pv_cm_id = get_remote_comm_id(mad);
+ id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
+
+ if (!id) {
+ pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
+ return -ENOENT;
+ }
+
+ if (!is_eth)
+ *slave = id->slave_id;
+ set_remote_comm_id(mad, id->sl_cm_id);
+
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ schedule_delayed(ibdev, id);
+ else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
+ id_map_find_del(ibdev, (int) pv_cm_id);
+ }
+
+ return 0;
+}
+
+void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
+{
+ spin_lock_init(&dev->sriov.id_map_lock);
+ INIT_LIST_HEAD(&dev->sriov.cm_list);
+ dev->sriov.sl_id_map = RB_ROOT;
+ idr_init(&dev->sriov.pv_id_table);
+ idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL);
+}
+
+/* slave = -1 ==> all slaves */
+/* TBD -- call paravirt clean for single slave. Need for slave RESET event */
+void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
+{
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ struct list_head lh;
+ struct rb_node *nd;
+ int need_flush = 1;
+ struct id_map_entry *map, *tmp_map;
+ /* cancel all delayed work queue entries */
+ INIT_LIST_HEAD(&lh);
+ spin_lock(&sriov->id_map_lock);
+ list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
+ if (slave < 0 || slave == map->slave_id) {
+ if (map->scheduled_delete)
+ need_flush &= !!cancel_delayed_work(&map->timeout);
+ }
+ }
+
+ spin_unlock(&sriov->id_map_lock);
+
+ if (!need_flush)
+ flush_scheduled_work(); /* make sure all timers were flushed */
+
+ /* now, remove all leftover entries from databases*/
+ spin_lock(&sriov->id_map_lock);
+ if (slave < 0) {
+ while (rb_first(sl_id_map)) {
+ struct id_map_entry *ent =
+ rb_entry(rb_first(sl_id_map),
+ struct id_map_entry, node);
+
+ rb_erase(&ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id);
+ }
+ list_splice_init(&dev->sriov.cm_list, &lh);
+ } else {
+ /* first, move nodes belonging to slave to db remove list */
+ nd = rb_first(sl_id_map);
+ while (nd) {
+ struct id_map_entry *ent =
+ rb_entry(nd, struct id_map_entry, node);
+ nd = rb_next(nd);
+ if (ent->slave_id == slave)
+ list_move_tail(&ent->list, &lh);
+ }
+ /* remove those nodes from databases */
+ list_for_each_entry_safe(map, tmp_map, &lh, list) {
+ rb_erase(&map->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id);
+ }
+
+ /* add remaining nodes from cm_list */
+ list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
+ if (slave == map->slave_id)
+ list_move_tail(&map->list, &lh);
+ }
+ }
+
+ spin_unlock(&sriov->id_map_lock);
+
+ /* free any map entries left behind due to cancel_delayed_work above */
+ list_for_each_entry_safe(map, tmp_map, &lh, list) {
+ list_del(&map->list);
+ kfree(map);
+ }
+}
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/cq.c b/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
index 31cd00d..293917a 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/cq.c
@@ -33,13 +33,14 @@
#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
-#include <linux/mlx4/srq.h>
+#include <linux/slab.h>
#include "mlx4_ib.h"
#include "user.h"
/* Which firmware version adds support for Resize CQ */
#define MLX4_FW_VER_RESIZE_CQ mlx4_fw_ver(2, 5, 0)
+#define MLX4_FW_VER_IGNORE_OVERRUN_CQ mlx4_fw_ver(2, 7, 8200)
static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
{
@@ -53,7 +54,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
struct ib_cq *ibcq;
if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
- printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ pr_warn("Unexpected event type %d "
"on CQ %06x\n", type, cq->cqn);
return;
}
@@ -69,7 +70,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
- return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+ return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
}
static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -80,8 +81,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
{
struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+ struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
- return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
}
@@ -102,12 +104,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
{
int err;
- err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+ err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
PAGE_SIZE * 2, &buf->buf);
if (err)
goto out;
+ buf->entry_size = dev->dev->caps.cqe_size;
err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
&buf->mtt);
if (err)
@@ -123,8 +126,7 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &buf->mtt);
err_buf:
- mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
- &buf->buf);
+ mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
out:
return err;
@@ -132,7 +134,7 @@ out:
static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
{
- mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+ mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}
static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -140,14 +142,19 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
u64 buf_addr, int cqe)
{
int err;
+ int cqe_size = dev->dev->caps.cqe_size;
+ int shift;
+ int n;
- *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+ *umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
- err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
- ilog2((*umem)->page_size), &buf->mtt);
+ n = ib_umem_page_count(*umem);
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
+ err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
+
if (err)
goto err_buf;
@@ -175,12 +182,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
struct mlx4_uar *uar;
int err;
- if (entries < 1 || entries > dev->dev->caps.max_cqes) {
- mlx4_ib_dbg("invalid num of entries: %d", entries);
+ if (entries < 1 || entries > dev->dev->caps.max_cqes)
return ERR_PTR(-EINVAL);
- }
- cq = kzalloc(sizeof *cq, GFP_KERNEL);
+ cq = kmalloc(sizeof *cq, GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
@@ -227,10 +232,11 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
uar = &dev->priv_uar;
}
+ if (dev->eq_table)
+ vector = dev->eq_table[vector % ibdev->num_comp_vectors];
+
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
- cq->db.dma, &cq->mcq,
- vector == IB_CQ_VECTOR_LEAST_ATTACHED ?
- MLX4_LEAST_ATTACHED_VECTOR : vector, 0);
+ cq->db.dma, &cq->mcq, vector, 0, 0);
if (err)
goto err_dbmap;
@@ -335,16 +341,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
{
struct mlx4_cqe *cqe, *new_cqe;
int i;
+ int cqe_size = cq->buf.entry_size;
+ int cqe_inc = cqe_size == 64 ? 1 : 0;
i = cq->mcq.cons_index;
cqe = get_cqe(cq, i & cq->ibcq.cqe);
+ cqe += cqe_inc;
+
while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
(i + 1) & cq->resize_buf->cqe);
- memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+ memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
+ new_cqe += cqe_inc;
+
new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+ cqe += cqe_inc;
}
++cq->mcq.cons_index;
}
@@ -409,7 +422,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
} else {
struct mlx4_ib_cq_buf tmp_buf;
int tmp_cqe = 0;
-
+
spin_lock_irq(&cq->lock);
if (cq->resize_buf) {
mlx4_ib_cq_resize_copy_cqes(cq);
@@ -445,9 +458,21 @@ err_buf:
out:
mutex_unlock(&cq->resize_mutex);
+
return err;
}
+int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx4_ib_cq *cq = to_mcq(ibcq);
+
+ if (dev->dev->caps.fw_ver < MLX4_FW_VER_IGNORE_OVERRUN_CQ)
+ return -ENOSYS;
+
+ return mlx4_cq_ignore_overrun(dev->dev, &cq->mcq);
+}
+
int mlx4_ib_destroy_cq(struct ib_cq *cq)
{
struct mlx4_ib_dev *dev = to_mdev(cq->device);
@@ -473,7 +498,7 @@ static void dump_cqe(void *cqe)
{
__be32 *buf = cqe;
- printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ pr_debug("CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
@@ -483,7 +508,7 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
struct ib_wc *wc)
{
if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
- printk(KERN_DEBUG "local QP operation err "
+ pr_debug("local QP operation err "
"(QPN %06x, WQE index %x, vendor syndrome %02x, "
"opcode = %02x)\n",
be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
@@ -554,6 +579,26 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
checksum == cpu_to_be16(0xffff);
}
+static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
+ unsigned tail, struct mlx4_cqe *cqe)
+{
+ struct mlx4_ib_proxy_sqp_hdr *hdr;
+
+ ib_dma_sync_single_for_cpu(qp->ibqp.device,
+ qp->sqp_proxy_rcv[tail].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
+ wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
+ wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
+ wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
+ wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
+ wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
+ wc->dlid_path_bits = 0;
+
+ return 0;
+}
+
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_ib_qp **cur_qp,
struct ib_wc *wc)
@@ -562,18 +607,20 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_qp *mqp;
struct mlx4_ib_wq *wq;
struct mlx4_ib_srq *srq;
- struct mlx4_srq *msrq;
int is_send;
int is_error;
u32 g_mlpath_rqpn;
- int is_xrc_recv = 0;
u16 wqe_ctr;
+ unsigned tail = 0;
repoll:
cqe = next_cqe_sw(cq);
if (!cqe)
return -EAGAIN;
+ if (cq->buf.entry_size == 64)
+ cqe++;
+
++cq->mcq.cons_index;
/*
@@ -588,7 +635,7 @@ repoll:
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
is_send)) {
- printk(KERN_WARNING "Completion for NOP opcode detected!\n");
+ pr_warn("Completion for NOP opcode detected!\n");
return -EINVAL;
}
@@ -608,24 +655,7 @@ repoll:
goto repoll;
}
- if ((be32_to_cpu(cqe->vlan_my_qpn) & (1 << 23)) && !is_send) {
- /*
- * We do not have to take the XRC SRQ table lock here,
- * because CQs will be locked while XRC SRQs are removed
- * from the table.
- */
- msrq = __mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
- be32_to_cpu(cqe->g_mlpath_rqpn) &
- 0xffffff);
- if (unlikely(!msrq)) {
- printk(KERN_WARNING "CQ %06x with entry for unknown "
- "XRC SRQ %06x\n", cq->mcq.cqn,
- be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff);
- return -EINVAL;
- }
- is_xrc_recv = 1;
- srq = to_mibsrq(msrq);
- } else if (!*cur_qp ||
+ if (!*cur_qp ||
(be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) {
/*
* We do not have to take the QP table lock here,
@@ -635,7 +665,7 @@ repoll:
mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
be32_to_cpu(cqe->vlan_my_qpn));
if (unlikely(!mqp)) {
- printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
+ pr_warn("CQ %06x with entry for unknown QPN %06x\n",
cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
return -EINVAL;
}
@@ -643,7 +673,7 @@ repoll:
*cur_qp = to_mibqp(mqp);
}
- wc->qp = is_xrc_recv ? NULL: &(*cur_qp)->ibqp;
+ wc->qp = &(*cur_qp)->ibqp;
if (is_send) {
wq = &(*cur_qp)->sq;
@@ -653,10 +683,6 @@ repoll:
}
wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++wq->tail;
- } else if (is_xrc_recv) {
- wqe_ctr = be16_to_cpu(cqe->wqe_index);
- wc->wr_id = srq->wrid[wqe_ctr];
- mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else if ((*cur_qp)->ibqp.srq) {
srq = to_msrq((*cur_qp)->ibqp.srq);
wqe_ctr = be16_to_cpu(cqe->wqe_index);
@@ -664,7 +690,8 @@ repoll:
mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else {
wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ tail = wq->tail & (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[tail];
++wq->tail;
}
@@ -747,14 +774,26 @@ repoll:
break;
}
+ if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
+ if ((*cur_qp)->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
+ return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
+ }
+
wc->slid = be16_to_cpu(cqe->rlid);
- wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->src_qp = g_mlpath_rqpn & 0xffffff;
wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
- wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum);
+ wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
+ cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
+ if (rdma_port_get_link_layer(wc->qp->device,
+ (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
+ wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
+ else
+ wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
}
return 0;
@@ -776,8 +815,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
break;
}
- if (npolled)
- mlx4_cq_set_ci(&cq->mcq);
+ mlx4_cq_set_ci(&cq->mcq);
spin_unlock_irqrestore(&cq->lock, flags);
@@ -804,10 +842,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
int nfreed = 0;
struct mlx4_cqe *cqe, *dest;
u8 owner_bit;
- int is_xrc_srq = 0;
-
- if (srq && srq->ibsrq.xrc_cq)
- is_xrc_srq = 1;
+ int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
/*
* First we need to find the current producer index, so we
@@ -826,15 +861,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
*/
while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
- if (((be32_to_cpu(cqe->vlan_my_qpn) & 0xffffff) == qpn) ||
- (is_xrc_srq &&
- (be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff) ==
- srq->msrq.srqn)) {
+ cqe += cqe_inc;
+
+ if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
++nfreed;
} else if (nfreed) {
dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+ dest += cqe_inc;
+
owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
memcpy(dest, cqe, sizeof *cqe);
dest->owner_sr_opcode = owner_bit |
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mad.c b/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
index 2bb87ab..f130cdc 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/mad.c
@@ -32,8 +32,13 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_cache.h>
+#include <linux/random.h>
#include <linux/mlx4/cmd.h>
+#include <linux/gfp.h>
+#include <rdma/ib_pma.h>
#include "mlx4_ib.h"
@@ -42,7 +47,62 @@ enum {
MLX4_IB_VENDOR_CLASS2 = 0xa
};
-int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+#define MLX4_TUN_SEND_WRID_SHIFT 34
+#define MLX4_TUN_QPN_SHIFT 32
+#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
+#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
+
+#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
+#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
+
+ /* Port mgmt change event handling */
+
+#define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
+#define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
+#define NUM_IDX_IN_PKEY_TBL_BLK 32
+#define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */
+#define GUID_TBL_BLK_NUM_ENTRIES 8
+#define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
+
+struct mlx4_mad_rcv_buf {
+ struct ib_grh grh;
+ u8 payload[256];
+} __packed;
+
+struct mlx4_mad_snd_buf {
+ u8 payload[256];
+} __packed;
+
+struct mlx4_tunnel_mad {
+ struct ib_grh grh;
+ struct mlx4_ib_tunnel_header hdr;
+ struct ib_mad mad;
+} __packed;
+
+struct mlx4_rcv_tunnel_mad {
+ struct mlx4_rcv_tunnel_hdr hdr;
+ struct ib_grh grh;
+ struct ib_mad mad;
+} __packed;
+
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
+static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
+static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
+ int block, u32 change_bitmap);
+
+__be64 mlx4_ib_gen_node_guid(void)
+{
+#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
+ return cpu_to_be64(NODE_GUID_HI | random());
+}
+
+__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
+{
+ return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
+ cpu_to_be64(0xff00000000000000LL);
+}
+
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad)
{
@@ -69,10 +129,13 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
* Key check traps can't be generated unless we have in_wc to
* tell us where to send the trap.
*/
- if (ignore_mkey || !in_wc)
+ if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
op_modifier |= 0x1;
- if (ignore_bkey || !in_wc)
+ if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
op_modifier |= 0x2;
+ if (mlx4_is_mfunc(dev->dev) &&
+ (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
+ op_modifier |= 0x8;
if (in_wc) {
struct {
@@ -105,9 +168,10 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
in_modifier |= in_wc->slid << 16;
}
- err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
- in_modifier, op_modifier,
- MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+ err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
+ mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
if (!err)
memcpy(response_mad, outmailbox->buf, 256);
@@ -122,6 +186,7 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
struct ib_ah_attr ah_attr;
+ unsigned long flags;
if (!dev->send_agent[port_num - 1][0])
return;
@@ -136,53 +201,134 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
if (IS_ERR(new_ah))
return;
- spin_lock(&dev->sm_lock);
+ spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
ib_destroy_ah(dev->sm_ah[port_num - 1]);
dev->sm_ah[port_num - 1] = new_ah;
- spin_unlock(&dev->sm_lock);
+ spin_unlock_irqrestore(&dev->sm_lock, flags);
}
/*
- * Snoop SM MADs for port info and P_Key table sets, so we can
- * synthesize LID change and P_Key change events.
+ * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can
+ * synthesize LID change, Client-Rereg, GID change, and P_Key change events.
*/
static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
- u16 prev_lid)
+ u16 prev_lid)
{
- struct ib_event event;
+ struct ib_port_info *pinfo;
+ u16 lid;
+ __be16 *base;
+ u32 bn, pkey_change_bitmap;
+ int i;
+
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
- mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
- if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
- struct ib_port_info *pinfo =
- (struct ib_port_info *) ((struct ib_smp *) mad)->data;
- u16 lid = be16_to_cpu(pinfo->lid);
+ mad->mad_hdr.method == IB_MGMT_METHOD_SET)
+ switch (mad->mad_hdr.attr_id) {
+ case IB_SMP_ATTR_PORT_INFO:
+ pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data;
+ lid = be16_to_cpu(pinfo->lid);
- update_sm_ah(to_mdev(ibdev), port_num,
+ update_sm_ah(dev, port_num,
be16_to_cpu(pinfo->sm_lid),
pinfo->neighbormtu_mastersmsl & 0xf);
- event.device = ibdev;
- event.element.port_num = port_num;
+ if (pinfo->clientrereg_resv_subnetto & 0x80)
+ handle_client_rereg_event(dev, port_num);
+
+ if (prev_lid != lid)
+ handle_lid_change_event(dev, port_num);
+ break;
- if (pinfo->clientrereg_resv_subnetto & 0x80) {
- event.event = IB_EVENT_CLIENT_REREGISTER;
- ib_dispatch_event(&event);
+ case IB_SMP_ATTR_PKEY_TABLE:
+ if (!mlx4_is_mfunc(dev->dev)) {
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_PKEY_CHANGE);
+ break;
}
- if (prev_lid != lid) {
- event.event = IB_EVENT_LID_CHANGE;
- ib_dispatch_event(&event);
+ /* at this point, we are running in the master.
+ * Slaves do not receive SMPs.
+ */
+ bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF;
+ base = (__be16 *) &(((struct ib_smp *)mad)->data[0]);
+ pkey_change_bitmap = 0;
+ for (i = 0; i < 32; i++) {
+ pr_debug("PKEY[%d] = x%x\n",
+ i + bn*32, be16_to_cpu(base[i]));
+ if (be16_to_cpu(base[i]) !=
+ dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) {
+ pkey_change_bitmap |= (1 << i);
+ dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] =
+ be16_to_cpu(base[i]);
+ }
+ }
+ pr_debug("PKEY Change event: port=%d, "
+ "block=0x%x, change_bitmap=0x%x\n",
+ port_num, bn, pkey_change_bitmap);
+
+ if (pkey_change_bitmap) {
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_PKEY_CHANGE);
+ if (!dev->sriov.is_going_down)
+ __propagate_pkey_ev(dev, port_num, bn,
+ pkey_change_bitmap);
}
+ break;
+
+ case IB_SMP_ATTR_GUID_INFO:
+ /* paravirtualized master's guid is guid 0 -- does not change */
+ if (!mlx4_is_master(dev->dev))
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_GID_CHANGE);
+ /*if master, notify relevant slaves*/
+ if (mlx4_is_master(dev->dev) &&
+ !dev->sriov.is_going_down) {
+ bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod);
+ mlx4_ib_update_cache_on_guid_change(dev, bn, port_num,
+ (u8 *)(&((struct ib_smp *)mad)->data));
+ mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num,
+ (u8 *)(&((struct ib_smp *)mad)->data));
+ }
+ break;
+
+ default:
+ break;
}
+}
- if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
- event.device = ibdev;
- event.event = IB_EVENT_PKEY_CHANGE;
- event.element.port_num = port_num;
- ib_dispatch_event(&event);
+static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
+ int block, u32 change_bitmap)
+{
+ int i, ix, slave, err;
+ int have_event = 0;
+
+ for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) {
+ if (slave == mlx4_master_func_num(dev->dev))
+ continue;
+ if (!mlx4_is_slave_active(dev->dev, slave))
+ continue;
+
+ have_event = 0;
+ for (i = 0; i < 32; i++) {
+ if (!(change_bitmap & (1 << i)))
+ continue;
+ for (ix = 0;
+ ix < dev->dev->caps.pkey_table_len[port_num]; ix++) {
+ if (dev->pkeys.virt2phys_pkey[slave][port_num - 1]
+ [ix] == i + 32 * block) {
+ err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num);
+ pr_debug("propagate_pkey_ev: slave %d,"
+ " port %d, ix %d (%d)\n",
+ slave, port_num, ix, err);
+ have_event = 1;
+ break;
+ }
+ }
+ if (have_event)
+ break;
}
}
}
@@ -190,13 +336,15 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
static void node_desc_override(struct ib_device *dev,
struct ib_mad *mad)
{
+ unsigned long flags;
+
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
- spin_lock(&to_mdev(dev)->sm_lock);
+ spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags);
memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
- spin_unlock(&to_mdev(dev)->sm_lock);
+ spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags);
}
}
@@ -206,47 +354,357 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
struct ib_mad_send_buf *send_buf;
struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
int ret;
+ unsigned long flags;
if (agent) {
send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ if (IS_ERR(send_buf))
+ return;
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (this is
* wrong following the IB spec strictly, but we know
* it's OK for our devices).
*/
- spin_lock(&dev->sm_lock);
+ spin_lock_irqsave(&dev->sm_lock, flags);
memcpy(send_buf->mad, mad, sizeof *mad);
if ((send_buf->ah = dev->sm_ah[port_num - 1]))
ret = ib_post_send_mad(send_buf, NULL);
else
ret = -EINVAL;
- spin_unlock(&dev->sm_lock);
+ spin_unlock_irqrestore(&dev->sm_lock, flags);
if (ret)
ib_free_send_mad(send_buf);
}
}
-static int is_vendor_id(__be16 attr_id)
+static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *sa_mad)
+{
+ int ret = 0;
+
+ /* dispatch to different sa handlers */
+ switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
+ case IB_SA_ATTR_MC_MEMBER_REC:
+ ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int i;
+
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
+ return i;
+ }
+ return -1;
+}
+
+
+static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
+ u8 port, u16 pkey, u16 *ix)
+{
+ int i, ret;
+ u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
+ u16 slot_pkey;
+
+ if (slave == mlx4_master_func_num(dev->dev))
+ return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix);
+
+ unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1;
+
+ for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
+ if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix)
+ continue;
+
+ pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i];
+
+ ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey);
+ if (ret)
+ continue;
+ if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) {
+ if (slot_pkey & 0x8000) {
+ *ix = (u16) pkey_ix;
+ return 0;
+ } else {
+ /* take first partial pkey index found */
+ if (partial_ix == 0xFF)
+ partial_ix = pkey_ix;
+ }
+ }
+ }
+
+ if (partial_ix < 0xFF) {
+ *ix = (u16) partial_ix;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type dest_qpt, struct ib_wc *wc,
+ struct ib_grh *grh, struct ib_mad *mad)
{
- return (attr_id & IB_SMP_ATTR_VENDOR_MASK) == IB_SMP_ATTR_VENDOR_MASK;
+ struct ib_sge list;
+ struct ib_send_wr wr, *bad_wr;
+ struct mlx4_ib_demux_pv_ctx *tun_ctx;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ struct mlx4_rcv_tunnel_mad *tun_mad;
+ struct ib_ah_attr attr;
+ struct ib_ah *ah;
+ struct ib_qp *src_qp = NULL;
+ unsigned tun_tx_ix = 0;
+ int dqpn;
+ int ret = 0;
+ u16 tun_pkey_ix;
+ u16 cached_pkey;
+ u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
+
+ if (dest_qpt > IB_QPT_GSI)
+ return -EINVAL;
+
+ tun_ctx = dev->sriov.demux[port-1].tun[slave];
+
+ /* check if proxy qp created */
+ if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
+ return -EAGAIN;
+
+ /* QP0 forwarding only for Dom0 */
+ if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
+ return -EINVAL;
+
+ if (!dest_qpt)
+ tun_qp = &tun_ctx->qp[0];
+ else
+ tun_qp = &tun_ctx->qp[1];
+
+ /* compute P_Key index to put in tunnel header for slave */
+ if (dest_qpt) {
+ u16 pkey_ix;
+ ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
+ if (ret)
+ return -EINVAL;
+
+ ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
+ if (ret)
+ return -EINVAL;
+ tun_pkey_ix = pkey_ix;
+ } else
+ tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
+
+ dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
+
+ /* get tunnel tx data buf for slave */
+ src_qp = tun_qp->qp;
+
+ /* create ah. Just need an empty one with the port num for the post send.
+ * The driver will set the force loopback bit in post_send */
+ memset(&attr, 0, sizeof attr);
+ attr.port_num = port;
+ if (is_eth) {
+ memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+ attr.ah_flags = IB_AH_GRH;
+ }
+ ah = ib_create_ah(tun_ctx->pd, &attr);
+ if (IS_ERR(ah))
+ return -ENOMEM;
+
+ /* allocate tunnel tx buf after pass failure returns */
+ spin_lock(&tun_qp->tx_lock);
+ if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
+ (MLX4_NUM_TUNNEL_BUFS - 1))
+ ret = -EAGAIN;
+ else
+ tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+ spin_unlock(&tun_qp->tx_lock);
+ if (ret)
+ goto out;
+
+ tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
+ if (tun_qp->tx_ring[tun_tx_ix].ah)
+ ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+ tun_qp->tx_ring[tun_tx_ix].ah = ah;
+ ib_dma_sync_single_for_cpu(&dev->ib_dev,
+ tun_qp->tx_ring[tun_tx_ix].buf.map,
+ sizeof (struct mlx4_rcv_tunnel_mad),
+ DMA_TO_DEVICE);
+
+ /* copy over to tunnel buffer */
+ if (grh)
+ memcpy(&tun_mad->grh, grh, sizeof *grh);
+ memcpy(&tun_mad->mad, mad, sizeof *mad);
+
+ /* adjust tunnel data */
+ tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
+ tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
+ tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+ tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
+ tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
+
+ ib_dma_sync_single_for_device(&dev->ib_dev,
+ tun_qp->tx_ring[tun_tx_ix].buf.map,
+ sizeof (struct mlx4_rcv_tunnel_mad),
+ DMA_TO_DEVICE);
+
+ list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
+ list.length = sizeof (struct mlx4_rcv_tunnel_mad);
+ list.lkey = tun_ctx->mr->lkey;
+
+ wr.wr.ud.ah = ah;
+ wr.wr.ud.port_num = port;
+ wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
+ wr.wr.ud.remote_qpn = dqpn;
+ wr.next = NULL;
+ wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+ wr.opcode = IB_WR_SEND;
+ wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = ib_post_send(src_qp, &wr, &bad_wr);
+out:
+ if (ret)
+ ib_destroy_ah(ah);
+ return ret;
}
-static int supported_vendor_id(__be16 attr_id)
+static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
+ struct ib_wc *wc, struct ib_grh *grh,
+ struct ib_mad *mad)
{
- return 1;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int err;
+ int slave;
+ u8 *slave_id;
+ int is_eth = 0;
+
+ if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+ is_eth = 0;
+ else
+ is_eth = 1;
+
+ if (is_eth) {
+ if (!wc->wc_flags & IB_WC_GRH) {
+ mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
+ return -EINVAL;
+ }
+ if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
+ mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
+ return -EINVAL;
+ }
+ if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
+ if (slave >= dev->dev->caps.sqp_demux) {
+ mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+ slave, dev->dev->caps.sqp_demux);
+ return -ENOENT;
+ }
+
+ if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad, is_eth))
+ return 0;
+
+ err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+ if (err)
+ pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ slave, err);
+ return 0;
+ }
+
+ /* Initially assume that this mad is for us */
+ slave = mlx4_master_func_num(dev->dev);
+
+ /* See if the slave id is encoded in a response mad */
+ if (mad->mad_hdr.method & 0x80) {
+ slave_id = (u8 *) &mad->mad_hdr.tid;
+ slave = *slave_id;
+ if (slave != 255) /*255 indicates the dom0*/
+ *slave_id = 0; /* remap tid */
+ }
+
+ /* If a grh is present, we demux according to it */
+ if (wc->wc_flags & IB_WC_GRH) {
+ slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
+ if (slave < 0) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
+ }
+ /* Class-specific handling */
+ switch (mad->mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_ADM:
+ if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
+ (struct ib_sa_mad *) mad))
+ return 0;
+ break;
+ case IB_MGMT_CLASS_CM:
+ if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad, is_eth))
+ return 0;
+ break;
+ case IB_MGMT_CLASS_DEVICE_MGMT:
+ if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
+ return 0;
+ break;
+ default:
+ /* Drop unsupported classes for slaves in tunnel mode */
+ if (slave != mlx4_master_func_num(dev->dev)) {
+ pr_debug("dropping unsupported ingress mad from class:%d "
+ "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
+ return 0;
+ }
+ }
+ /*make sure that no slave==255 was not handled yet.*/
+ if (slave >= dev->dev->caps.sqp_demux) {
+ mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+ slave, dev->dev->caps.sqp_demux);
+ return -ENOENT;
+ }
+
+ err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+ if (err)
+ pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ slave, err);
+ return 0;
}
static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- struct ib_wc *in_wc, struct ib_grh *in_grh,
- struct ib_mad *in_mad, struct ib_mad *out_mad)
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
{
u16 slid, prev_lid = 0;
int err;
struct ib_port_attr pattr;
+ if (in_wc && in_wc->qp->qp_num) {
+ pr_debug("received MAD: slid:%d sqpn:%d "
+ "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
+ in_wc->slid, in_wc->src_qp,
+ in_wc->dlid_path_bits,
+ in_wc->qp->qp_num,
+ in_wc->wc_flags,
+ in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
+ be16_to_cpu(in_mad->mad_hdr.attr_id));
+ if (in_wc->wc_flags & IB_WC_GRH) {
+ pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
+ (long long)be64_to_cpu(in_grh->sgid.global.subnet_prefix),
+ (long long)
+ be64_to_cpu(in_grh->sgid.global.interface_id));
+ pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
+ (long long)be64_to_cpu(in_grh->dgid.global.subnet_prefix),
+ (long long)be64_to_cpu(in_grh->dgid.global.interface_id));
+ }
+ }
+
slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
@@ -262,12 +720,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS;
/*
- * Don't process SMInfo queries or vendor-specific
- * MADs -- the SMA can't handle them.
+ * Don't process SMInfo queries -- the SMA can't handle them.
*/
- if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
- (is_vendor_id(in_mad->mad_hdr.attr_id) &&
- !supported_vendor_id(in_mad->mad_hdr.attr_id)))
+ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
return IB_MAD_RESULT_SUCCESS;
} else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
@@ -287,15 +742,19 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
prev_lid = pattr.lid;
err = mlx4_MAD_IFC(to_mdev(ibdev),
- mad_flags & IB_MAD_IGNORE_MKEY,
- mad_flags & IB_MAD_IGNORE_BKEY,
+ (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
+ (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
+ MLX4_MAD_IFC_NET_VIEW,
port_num, in_wc, in_grh, in_mad, out_mad);
if (err)
return IB_MAD_RESULT_FAILURE;
if (!out_mad->mad_hdr.status) {
- smp_snoop(ibdev, port_num, in_mad, prev_lid);
- node_desc_override(ibdev, out_mad);
+ if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV))
+ smp_snoop(ibdev, port_num, in_mad, prev_lid);
+ /* slaves get node desc from FW */
+ if (!mlx4_is_slave(to_mdev(ibdev)->dev))
+ node_desc_override(ibdev, out_mad);
}
/* set return bit in status of directed route responses */
@@ -309,72 +768,235 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
-static __be32 be64_to_be32(__be64 b64)
+static void edit_counter_ext(struct mlx4_if_stat_extended *cnt, void *counters,
+ __be16 attr_id)
{
- return cpu_to_be32(be64_to_cpu(b64) & 0xffffffff);
-}
+ switch (attr_id) {
+ case IB_PMA_PORT_COUNTERS:
+ {
+ struct ib_pma_portcounters *pma_cnt =
+ (struct ib_pma_portcounters *)counters;
+ pma_cnt->port_xmit_data =
+ cpu_to_be32((be64_to_cpu(cnt->counters[0].
+ IfTxUnicastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxMulticastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxDroppedOctets)) >> 2);
+ pma_cnt->port_rcv_data =
+ cpu_to_be32((be64_to_cpu(cnt->counters[0].
+ IfRxUnicastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxMulticastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxBroadcastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxNoBufferOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxErrorOctets)) >> 2);
+ pma_cnt->port_xmit_packets =
+ cpu_to_be32(be64_to_cpu(cnt->counters[0].
+ IfTxUnicastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxDroppedFrames));
+ pma_cnt->port_rcv_packets =
+ cpu_to_be32(be64_to_cpu(cnt->counters[0].
+ IfRxUnicastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxBroadcastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxNoBufferFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxErrorFrames));
+ pma_cnt->port_rcv_errors = cpu_to_be32(be64_to_cpu(cnt->
+ counters[0].
+ IfRxErrorFrames));
+ break;
+ }
+
+ case IB_PMA_PORT_COUNTERS_EXT:
+ {
+ struct ib_pma_portcounters_ext *pma_cnt_ext =
+ (struct ib_pma_portcounters_ext *)counters;
+
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64((be64_to_cpu(cnt->counters[0].
+ IfTxUnicastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxMulticastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxDroppedOctets)) >> 2);
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64((be64_to_cpu(cnt->counters[0].
+ IfRxUnicastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxMulticastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxBroadcastOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxNoBufferOctets) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxErrorOctets)) >> 2);
+ pma_cnt_ext->port_xmit_packets =
+ cpu_to_be64(be64_to_cpu(cnt->counters[0].
+ IfTxUnicastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxDroppedFrames));
+ pma_cnt_ext->port_rcv_packets =
+ cpu_to_be64(be64_to_cpu(cnt->counters[0].
+ IfRxUnicastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxBroadcastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxNoBufferFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfRxErrorFrames));
+ pma_cnt_ext->port_unicast_xmit_packets = cnt->counters[0].
+ IfTxUnicastFrames;
+ pma_cnt_ext->port_unicast_rcv_packets = cnt->counters[0].
+ IfRxUnicastFrames;
+ pma_cnt_ext->port_multicast_xmit_packets =
+ cpu_to_be64(be64_to_cpu(cnt->counters[0].
+ IfTxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastFrames));
+ pma_cnt_ext->port_multicast_rcv_packets =
+ cpu_to_be64(be64_to_cpu(cnt->counters[0].
+ IfTxMulticastFrames) +
+ be64_to_cpu(cnt->counters[0].
+ IfTxBroadcastFrames));
+
+ break;
+ }
+
+ default:
+ pr_warn("Unsupported attr_id 0x%x\n", attr_id);
+ break;
+ }
-static void edit_counters(struct mlx4_counters *cnt, void *data)
-{
- *(__be32 *)(data + 40 + 24) = be64_to_be32(cnt->tx_bytes);
- *(__be32 *)(data + 40 + 28) = be64_to_be32(cnt->rx_bytes);
- *(__be32 *)(data + 40 + 32) = be64_to_be32(cnt->tx_frames);
- *(__be32 *)(data + 40 + 36) = be64_to_be32(cnt->rx_frames);
}
-static void edit_ext_counters(struct mlx4_counters_ext *cnt, void *data)
+static void edit_counter(struct mlx4_if_stat_basic *cnt, void *counters,
+ __be16 attr_id)
{
- *(__be32 *)(data + 40 + 24) = be64_to_be32(cnt->tx_uni_bytes);
- *(__be32 *)(data + 40 + 28) = be64_to_be32(cnt->rx_uni_bytes);
- *(__be32 *)(data + 40 + 32) = be64_to_be32(cnt->tx_uni_frames);
- *(__be32 *)(data + 40 + 36) = be64_to_be32(cnt->rx_uni_frames);
- *(__be32 *)(data + 40 + 8) = be64_to_be32(cnt->rx_err_frames);
+ switch (attr_id) {
+ case IB_PMA_PORT_COUNTERS:
+ {
+ struct ib_pma_portcounters *pma_cnt =
+ (struct ib_pma_portcounters *) counters;
+ pma_cnt->port_xmit_data =
+ cpu_to_be32(be64_to_cpu(
+ cnt->counters[0].IfTxOctets) >> 2);
+ pma_cnt->port_rcv_data =
+ cpu_to_be32(be64_to_cpu(
+ cnt->counters[0].IfRxOctets) >> 2);
+ pma_cnt->port_xmit_packets =
+ cpu_to_be32(be64_to_cpu(cnt->counters[0].IfTxFrames));
+ pma_cnt->port_rcv_packets =
+ cpu_to_be32(be64_to_cpu(cnt->counters[0].IfRxFrames));
+ break;
+ }
+ case IB_PMA_PORT_COUNTERS_EXT:
+ {
+ struct ib_pma_portcounters_ext *pma_cnt_ext =
+ (struct ib_pma_portcounters_ext *) counters;
+
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64((be64_to_cpu(cnt->counters[0].
+ IfTxOctets) >> 2));
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64((be64_to_cpu(cnt->counters[0].
+ IfRxOctets) >> 2));
+ pma_cnt_ext->port_xmit_packets = cnt->counters[0].IfTxFrames;
+ pma_cnt_ext->port_rcv_packets = cnt->counters[0].IfRxFrames;
+ break;
+ }
+ default:
+ pr_warn("Unsupported attr_id 0x%x\n", attr_id);
+ break;
+ }
}
-static int rdmaoe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- struct ib_wc *in_wc, struct ib_grh *in_grh,
- struct ib_mad *in_mad, struct ib_mad *out_mad)
+int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index,
+ union mlx4_counter *counter, u8 clear)
{
struct mlx4_cmd_mailbox *mailbox;
- struct mlx4_ib_dev *dev = to_mdev(ibdev);
int err;
- u32 inmod = dev->counters[port_num - 1] & 0xffff;
- int mode;
-
- if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
- return -EINVAL;
+ u32 inmod = counter_index | ((clear & 1) << 31);
mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
if (IS_ERR(mailbox))
return IB_MAD_RESULT_FAILURE;
err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0,
- MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C);
- if (err)
+ MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_WRAPPED);
+ if (!err)
+ memcpy(counter, mailbox->buf, MLX4_IF_STAT_SZ(1));
+
+ mlx4_free_cmd_mailbox(dev->dev, mailbox);
+
+ return err;
+}
+
+static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int err;
+ u32 counter_index = dev->counters[port_num - 1] & 0xffff;
+ u8 mode;
+ char counter_buf[MLX4_IF_STAT_SZ(1)];
+ union mlx4_counter *counter = (union mlx4_counter *)
+ counter_buf;
+
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+ return -EINVAL;
+
+ if (mlx4_ib_query_if_stat(dev, counter_index, counter, 0)) {
err = IB_MAD_RESULT_FAILURE;
- else {
+ } else {
memset(out_mad->data, 0, sizeof out_mad->data);
- mode = be32_to_cpu(((struct mlx4_counters *)mailbox->buf)->counter_mode) & 0xf;
- switch (mode) {
+ mode = counter->control.cnt_mode & 0xFF;
+ err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ switch (mode & 0xf) {
case 0:
- edit_counters(mailbox->buf, out_mad->data);
- err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ edit_counter((void *)counter,
+ (void *)(out_mad->data + 40),
+ in_mad->mad_hdr.attr_id);
break;
case 1:
- edit_ext_counters(mailbox->buf, out_mad->data);
- err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ edit_counter_ext((void *)counter,
+ (void *)(out_mad->data + 40),
+ in_mad->mad_hdr.attr_id);
break;
default:
err = IB_MAD_RESULT_FAILURE;
}
}
- mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
}
-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct ib_wc *in_wc, struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad)
{
@@ -383,7 +1005,7 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
in_grh, in_mad, out_mad);
case IB_LINK_LAYER_ETHERNET:
- return rdmaoe_process_mad(ibdev, mad_flags, port_num, in_wc,
+ return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
in_grh, in_mad, out_mad);
default:
return -EINVAL;
@@ -393,6 +1015,8 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
+ if (mad_send_wc->send_buf->context[0])
+ ib_destroy_ah(mad_send_wc->send_buf->context[0]);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -450,3 +1074,1221 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
ib_destroy_ah(dev->sm_ah[p]);
}
}
+
+static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
+{
+ mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
+
+ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
+ mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
+ MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
+}
+
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
+{
+ /* re-configure the alias-guid and mcg's */
+ if (mlx4_is_master(dev->dev)) {
+ mlx4_ib_invalidate_all_guid_record(dev, port_num);
+
+ if (!dev->sriov.is_going_down) {
+ mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
+ mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
+ MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
+ }
+ }
+ mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
+}
+
+static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
+ struct mlx4_eqe *eqe)
+{
+ __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe),
+ GET_MASK_FROM_EQE(eqe));
+}
+
+static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
+ u32 guid_tbl_blk_num, u32 change_bitmap)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ u16 i;
+
+ if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev))
+ return;
+
+ in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad) {
+ mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
+ goto out;
+ }
+
+ guid_tbl_blk_num *= 4;
+
+ for (i = 0; i < 4; i++) {
+ if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff)))
+ continue;
+ memset(in_mad, 0, sizeof *in_mad);
+ memset(out_mad, 0, sizeof *out_mad);
+
+ in_mad->base_version = 1;
+ in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ in_mad->class_version = 1;
+ in_mad->method = IB_MGMT_METHOD_GET;
+ in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
+ in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i);
+
+ if (mlx4_MAD_IFC(dev,
+ MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW,
+ port_num, NULL, NULL, in_mad, out_mad)) {
+ mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n");
+ goto out;
+ }
+
+ mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i,
+ port_num,
+ (u8 *)(&((struct ib_smp *)out_mad)->data));
+ mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i,
+ port_num,
+ (u8 *)(&((struct ib_smp *)out_mad)->data));
+ }
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return;
+}
+
+void handle_port_mgmt_change_event(struct work_struct *work)
+{
+ struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
+ struct mlx4_ib_dev *dev = ew->ib_dev;
+ struct mlx4_eqe *eqe = &(ew->ib_eqe);
+ u8 port = eqe->event.port_mgmt_change.port;
+ u32 changed_attr;
+ u32 tbl_block;
+ u32 change_bitmap;
+
+ switch (eqe->subtype) {
+ case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
+ changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr);
+
+ /* Update the SM ah - This should be done before handling
+ the other changed attributes so that MADs can be sent to the SM */
+ if (changed_attr & MSTR_SM_CHANGE_MASK) {
+ u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
+ u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
+ update_sm_ah(dev, port, lid, sl);
+ }
+
+ /* Check if it is a lid change event */
+ if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
+ handle_lid_change_event(dev, port);
+
+ /* Generate GUID changed event */
+ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
+ /*if master, notify all slaves*/
+ if (mlx4_is_master(dev->dev))
+ mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
+ MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK);
+ }
+
+ if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
+ handle_client_rereg_event(dev, port);
+ break;
+
+ case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
+ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
+ propagate_pkey_ev(dev, port, eqe);
+ break;
+ case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
+ /* paravirtualized master's guid is guid 0 -- does not change */
+ if (!mlx4_is_master(dev->dev))
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
+ /*if master, notify relevant slaves*/
+ else if (!dev->sriov.is_going_down) {
+ tbl_block = GET_BLK_PTR_FROM_EQE(eqe);
+ change_bitmap = GET_MASK_FROM_EQE(eqe);
+ handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
+ }
+ break;
+ default:
+ pr_warn("Unsupported subtype 0x%x for "
+ "Port Management Change event\n", eqe->subtype);
+ }
+
+ kfree(ew);
+}
+
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+ enum ib_event_type type)
+{
+ struct ib_event event;
+
+ event.device = &dev->ib_dev;
+ event.element.port_num = port_num;
+ event.event = type;
+
+ ib_dispatch_event(&event);
+}
+
+static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
+{
+ unsigned long flags;
+ struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
+ queue_work(ctx->wq, &ctx->work);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
+ struct mlx4_ib_demux_pv_qp *tun_qp,
+ int index)
+{
+ struct ib_sge sg_list;
+ struct ib_recv_wr recv_wr, *bad_recv_wr;
+ int size;
+
+ size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
+ sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
+
+ sg_list.addr = tun_qp->ring[index].map;
+ sg_list.length = size;
+ sg_list.lkey = ctx->mr->lkey;
+
+ recv_wr.next = NULL;
+ recv_wr.sg_list = &sg_list;
+ recv_wr.num_sge = 1;
+ recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
+ MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
+ ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
+ size, DMA_FROM_DEVICE);
+ return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
+}
+
+static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
+ int slave, struct ib_sa_mad *sa_mad)
+{
+ int ret = 0;
+
+ /* dispatch to different sa handlers */
+ switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
+ case IB_SA_ATTR_MC_MEMBER_REC:
+ ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
+{
+ int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
+
+ return (qpn >= proxy_start && qpn <= proxy_start + 1);
+}
+
+
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
+ u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
+{
+ struct ib_sge list;
+ struct ib_send_wr wr, *bad_wr;
+ struct mlx4_ib_demux_pv_ctx *sqp_ctx;
+ struct mlx4_ib_demux_pv_qp *sqp;
+ struct mlx4_mad_snd_buf *sqp_mad;
+ struct ib_ah *ah;
+ struct ib_qp *send_qp = NULL;
+ unsigned wire_tx_ix = 0;
+ int ret = 0;
+ u16 wire_pkey_ix;
+ int src_qpnum;
+ u8 sgid_index;
+
+
+ sqp_ctx = dev->sriov.sqps[port-1];
+
+ /* check if proxy qp created */
+ if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
+ return -EAGAIN;
+
+ /* QP0 forwarding only for Dom0 */
+ if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
+ return -EINVAL;
+
+ if (dest_qpt == IB_QPT_SMI) {
+ src_qpnum = 0;
+ sqp = &sqp_ctx->qp[0];
+ wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
+ } else {
+ src_qpnum = 1;
+ sqp = &sqp_ctx->qp[1];
+ wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
+ }
+
+ send_qp = sqp->qp;
+
+ /* create ah */
+ sgid_index = attr->grh.sgid_index;
+ attr->grh.sgid_index = 0;
+ ah = ib_create_ah(sqp_ctx->pd, attr);
+ if (IS_ERR(ah))
+ return -ENOMEM;
+ attr->grh.sgid_index = sgid_index;
+ to_mah(ah)->av.ib.gid_index = sgid_index;
+ /* get rid of force-loopback bit */
+ to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+ spin_lock(&sqp->tx_lock);
+ if (sqp->tx_ix_head - sqp->tx_ix_tail >=
+ (MLX4_NUM_TUNNEL_BUFS - 1))
+ ret = -EAGAIN;
+ else
+ wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+ spin_unlock(&sqp->tx_lock);
+ if (ret)
+ goto out;
+
+ sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
+ if (sqp->tx_ring[wire_tx_ix].ah)
+ ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+ sqp->tx_ring[wire_tx_ix].ah = ah;
+ ib_dma_sync_single_for_cpu(&dev->ib_dev,
+ sqp->tx_ring[wire_tx_ix].buf.map,
+ sizeof (struct mlx4_mad_snd_buf),
+ DMA_TO_DEVICE);
+
+ memcpy(&sqp_mad->payload, mad, sizeof *mad);
+
+ ib_dma_sync_single_for_device(&dev->ib_dev,
+ sqp->tx_ring[wire_tx_ix].buf.map,
+ sizeof (struct mlx4_mad_snd_buf),
+ DMA_TO_DEVICE);
+
+ list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
+ list.length = sizeof (struct mlx4_mad_snd_buf);
+ list.lkey = sqp_ctx->mr->lkey;
+
+ wr.wr.ud.ah = ah;
+ wr.wr.ud.port_num = port;
+ wr.wr.ud.pkey_index = wire_pkey_ix;
+ wr.wr.ud.remote_qkey = qkey;
+ wr.wr.ud.remote_qpn = remote_qpn;
+ wr.next = NULL;
+ wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+ wr.opcode = IB_WR_SEND;
+ wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = ib_post_send(send_qp, &wr, &bad_wr);
+out:
+ if (ret)
+ ib_destroy_ah(ah);
+ return ret;
+}
+
+static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
+{
+ int gids;
+ int vfs;
+
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+ return slave;
+
+ gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+ vfs = dev->dev->num_vfs;
+
+ if (slave == 0)
+ return 0;
+ if (slave <= gids % vfs)
+ return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1);
+
+ return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1));
+}
+
+static int get_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
+ struct ib_ah_attr *ah_attr)
+{
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) {
+ ah_attr->grh.sgid_index = slave;
+ return 0;
+ }
+ ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
+ return 0;
+}
+
+static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+ struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
+ int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
+ struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
+ struct mlx4_ib_ah ah;
+ struct ib_ah_attr ah_attr;
+ u8 *slave_id;
+ int slave;
+
+ /* Get slave that sent this packet */
+ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
+ wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
+ (wc->src_qp & 0x1) != ctx->port - 1 ||
+ wc->src_qp & 0x4) {
+ mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
+ return;
+ }
+ slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
+ if (slave != ctx->slave) {
+ mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
+ "belongs to another slave\n", wc->src_qp);
+ return;
+ }
+ if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
+ mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
+ "non-master trying to send QP0 packets\n", wc->src_qp);
+ return;
+ }
+
+ /* Map transaction ID */
+ ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
+ sizeof (struct mlx4_tunnel_mad),
+ DMA_FROM_DEVICE);
+ switch (tunnel->mad.mad_hdr.method) {
+ case IB_MGMT_METHOD_SET:
+ case IB_MGMT_METHOD_GET:
+ case IB_MGMT_METHOD_REPORT:
+ case IB_SA_METHOD_GET_TABLE:
+ case IB_SA_METHOD_DELETE:
+ case IB_SA_METHOD_GET_MULTI:
+ case IB_SA_METHOD_GET_TRACE_TBL:
+ slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
+ if (*slave_id) {
+ mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
+ "class:%d slave:%d\n", *slave_id,
+ tunnel->mad.mad_hdr.mgmt_class, slave);
+ return;
+ } else
+ *slave_id = slave;
+ default:
+ /* nothing */;
+ }
+
+ /* Class-specific handling */
+ switch (tunnel->mad.mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_ADM:
+ if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
+ (struct ib_sa_mad *) &tunnel->mad))
+ return;
+ break;
+ case IB_MGMT_CLASS_CM:
+ if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave,
+ (struct ib_mad *) &tunnel->mad))
+ return;
+ break;
+ case IB_MGMT_CLASS_DEVICE_MGMT:
+ if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
+ tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
+ return;
+ break;
+ default:
+ /* Drop unsupported classes for slaves in tunnel mode */
+ if (slave != mlx4_master_func_num(dev->dev)) {
+ mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
+ "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
+ return;
+ }
+ }
+
+ /* We are using standard ib_core services to send the mad, so generate a
+ * stadard address handle by decoding the tunnelled mlx4_ah fields */
+ memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
+ ah.ibah.device = ctx->ib_dev;
+ mlx4_ib_query_ah(&ah.ibah, &ah_attr);
+ if (ah_attr.ah_flags & IB_AH_GRH)
+ if (get_real_sgid_index(dev, slave, ctx->port, &ah_attr))
+ return;
+
+ mlx4_ib_send_to_wire(dev, slave, ctx->port,
+ is_proxy_qp0(dev, wc->src_qp, slave) ?
+ IB_QPT_SMI : IB_QPT_GSI,
+ be16_to_cpu(tunnel->hdr.pkey_index),
+ be32_to_cpu(tunnel->hdr.remote_qpn),
+ be32_to_cpu(tunnel->hdr.qkey),
+ &ah_attr, &tunnel->mad);
+}
+
+static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
+ enum ib_qp_type qp_type, int is_tun)
+{
+ int i;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ int rx_buf_size, tx_buf_size;
+
+ if (qp_type > IB_QPT_GSI)
+ return -EINVAL;
+
+ tun_qp = &ctx->qp[qp_type];
+
+ tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
+ GFP_KERNEL);
+ if (!tun_qp->ring)
+ return -ENOMEM;
+
+ tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+ sizeof (struct mlx4_ib_tun_tx_buf),
+ GFP_KERNEL);
+ if (!tun_qp->tx_ring) {
+ kfree(tun_qp->ring);
+ tun_qp->ring = NULL;
+ return -ENOMEM;
+ }
+
+ if (is_tun) {
+ rx_buf_size = sizeof (struct mlx4_tunnel_mad);
+ tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
+ } else {
+ rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
+ tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
+ if (!tun_qp->ring[i].addr)
+ goto err;
+ tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
+ tun_qp->ring[i].addr,
+ rx_buf_size,
+ DMA_FROM_DEVICE);
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ tun_qp->tx_ring[i].buf.addr =
+ kmalloc(tx_buf_size, GFP_KERNEL);
+ if (!tun_qp->tx_ring[i].buf.addr)
+ goto tx_err;
+ tun_qp->tx_ring[i].buf.map =
+ ib_dma_map_single(ctx->ib_dev,
+ tun_qp->tx_ring[i].buf.addr,
+ tx_buf_size,
+ DMA_TO_DEVICE);
+ tun_qp->tx_ring[i].ah = NULL;
+ }
+ spin_lock_init(&tun_qp->tx_lock);
+ tun_qp->tx_ix_head = 0;
+ tun_qp->tx_ix_tail = 0;
+ tun_qp->proxy_qpt = qp_type;
+
+ return 0;
+
+tx_err:
+ while (i > 0) {
+ --i;
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
+ tx_buf_size, DMA_TO_DEVICE);
+ kfree(tun_qp->tx_ring[i].buf.addr);
+ }
+ kfree(tun_qp->tx_ring);
+ tun_qp->tx_ring = NULL;
+ i = MLX4_NUM_TUNNEL_BUFS;
+err:
+ while (i > 0) {
+ --i;
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
+ rx_buf_size, DMA_FROM_DEVICE);
+ kfree(tun_qp->ring[i].addr);
+ }
+ kfree(tun_qp->ring);
+ tun_qp->ring = NULL;
+ return -ENOMEM;
+}
+
+static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
+ enum ib_qp_type qp_type, int is_tun)
+{
+ int i;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ int rx_buf_size, tx_buf_size;
+
+ if (qp_type > IB_QPT_GSI)
+ return;
+
+ tun_qp = &ctx->qp[qp_type];
+ if (is_tun) {
+ rx_buf_size = sizeof (struct mlx4_tunnel_mad);
+ tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
+ } else {
+ rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
+ tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
+ }
+
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
+ rx_buf_size, DMA_FROM_DEVICE);
+ kfree(tun_qp->ring[i].addr);
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
+ tx_buf_size, DMA_TO_DEVICE);
+ kfree(tun_qp->tx_ring[i].buf.addr);
+ if (tun_qp->tx_ring[i].ah)
+ ib_destroy_ah(tun_qp->tx_ring[i].ah);
+ }
+ kfree(tun_qp->tx_ring);
+ kfree(tun_qp->ring);
+}
+
+static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
+{
+ struct mlx4_ib_demux_pv_ctx *ctx;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ struct ib_wc wc;
+ int ret;
+ ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
+ ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+
+ while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
+ tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
+ if (wc.status == IB_WC_SUCCESS) {
+ switch (wc.opcode) {
+ case IB_WC_RECV:
+ mlx4_ib_multiplex_mad(ctx, &wc);
+ ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
+ wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1));
+ if (ret)
+ pr_err("Failed reposting tunnel "
+ "buf:%lld\n", wc.wr_id);
+ break;
+ case IB_WC_SEND:
+ pr_debug("received tunnel send completion:"
+ "wrid=0x%llx, status=0x%x\n",
+ wc.wr_id, wc.status);
+ ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&tun_qp->tx_lock);
+ tun_qp->tx_ix_tail++;
+ spin_unlock(&tun_qp->tx_lock);
+
+ break;
+ default:
+ break;
+ }
+ } else {
+ pr_debug("mlx4_ib: completion error in tunnel: %d."
+ " status = %d, wrid = 0x%llx\n",
+ ctx->slave, wc.status, wc.wr_id);
+ if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
+ ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&tun_qp->tx_lock);
+ tun_qp->tx_ix_tail++;
+ spin_unlock(&tun_qp->tx_lock);
+ }
+ }
+ }
+}
+
+static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
+{
+ struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
+
+ /* It's worse than that! He's dead, Jim! */
+ pr_err("Fatal error (%d) on a MAD QP on port %d\n",
+ event->event, sqp->port);
+}
+
+static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
+ enum ib_qp_type qp_type, int create_tun)
+{
+ int i, ret;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
+ struct ib_qp_attr attr;
+ int qp_attr_mask_INIT;
+
+ if (qp_type > IB_QPT_GSI)
+ return -EINVAL;
+
+ tun_qp = &ctx->qp[qp_type];
+
+ memset(&qp_init_attr, 0, sizeof qp_init_attr);
+ qp_init_attr.init_attr.send_cq = ctx->cq;
+ qp_init_attr.init_attr.recv_cq = ctx->cq;
+ qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
+ qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
+ qp_init_attr.init_attr.cap.max_send_sge = 1;
+ qp_init_attr.init_attr.cap.max_recv_sge = 1;
+ if (create_tun) {
+ qp_init_attr.init_attr.qp_type = IB_QPT_UD;
+ qp_init_attr.init_attr.create_flags = (enum ib_qp_create_flags)MLX4_IB_SRIOV_TUNNEL_QP;
+ qp_init_attr.port = ctx->port;
+ qp_init_attr.slave = ctx->slave;
+ qp_init_attr.proxy_qp_type = qp_type;
+ qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
+ IB_QP_QKEY | IB_QP_PORT;
+ } else {
+ qp_init_attr.init_attr.qp_type = qp_type;
+ qp_init_attr.init_attr.create_flags = (enum ib_qp_create_flags)MLX4_IB_SRIOV_SQP;
+ qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
+ }
+ qp_init_attr.init_attr.port_num = ctx->port;
+ qp_init_attr.init_attr.qp_context = ctx;
+ qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
+ tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
+ if (IS_ERR(tun_qp->qp)) {
+ ret = PTR_ERR(tun_qp->qp);
+ tun_qp->qp = NULL;
+ pr_err("Couldn't create %s QP (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ return ret;
+ }
+
+ memset(&attr, 0, sizeof attr);
+ attr.qp_state = IB_QPS_INIT;
+ attr.pkey_index =
+ to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
+ attr.qkey = IB_QP1_QKEY;
+ attr.port_num = ctx->port;
+ ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
+ if (ret) {
+ pr_err("Couldn't change %s qp state to INIT (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ goto err_qp;
+ }
+ attr.qp_state = IB_QPS_RTR;
+ ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
+ if (ret) {
+ pr_err("Couldn't change %s qp state to RTR (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ goto err_qp;
+ }
+ attr.qp_state = IB_QPS_RTS;
+ attr.sq_psn = 0;
+ ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
+ if (ret) {
+ pr_err("Couldn't change %s qp state to RTS (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ goto err_qp;
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
+ if (ret) {
+ pr_err(" mlx4_ib_post_pv_buf error"
+ " (err = %d, i = %d)\n", ret, i);
+ goto err_qp;
+ }
+ }
+ return 0;
+
+err_qp:
+ ib_destroy_qp(tun_qp->qp);
+ tun_qp->qp = NULL;
+ return ret;
+}
+
+/*
+ * IB MAD completion callback for real SQPs
+ */
+static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
+{
+ struct mlx4_ib_demux_pv_ctx *ctx;
+ struct mlx4_ib_demux_pv_qp *sqp;
+ struct ib_wc wc;
+ struct ib_grh *grh;
+ struct ib_mad *mad;
+
+ ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
+ ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+
+ while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
+ sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
+ if (wc.status == IB_WC_SUCCESS) {
+ switch (wc.opcode) {
+ case IB_WC_SEND:
+ ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&sqp->tx_lock);
+ sqp->tx_ix_tail++;
+ spin_unlock(&sqp->tx_lock);
+ break;
+ case IB_WC_RECV:
+ mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
+ (sqp->ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+ grh = &(((struct mlx4_mad_rcv_buf *)
+ (sqp->ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+ mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
+ if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)))
+ pr_err("Failed reposting SQP "
+ "buf:%lld\n", wc.wr_id);
+ break;
+ default:
+ BUG_ON(1);
+ break;
+ }
+ } else {
+ pr_debug("mlx4_ib: completion error in tunnel: %d."
+ " status = %d, wrid = 0x%llx\n",
+ ctx->slave, wc.status, wc.wr_id);
+ if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
+ ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&sqp->tx_lock);
+ sqp->tx_ix_tail++;
+ spin_unlock(&sqp->tx_lock);
+ }
+ }
+ }
+}
+
+static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
+ struct mlx4_ib_demux_pv_ctx **ret_ctx)
+{
+ struct mlx4_ib_demux_pv_ctx *ctx;
+
+ *ret_ctx = NULL;
+ ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
+ if (!ctx) {
+ pr_err("failed allocating pv resource context "
+ "for port %d, slave %d\n", port, slave);
+ return -ENOMEM;
+ }
+
+ ctx->ib_dev = &dev->ib_dev;
+ ctx->port = port;
+ ctx->slave = slave;
+ *ret_ctx = ctx;
+ return 0;
+}
+
+static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
+{
+ if (dev->sriov.demux[port - 1].tun[slave]) {
+ kfree(dev->sriov.demux[port - 1].tun[slave]);
+ dev->sriov.demux[port - 1].tun[slave] = NULL;
+ }
+}
+
+static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
+ int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
+{
+ int ret, cq_size;
+
+ if (ctx->state != DEMUX_PV_STATE_DOWN)
+ return -EEXIST;
+
+ ctx->state = DEMUX_PV_STATE_STARTING;
+ /* have QP0 only on port owner, and only if link layer is IB */
+ if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
+ rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+ ctx->has_smi = 1;
+
+ if (ctx->has_smi) {
+ ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
+ if (ret) {
+ pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
+ goto err_out;
+ }
+ }
+
+ ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
+ if (ret) {
+ pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
+ goto err_out_qp0;
+ }
+
+ cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
+ if (ctx->has_smi)
+ cq_size *= 2;
+
+ ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
+ NULL, ctx, cq_size, 0);
+ if (IS_ERR(ctx->cq)) {
+ ret = PTR_ERR(ctx->cq);
+ pr_err("Couldn't create tunnel CQ (%d)\n", ret);
+ goto err_buf;
+ }
+
+ ctx->pd = ib_alloc_pd(ctx->ib_dev);
+ if (IS_ERR(ctx->pd)) {
+ ret = PTR_ERR(ctx->pd);
+ pr_err("Couldn't create tunnel PD (%d)\n", ret);
+ goto err_cq;
+ }
+
+ ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(ctx->mr)) {
+ ret = PTR_ERR(ctx->mr);
+ pr_err("Couldn't get tunnel DMA MR (%d)\n", ret);
+ goto err_pd;
+ }
+
+ if (ctx->has_smi) {
+ ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
+ if (ret) {
+ pr_err("Couldn't create %s QP0 (%d)\n",
+ create_tun ? "tunnel for" : "", ret);
+ goto err_mr;
+ }
+ }
+
+ ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
+ if (ret) {
+ pr_err("Couldn't create %s QP1 (%d)\n",
+ create_tun ? "tunnel for" : "", ret);
+ goto err_qp0;
+ }
+
+ if (create_tun)
+ INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
+ else
+ INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
+
+ ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
+
+ ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+ if (ret) {
+ pr_err("Couldn't arm tunnel cq (%d)\n", ret);
+ goto err_wq;
+ }
+ ctx->state = DEMUX_PV_STATE_ACTIVE;
+ return 0;
+
+err_wq:
+ ctx->wq = NULL;
+ ib_destroy_qp(ctx->qp[1].qp);
+ ctx->qp[1].qp = NULL;
+
+
+err_qp0:
+ if (ctx->has_smi)
+ ib_destroy_qp(ctx->qp[0].qp);
+ ctx->qp[0].qp = NULL;
+
+err_mr:
+ ib_dereg_mr(ctx->mr);
+ ctx->mr = NULL;
+
+err_pd:
+ ib_dealloc_pd(ctx->pd);
+ ctx->pd = NULL;
+
+err_cq:
+ ib_destroy_cq(ctx->cq);
+ ctx->cq = NULL;
+
+err_buf:
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
+
+err_out_qp0:
+ if (ctx->has_smi)
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
+err_out:
+ ctx->state = DEMUX_PV_STATE_DOWN;
+ return ret;
+}
+
+static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
+ struct mlx4_ib_demux_pv_ctx *ctx, int flush)
+{
+ if (!ctx)
+ return;
+ if (ctx->state > DEMUX_PV_STATE_DOWN) {
+ ctx->state = DEMUX_PV_STATE_DOWNING;
+ if (flush)
+ flush_workqueue(ctx->wq);
+ if (ctx->has_smi) {
+ ib_destroy_qp(ctx->qp[0].qp);
+ ctx->qp[0].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
+ }
+ ib_destroy_qp(ctx->qp[1].qp);
+ ctx->qp[1].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
+ ib_dereg_mr(ctx->mr);
+ ctx->mr = NULL;
+ ib_dealloc_pd(ctx->pd);
+ ctx->pd = NULL;
+ ib_destroy_cq(ctx->cq);
+ ctx->cq = NULL;
+ ctx->state = DEMUX_PV_STATE_DOWN;
+ }
+}
+
+static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
+ int port, int do_init)
+{
+ int ret = 0;
+
+ if (!do_init) {
+ clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
+ /* for master, destroy real sqp resources */
+ if (slave == mlx4_master_func_num(dev->dev))
+ destroy_pv_resources(dev, slave, port,
+ dev->sriov.sqps[port - 1], 1);
+ /* destroy the tunnel qp resources */
+ destroy_pv_resources(dev, slave, port,
+ dev->sriov.demux[port - 1].tun[slave], 1);
+ return 0;
+ }
+
+ /* create the tunnel qp resources */
+ ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
+ dev->sriov.demux[port - 1].tun[slave]);
+
+ /* for master, create the real sqp resources */
+ if (!ret && slave == mlx4_master_func_num(dev->dev))
+ ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
+ dev->sriov.sqps[port - 1]);
+ return ret;
+}
+
+void mlx4_ib_tunnels_update_work(struct work_struct *work)
+{
+ struct mlx4_ib_demux_work *dmxw;
+
+ dmxw = container_of(work, struct mlx4_ib_demux_work, work);
+ mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
+ dmxw->do_init);
+ kfree(dmxw);
+ return;
+}
+
+static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_demux_ctx *ctx,
+ int port)
+{
+ char name[12];
+ int ret = 0;
+ int i;
+
+ ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
+ sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
+ if (!ctx->tun)
+ return -ENOMEM;
+
+ ctx->dev = dev;
+ ctx->port = port;
+ ctx->ib_dev = &dev->ib_dev;
+
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
+ if (ret) {
+ ret = -ENOMEM;
+ goto err_mcg;
+ }
+ }
+
+ ret = mlx4_ib_mcg_port_init(ctx);
+ if (ret) {
+ pr_err("Failed initializing mcg para-virt (%d)\n", ret);
+ goto err_mcg;
+ }
+
+ snprintf(name, sizeof name, "mlx4_ibt%d", port);
+ ctx->wq = create_singlethread_workqueue(name);
+ if (!ctx->wq) {
+ pr_err("Failed to create tunnelling WQ for port %d\n", port);
+ ret = -ENOMEM;
+ goto err_wq;
+ }
+
+ snprintf(name, sizeof name, "mlx4_ibud%d", port);
+ ctx->ud_wq = create_singlethread_workqueue(name);
+ if (!ctx->ud_wq) {
+ pr_err("Failed to create up/down WQ for port %d\n", port);
+ ret = -ENOMEM;
+ goto err_udwq;
+ }
+
+ return 0;
+
+err_udwq:
+ destroy_workqueue(ctx->wq);
+ ctx->wq = NULL;
+
+err_wq:
+ mlx4_ib_mcg_port_cleanup(ctx, 1);
+err_mcg:
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++)
+ free_pv_object(dev, i, port);
+ kfree(ctx->tun);
+ ctx->tun = NULL;
+ return ret;
+}
+
+static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
+{
+ if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
+ sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
+ flush_workqueue(sqp_ctx->wq);
+ if (sqp_ctx->has_smi) {
+ ib_destroy_qp(sqp_ctx->qp[0].qp);
+ sqp_ctx->qp[0].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
+ }
+ ib_destroy_qp(sqp_ctx->qp[1].qp);
+ sqp_ctx->qp[1].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
+ ib_dereg_mr(sqp_ctx->mr);
+ sqp_ctx->mr = NULL;
+ ib_dealloc_pd(sqp_ctx->pd);
+ sqp_ctx->pd = NULL;
+ ib_destroy_cq(sqp_ctx->cq);
+ sqp_ctx->cq = NULL;
+ sqp_ctx->state = DEMUX_PV_STATE_DOWN;
+ }
+}
+
+static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
+{
+ int i;
+ if (ctx) {
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+ mlx4_ib_mcg_port_cleanup(ctx, 1);
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ if (!ctx->tun[i])
+ continue;
+ if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
+ ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
+ }
+ flush_workqueue(ctx->wq);
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
+ free_pv_object(dev, i, ctx->port);
+ }
+ kfree(ctx->tun);
+ destroy_workqueue(ctx->ud_wq);
+ destroy_workqueue(ctx->wq);
+ }
+}
+
+static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
+{
+ int i;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+ /* initialize or tear down tunnel QPs for the master */
+ for (i = 0; i < dev->dev->caps.num_ports; i++)
+ mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
+ return;
+}
+
+int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
+{
+ int i = 0;
+ int err;
+
+ if (!mlx4_is_mfunc(dev->dev))
+ return 0;
+
+ dev->sriov.is_going_down = 0;
+ spin_lock_init(&dev->sriov.going_down_lock);
+ mlx4_ib_cm_paravirt_init(dev);
+
+ mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
+
+ if (mlx4_is_slave(dev->dev)) {
+ mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
+ return 0;
+ }
+
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ if (i == mlx4_master_func_num(dev->dev))
+ mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid);
+ else
+ mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid());
+ }
+
+ err = mlx4_ib_init_alias_guid_service(dev);
+ if (err) {
+ mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n");
+ goto paravirt_err;
+ }
+ err = mlx4_ib_device_register_sysfs(dev);
+ if (err) {
+ mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n");
+ goto sysfs_err;
+ }
+
+ mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
+ dev->dev->caps.sqp_demux);
+ for (i = 0; i < dev->num_ports; i++) {
+ union ib_gid gid;
+ err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1);
+ if (err)
+ goto demux_err;
+ dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
+ err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
+ &dev->sriov.sqps[i]);
+ if (err)
+ goto demux_err;
+ err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
+ if (err)
+ goto demux_err;
+ }
+ mlx4_ib_master_tunnels(dev, 1);
+ return 0;
+
+demux_err:
+ while (i > 0) {
+ free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
+ mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
+ --i;
+ }
+ mlx4_ib_device_unregister_sysfs(dev);
+
+sysfs_err:
+ mlx4_ib_destroy_alias_guid_service(dev);
+
+paravirt_err:
+ mlx4_ib_cm_paravirt_clean(dev, -1);
+
+ return err;
+}
+
+void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
+{
+ int i;
+ unsigned long flags;
+
+ if (!mlx4_is_mfunc(dev->dev))
+ return;
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ dev->sriov.is_going_down = 1;
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+ if (mlx4_is_master(dev->dev)) {
+ for (i = 0; i < dev->num_ports; i++) {
+ flush_workqueue(dev->sriov.demux[i].ud_wq);
+ mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
+ kfree(dev->sriov.sqps[i]);
+ dev->sriov.sqps[i] = NULL;
+ mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
+ }
+
+ mlx4_ib_cm_paravirt_clean(dev, -1);
+ mlx4_ib_destroy_alias_guid_service(dev);
+ mlx4_ib_device_unregister_sysfs(dev);
+ }
+}
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/main.c b/sys/ofed/drivers/infiniband/hw/mlx4/main.c
index bc99414..328bb5a 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/main.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/main.c
@@ -32,12 +32,20 @@
*/
#include <linux/module.h>
+
+#ifdef __linux__
+#include <linux/proc_fs.h>
+#endif
+
#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <linux/bitops.h>
+#include <linux/if_ether.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
@@ -45,45 +53,63 @@
#include <linux/mlx4/driver.h>
#include <linux/mlx4/cmd.h>
-
+#include <linux/sched.h>
#include "mlx4_ib.h"
#include "user.h"
#include "wc.h"
#define DRV_NAME MLX4_IB_DRV_NAME
-#define DRV_VERSION "1.0-ofed1.5.2"
-#define DRV_RELDATE "August 4, 2010"
+#define DRV_VERSION "1.0"
+#define DRV_RELDATE "April 4, 2008"
+
+#define MLX4_IB_DRIVER_PROC_DIR_NAME "driver/mlx4_ib"
+#define MLX4_IB_MRS_PROC_DIR_NAME "mrs"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
-#ifdef CONFIG_MLX4_DEBUG
+int mlx4_ib_sm_guid_assign = 1;
-int mlx4_ib_debug_level = 0;
-module_param_named(debug_level, mlx4_ib_debug_level, int, 0644);
-MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+#ifdef __linux__
+struct proc_dir_entry *mlx4_mrs_dir_entry;
+static struct proc_dir_entry *mlx4_ib_driver_dir_entry;
+#endif
+
+module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
+MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
-#endif /* CONFIG_MLX4_DEBUG */
+static char dev_assign_str[512];
+//module_param_string(dev_assign_str, dev_assign_str, sizeof(dev_assign_str), 0644);
+MODULE_PARM_DESC(dev_assign_str, "Map all device function numbers to "
+ "IB device numbers following the pattern: "
+ "bb:dd.f-0,bb:dd.f-1,... (all numbers are hexadecimals)."
+ " Max supported devices - 32");
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
-static void *get_ibdev(struct mlx4_dev *dev, void *ctx, u8 port)
-{
- struct mlx4_ib_dev *mlxibdev = ctx;
- return &mlxibdev->ib_dev;
-}
-
struct update_gid_work {
- struct work_struct work;
- union ib_gid gids[128];
- int port;
- struct mlx4_ib_dev *dev;
+ struct work_struct work;
+ union ib_gid gids[128];
+ struct mlx4_ib_dev *dev;
+ int port;
+};
+
+struct dev_rec {
+ int bus;
+ int dev;
+ int func;
+ int nr;
};
+#define MAX_DR 32
+static struct dev_rec dr[MAX_DR];
+
+static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
+
static struct workqueue_struct *wq;
static void init_query_mad(struct ib_smp *mad)
@@ -112,7 +138,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
+ 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -123,7 +150,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ IB_DEVICE_BLOCK_MULTICAST_LOOPBACK |
+ IB_DEVICE_SHARED_MR;
+
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -144,42 +173,45 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
props->device_cap_flags |= IB_DEVICE_XRC;
- if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY)
- props->max_raw_ethy_qp = dev->ib_dev.phys_port_cnt;
+ props->device_cap_flags |= IB_DEVICE_QPG;
+ if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
+ props->device_cap_flags |= IB_DEVICE_UD_RSS;
+ props->max_rss_tbl_sz = dev->dev->caps.max_rss_tbl_sz;
+ }
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
- props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
+ props->vendor_part_id = dev->dev->pdev->device;
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
props->max_mr_size = ~0ull;
props->page_size_cap = dev->dev->caps.page_size_cap;
- props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
+ props->max_qp = dev->dev->quotas.qp;
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
- props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
+ props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
- props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
+ props->max_mr = dev->dev->quotas.mpt;
props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
- props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
+ props->max_srq = dev->dev->quotas.srq;
props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
props->max_srq_sge = dev->dev->caps.max_srq_sge;
- props->max_fast_reg_page_list_len = MAX_FAST_REG_PAGES;
+ props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
- props->masked_atomic_cap = IB_ATOMIC_HCA;
+ props->masked_atomic_cap = props->atomic_cap;
props->max_pkeys = dev->dev->caps.pkey_table_len[1];
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
- props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1;
+ props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
out:
kfree(in_mad);
@@ -197,10 +229,33 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
}
-static void ib_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props,
- struct ib_smp *out_mad)
+static int ib_link_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view)
{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int ext_active_speed;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
+ in_mad, out_mad);
+ if (err)
+ goto out;
+
+
props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
props->lmc = out_mad->data[34] & 0x7;
props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -208,7 +263,10 @@ static void ib_link_query_port(struct ib_device *ibdev, u8 port,
props->state = out_mad->data[32] & 0xf;
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
- props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
+ if (netw_view)
+ props->gid_tbl_len = out_mad->data[50];
+ else
+ props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
@@ -220,39 +278,46 @@ static void ib_link_query_port(struct ib_device *ibdev, u8 port,
props->subnet_timeout = out_mad->data[51] & 0x1f;
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
- props->link_layer = IB_LINK_LAYER_INFINIBAND;
-}
-#ifdef notyet
-static int eth_to_ib_width(int w)
-{
- switch (w) {
- case 4:
- return IB_WIDTH_4X;
- case 8:
- case 16:
- return IB_WIDTH_8X;
- case 32:
- return IB_WIDTH_12X;
- default:
- return IB_WIDTH_1X;
+ /* Check if extended speeds (EDR/FDR/...) are supported */
+ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
+ ext_active_speed = out_mad->data[62] >> 4;
+
+ switch (ext_active_speed) {
+ case 1:
+ props->active_speed = IB_SPEED_FDR;
+ break;
+ case 2:
+ props->active_speed = IB_SPEED_EDR;
+ break;
+ }
}
-}
-static int eth_to_ib_speed(int s)
-{
- switch (s) {
- case 256:
- return 1;
- case 512:
- return 2;
- case 1024:
- return 4;
- default:
- return 1;
+ /* If reported active speed is QDR, check if is FDR-10 */
+ if (props->active_speed == IB_SPEED_QDR) {
+ init_query_mad(in_mad);
+ in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
+ NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ /* Checking LinkSpeedActive for FDR-10 */
+ if (out_mad->data[15] & 0x1)
+ props->active_speed = IB_SPEED_FDR10;
}
+
+ /* Avoid wrong speed value returned by FW if the IB link is down. */
+ if (props->state == IB_PORT_DOWN)
+ props->active_speed = IB_SPEED_SDR;
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
}
-#endif
static u8 state_to_phys_state(enum ib_port_state state)
{
@@ -260,88 +325,90 @@ static u8 state_to_phys_state(enum ib_port_state state)
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props,
- struct ib_smp *out_mad)
+ struct ib_port_attr *props, int netw_view)
{
- struct mlx4_ib_iboe *iboe = &to_mdev(ibdev)->iboe;
+
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+ struct mlx4_ib_iboe *iboe = &mdev->iboe;
struct net_device *ndev;
enum ib_mtu tmp;
+ struct mlx4_cmd_mailbox *mailbox;
+ int err = 0;
- props->active_width = IB_WIDTH_4X;
- props->active_speed = 1;
+ mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+
+ props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
+ IB_WIDTH_4X : IB_WIDTH_1X;
+ props->active_speed = IB_SPEED_QDR;
props->port_cap_flags = IB_PORT_CM_SUP;
- props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
- props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
+ if (netw_view)
+ props->gid_tbl_len = MLX4_ROCE_MAX_GIDS;
+ else
+ props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
+
+ props->max_msg_sz = mdev->dev->caps.max_msg_sz;
props->pkey_tbl_len = 1;
- props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
- props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
- props->max_mtu = IB_MTU_2048;
- props->subnet_timeout = 0;
- props->max_vl_num = out_mad->data[37] >> 4;
- props->init_type_reply = 0;
- props->link_layer = IB_LINK_LAYER_ETHERNET;
+ props->max_mtu = IB_MTU_4096;
+ props->max_vl_num = 2;
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
spin_lock(&iboe->lock);
ndev = iboe->netdevs[port - 1];
if (!ndev)
- goto out;
+ goto out_unlock;
-#ifdef __linux__
- tmp = iboe_get_mtu(ndev->mtu);
-#else
tmp = iboe_get_mtu(ndev->if_mtu);
-#endif
props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
- props->state = netif_carrier_ok(ndev) && netif_oper_up(ndev) ?
+
+ props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
IB_PORT_ACTIVE : IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
-
-out:
+out_unlock:
spin_unlock(&iboe->lock);
- return 0;
+out:
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return err;
}
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props)
+int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
-
- in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
- if (!in_mad || !out_mad)
- goto out;
+ int err;
memset(props, 0, sizeof *props);
- init_query_mad(in_mad);
- in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
- in_mad->attr_mod = cpu_to_be32(port);
-
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
- if (err)
- goto out;
-
- mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
- ib_link_query_port(ibdev, port, props, out_mad) :
- eth_link_query_port(ibdev, port, props, out_mad);
-
-out:
- kfree(in_mad);
- kfree(out_mad);
+ err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
+ ib_link_query_port(ibdev, port, props, netw_view) :
+ eth_link_query_port(ibdev, port, props, netw_view);
return err;
}
-static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
- union ib_gid *gid)
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ /* returns host view */
+ return __mlx4_ib_query_port(ibdev, port, props, 0);
+}
+
+int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int clear = 0;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -352,30 +419,45 @@ static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (mlx4_is_mfunc(dev->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw, out_mad->data + 8, 8);
+ if (mlx4_is_mfunc(dev->dev) && !netw_view) {
+ if (index) {
+ /* For any index > 0, return the null guid */
+ err = 0;
+ clear = 1;
+ goto out;
+ }
+ }
+
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
+ NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
out:
+ if (clear)
+ memset(gid->raw + 8, 0, 8);
kfree(in_mad);
kfree(out_mad);
return err;
}
static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
- union ib_gid *gid)
+ union ib_gid *gid)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -388,16 +470,17 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
- return __mlx4_ib_query_gid(ibdev, port, index, gid);
+ return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
else
return iboe_query_gid(ibdev, port, index, gid);
}
-static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -409,7 +492,11 @@ static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
+ in_mad, out_mad);
if (err)
goto out;
@@ -421,11 +508,16 @@ out:
return err;
}
+static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+ return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
+}
+
static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
struct mlx4_cmd_mailbox *mailbox;
- int err;
+ unsigned long flags;
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
@@ -433,12 +525,16 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
return 0;
- spin_lock(&to_mdev(ibdev)->sm_lock);
+ if (mlx4_is_slave(to_mdev(ibdev)->dev))
+ return -EOPNOTSUPP;
+
+ spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
memcpy(ibdev->node_desc, props->node_desc, 64);
- spin_unlock(&to_mdev(ibdev)->sm_lock);
+ spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
- /* if possible, pass node desc to FW, so it can generate
- * a 144 trap. If cmd fails, just ignore.
+ /*
+ * If possible, pass node desc to FW, so it can generate
+ * a 144 trap. If cmd fails, just ignore.
*/
mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
if (IS_ERR(mailbox))
@@ -446,10 +542,8 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
memset(mailbox->buf, 0, 256);
memcpy(mailbox->buf, props->node_desc, 64);
- err = mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
- MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A);
- if (err)
- mlx4_ib_dbg("SET_NODE command failed (%d)", err);
+ mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
+ MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
@@ -478,7 +572,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
}
err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
@@ -514,23 +608,36 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_ucontext *context;
+ struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- resp.qp_tab_size = dev->dev->caps.num_qps;
-
- if (mlx4_wc_enabled()) {
- resp.bf_reg_size = dev->dev->caps.bf_reg_size;
- resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+ resp_v3.qp_tab_size = dev->dev->caps.num_qps;
+ if (mlx4_wc_enabled()) {
+ resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
+ resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ } else {
+ resp_v3.bf_reg_size = 0;
+ resp_v3.bf_regs_per_page = 0;
+ }
} else {
- resp.bf_reg_size = 0;
- resp.bf_regs_per_page = 0;
+ resp.dev_caps = dev->dev->caps.userspace_caps;
+ resp.qp_tab_size = dev->dev->caps.num_qps;
+ if (mlx4_wc_enabled()) {
+ resp.bf_reg_size = dev->dev->caps.bf_reg_size;
+ resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ } else {
+ resp.bf_reg_size = 0;
+ resp.bf_regs_per_page = 0;
+ }
+ resp.cqe_size = dev->dev->caps.cqe_size;
}
- context = kzalloc(sizeof *context, GFP_KERNEL);
+ context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
@@ -543,7 +650,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- err = ib_copy_to_udata(udata, &resp, sizeof resp);
+ if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+ err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
+ else
+ err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+
if (err) {
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
kfree(context);
@@ -562,22 +673,82 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
+#ifdef __linux__
+static unsigned long mlx4_ib_get_unmapped_area(struct file *file,
+ unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long start_addr;
+ unsigned long page_size_order;
+ unsigned long command;
+
+ mm = current->mm;
+ if (addr)
+ return current->mm->get_unmapped_area(file, addr, len,
+ pgoff, flags);
+
+ /* Last 8 bits hold the command others are data per that command */
+ command = pgoff & MLX4_IB_MMAP_CMD_MASK;
+ if (command != MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES)
+ return current->mm->get_unmapped_area(file, addr, len,
+ pgoff, flags);
+
+ page_size_order = pgoff >> MLX4_IB_MMAP_CMD_BITS;
+ /* code is based on the huge-pages get_unmapped_area code */
+ start_addr = mm->free_area_cache;
+
+ if (len <= mm->cached_hole_size)
+ start_addr = TASK_UNMAPPED_BASE;
+
+
+full_search:
+ addr = ALIGN(start_addr, 1 << page_size_order);
+
+ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+ /* At this point: (!vma || addr < vma->vm_end). */
+ if (TASK_SIZE - len < addr) {
+ /*
+ * Start a new search - just in case we missed
+ * some holes.
+ */
+ if (start_addr != TASK_UNMAPPED_BASE) {
+ start_addr = TASK_UNMAPPED_BASE;
+ goto full_search;
+ }
+ return -ENOMEM;
+ }
+
+ if (!vma || addr + len <= vma->vm_start)
+ return addr;
+ addr = ALIGN(vma->vm_end, 1 << page_size_order);
+ }
+}
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
+ int err;
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
+ /* Last 8 bits hold the command others are data per that command */
+ unsigned long command = vma->vm_pgoff & MLX4_IB_MMAP_CMD_MASK;
- if (vma->vm_pgoff == 0) {
+ if (command < MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) {
+ /* compatability handling for commands 0 & 1*/
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+ }
+ if (command == MLX4_IB_MMAP_UAR_PAGE) {
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
- } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
+ } else if (command == MLX4_IB_MMAP_BLUE_FLAME_PAGE &&
+ dev->dev->caps.bf_reg_size != 0) {
vma->vm_page_prot = pgprot_wc(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
@@ -585,11 +756,31 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
dev->dev->caps.num_uars,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
+ } else if (command == MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) {
+ /* Getting contiguous physical pages */
+ unsigned long total_size = vma->vm_end - vma->vm_start;
+ unsigned long page_size_order = (vma->vm_pgoff) >>
+ MLX4_IB_MMAP_CMD_BITS;
+ struct ib_cmem *ib_cmem;
+ ib_cmem = ib_cmem_alloc_contiguous_pages(context, total_size,
+ page_size_order);
+ if (IS_ERR(ib_cmem)) {
+ err = PTR_ERR(ib_cmem);
+ return err;
+ }
+
+ err = ib_cmem_map_contiguous_pages_to_vma(ib_cmem, vma);
+ if (err) {
+ ib_cmem_release_contiguous_pages(ib_cmem);
+ return err;
+ }
+ return 0;
} else
return -EINVAL;
return 0;
}
+#endif
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
@@ -598,7 +789,7 @@ static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
struct mlx4_ib_pd *pd;
int err;
- pd = kzalloc(sizeof *pd, GFP_KERNEL);
+ pd = kmalloc(sizeof *pd, GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
@@ -626,11 +817,62 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
return 0;
}
+static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_xrcd *xrcd;
+ int err;
+
+ if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return ERR_PTR(-ENOSYS);
+
+ xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
+ if (!xrcd)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
+ if (err)
+ goto err1;
+
+ xrcd->pd = ib_alloc_pd(ibdev);
+ if (IS_ERR(xrcd->pd)) {
+ err = PTR_ERR(xrcd->pd);
+ goto err2;
+ }
+
+ xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
+ if (IS_ERR(xrcd->cq)) {
+ err = PTR_ERR(xrcd->cq);
+ goto err3;
+ }
+
+ return &xrcd->ibxrcd;
+
+err3:
+ ib_dealloc_pd(xrcd->pd);
+err2:
+ mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
+err1:
+ kfree(xrcd);
+ return ERR_PTR(err);
+}
+
+static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+ ib_destroy_cq(to_mxrcd(xrcd)->cq);
+ ib_dealloc_pd(to_mxrcd(xrcd)->pd);
+ mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
+ kfree(xrcd);
+
+ return 0;
+}
+
static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
{
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
- struct gid_entry *ge;
+ struct mlx4_ib_gid_entry *ge;
ge = kzalloc(sizeof *ge, GFP_KERNEL);
if (!ge)
@@ -658,11 +900,13 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
if (!mqp->port)
return 0;
+
spin_lock(&mdev->iboe.lock);
ndev = mdev->iboe.netdevs[mqp->port - 1];
if (ndev)
dev_hold(ndev);
spin_unlock(&mdev->iboe.lock);
+
if (ndev) {
rdma_get_mcast_mac((struct in6_addr *)gid, mac);
rtnl_lock();
@@ -675,37 +919,269 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
return ret;
}
+struct mlx4_ib_steering {
+ struct list_head list;
+ u64 reg_id;
+ union ib_gid gid;
+};
+
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ u64 reg_id;
+ struct mlx4_ib_steering *ib_steering = NULL;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
+ if (!ib_steering)
+ return -ENOMEM;
+ }
- err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, !!(mqp->flags &
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- (ibqp->qp_type == IB_QPT_RAW_ETH) ?
- MLX4_MCAST_PROT_EN : MLX4_MCAST_PROT_IB);
+ err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
+ !!(mqp->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+ MLX4_PROT_IB_IPV6, &reg_id);
if (err)
- return err;
+ goto err_malloc;
err = add_gid_entry(ibqp, gid);
if (err)
goto err_add;
+ if (ib_steering) {
+ memcpy(ib_steering->gid.raw, gid->raw, 16);
+ ib_steering->reg_id = reg_id;
+ mutex_lock(&mqp->mutex);
+ list_add(&ib_steering->list, &mqp->steering_rules);
+ mutex_unlock(&mqp->mutex);
+ }
return 0;
err_add:
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- (ibqp->qp_type == IB_QPT_RAW_ETH) ?
- MLX4_MCAST_PROT_EN : MLX4_MCAST_PROT_IB);
+ MLX4_PROT_IB_IPV6, reg_id);
+err_malloc:
+ kfree(ib_steering);
+
+ return err;
+}
+
+enum {
+ IBV_FLOW_L4_NONE = 0,
+ IBV_FLOW_L4_OTHER = 3,
+ IBV_FLOW_L4_UDP = 5,
+ IBV_FLOW_L4_TCP = 6
+};
+
+struct mlx4_cm_steering {
+ struct list_head list;
+ u64 reg_id;
+ struct ib_flow_spec spec;
+};
+
+static int flow_spec_to_net_rule(struct ib_device *dev, struct ib_flow_spec *flow_spec,
+ struct list_head *rule_list_h)
+{
+ struct mlx4_spec_list *spec_l2, *spec_l3, *spec_l4;
+ u64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL);
+ if (!spec_l2)
+ return -ENOMEM;
+
+ switch (flow_spec->type) {
+ case IB_FLOW_ETH:
+ spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec_l2->eth.dst_mac, flow_spec->l2_id.eth.mac, ETH_ALEN);
+ memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
+ spec_l2->eth.ether_type = flow_spec->l2_id.eth.ethertype;
+ if (flow_spec->l2_id.eth.vlan_present) {
+ spec_l2->eth.vlan_id = flow_spec->l2_id.eth.vlan;
+ spec_l2->eth.vlan_id_msk = cpu_to_be16(0x0fff);
+ }
+ break;
+ case IB_FLOW_IB_UC:
+ spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB;
+ if(flow_spec->l2_id.ib_uc.qpn) {
+ spec_l2->ib.r_u_qpn = cpu_to_be32(flow_spec->l2_id.ib_uc.qpn);
+ spec_l2->ib.qpn_msk = cpu_to_be32(0xffffff);
+ }
+ break;
+ case IB_FLOW_IB_MC_IPV4:
+ case IB_FLOW_IB_MC_IPV6:
+ spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB;
+ memcpy(spec_l2->ib.dst_gid, flow_spec->l2_id.ib_mc.mgid, 16);
+ memset(spec_l2->ib.dst_gid_msk, 0xff, 16);
+ break;
+ }
+
+
+ list_add_tail(&spec_l2->list, rule_list_h);
+
+ if (flow_spec->l2_id.eth.ethertype == cpu_to_be16(ETH_P_IP) ||
+ flow_spec->type != IB_FLOW_ETH) {
+ spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
+ if (!spec_l3)
+ return -ENOMEM;
+
+ spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
+ spec_l3->ipv4.src_ip = flow_spec->src_ip;
+ if (flow_spec->type != IB_FLOW_IB_MC_IPV4 &&
+ flow_spec->type != IB_FLOW_IB_MC_IPV6)
+ spec_l3->ipv4.dst_ip = flow_spec->dst_ip;
+
+ if (spec_l3->ipv4.src_ip)
+ spec_l3->ipv4.src_ip_msk = MLX4_BE_WORD_MASK;
+ if (spec_l3->ipv4.dst_ip)
+ spec_l3->ipv4.dst_ip_msk = MLX4_BE_WORD_MASK;
+
+ list_add_tail(&spec_l3->list, rule_list_h);
+ }
+
+ if (flow_spec->l4_protocol) {
+ spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL);
+ if (!spec_l4)
+ return -ENOMEM;
+
+ spec_l4->tcp_udp.src_port = flow_spec->src_port;
+ spec_l4->tcp_udp.dst_port = flow_spec->dst_port;
+ if (spec_l4->tcp_udp.src_port)
+ spec_l4->tcp_udp.src_port_msk =
+ MLX4_BE_SHORT_MASK;
+ if (spec_l4->tcp_udp.dst_port)
+ spec_l4->tcp_udp.dst_port_msk =
+ MLX4_BE_SHORT_MASK;
+
+ switch (flow_spec->l4_protocol) {
+ case IBV_FLOW_L4_UDP:
+ spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP;
+ break;
+ case IBV_FLOW_L4_TCP:
+ spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP;
+ break;
+ default:
+ dev_err(dev->dma_device,
+ "Unsupported l4 protocol.\n");
+ kfree(spec_l4);
+ return -EPROTONOSUPPORT;
+ }
+ list_add_tail(&spec_l4->list, rule_list_h);
+ }
+ return 0;
+}
+
+static int __mlx4_ib_flow_attach(struct mlx4_ib_dev *mdev,
+ struct mlx4_ib_qp *mqp,
+ struct ib_flow_spec *flow_spec,
+ int priority, int lock_qp)
+{
+ u64 reg_id = 0;
+ int err = 0;
+ struct mlx4_cm_steering *cm_flow;
+ struct mlx4_spec_list *spec, *tmp_spec;
+
+ struct mlx4_net_trans_rule rule =
+ { .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ };
+
+ rule.promisc_mode = flow_spec->rule_type;
+ rule.port = mqp->port;
+ rule.qpn = mqp->mqp.qpn;
+ INIT_LIST_HEAD(&rule.list);
+
+ cm_flow = kmalloc(sizeof(*cm_flow), GFP_KERNEL);
+ if (!cm_flow)
+ return -ENOMEM;
+
+ if (rule.promisc_mode == MLX4_FS_REGULAR) {
+ rule.allow_loopback = !flow_spec->block_mc_loopback;
+ rule.priority = MLX4_DOMAIN_UVERBS | priority;
+ err = flow_spec_to_net_rule(&mdev->ib_dev, flow_spec,
+ &rule.list);
+ if (err)
+ goto free_list;
+ }
+
+ err = mlx4_flow_attach(mdev->dev, &rule, &reg_id);
+ if (err)
+ goto free_list;
+
+ memcpy(&cm_flow->spec, flow_spec, sizeof(*flow_spec));
+ cm_flow->reg_id = reg_id;
+
+ if (lock_qp)
+ mutex_lock(&mqp->mutex);
+ list_add(&cm_flow->list, &mqp->rules_list);
+ if (lock_qp)
+ mutex_unlock(&mqp->mutex);
+
+free_list:
+ list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) {
+ list_del(&spec->list);
+ kfree(spec);
+ }
+ if (err) {
+ kfree(cm_flow);
+ dev_err(mdev->ib_dev.dma_device,
+ "Fail to attach flow steering rule\n");
+ }
return err;
}
-static struct gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
+static int __mlx4_ib_flow_detach(struct mlx4_ib_dev *mdev,
+ struct mlx4_ib_qp *mqp,
+ struct ib_flow_spec *spec, int priority,
+ int lock_qp)
{
- struct gid_entry *ge;
- struct gid_entry *tmp;
- struct gid_entry *ret = NULL;
+ struct mlx4_cm_steering *cm_flow;
+ int ret;
+
+ if (lock_qp)
+ mutex_lock(&mqp->mutex);
+ list_for_each_entry(cm_flow, &mqp->rules_list, list) {
+ if (!memcmp(&cm_flow->spec, spec, sizeof(*spec))) {
+ list_del(&cm_flow->list);
+ break;
+ }
+ }
+ if (lock_qp)
+ mutex_unlock(&mqp->mutex);
+
+ if (&cm_flow->list == &mqp->rules_list) {
+ dev_err(mdev->ib_dev.dma_device, "Couldn't find reg_id for flow spec. "
+ "Steering rule is left attached\n");
+ return -EINVAL;
+ }
+
+ ret = mlx4_flow_detach(mdev->dev, cm_flow->reg_id);
+
+ kfree(cm_flow);
+ return ret;
+}
+
+static int mlx4_ib_flow_attach(struct ib_qp *qp, struct ib_flow_spec *flow_spec,
+ int priority)
+{
+ return __mlx4_ib_flow_attach(to_mdev(qp->device), to_mqp(qp),
+ flow_spec, priority, 1);
+}
+
+static int mlx4_ib_flow_detach(struct ib_qp *qp, struct ib_flow_spec *spec,
+ int priority)
+{
+ return __mlx4_ib_flow_detach(to_mdev(qp->device), to_mqp(qp),
+ spec, priority, 1);
+}
+
+static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
+{
+ struct mlx4_ib_gid_entry *ge;
+ struct mlx4_ib_gid_entry *tmp;
+ struct mlx4_ib_gid_entry *ret = NULL;
list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
if (!memcmp(raw, ge->gid.raw, 16)) {
@@ -724,11 +1200,31 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
u8 mac[6];
struct net_device *ndev;
- struct gid_entry *ge;
+ struct mlx4_ib_gid_entry *ge;
+ u64 reg_id = 0;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ struct mlx4_ib_steering *ib_steering;
+
+ mutex_lock(&mqp->mutex);
+ list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
+ if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
+ list_del(&ib_steering->list);
+ break;
+ }
+ }
+ mutex_unlock(&mqp->mutex);
+ if (&ib_steering->list == &mqp->steering_rules) {
+ pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
+ return -EINVAL;
+ }
+ reg_id = ib_steering->reg_id;
+ kfree(ib_steering);
+ }
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- (ibqp->qp_type == IB_QPT_RAW_ETH) ?
- MLX4_MCAST_PROT_EN : MLX4_MCAST_PROT_IB);
+ MLX4_PROT_IB_IPV6, reg_id);
if (err)
return err;
@@ -750,91 +1246,18 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
list_del(&ge->list);
kfree(ge);
} else
- printk(KERN_WARNING "could not find mgid entry\n");
+ pr_warn("could not find mgid entry\n");
mutex_unlock(&mqp->mutex);
return 0;
}
-static void mlx4_dummy_comp_handler(struct ib_cq *cq, void *cq_context)
-{
-}
-
-static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
-{
- struct mlx4_ib_xrcd *xrcd;
- struct mlx4_ib_dev *mdev = to_mdev(ibdev);
- struct ib_pd *pd;
- struct ib_cq *cq;
- int err;
-
- if (!(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
-
- err = mlx4_xrcd_alloc(mdev->dev, &xrcd->xrcdn);
- if (err)
- goto err_xrcd;
-
- pd = mlx4_ib_alloc_pd(ibdev, NULL, NULL);
- if (IS_ERR(pd)) {
- err = PTR_ERR(pd);
- goto err_pd;
- }
- pd->device = ibdev;
-
- cq = mlx4_ib_create_cq(ibdev, 1, 0, NULL, NULL);
- if (IS_ERR(cq)) {
- err = PTR_ERR(cq);
- goto err_cq;
- }
- cq->device = ibdev;
- cq->comp_handler = mlx4_dummy_comp_handler;
-
- if (context)
- if (ib_copy_to_udata(udata, &xrcd->xrcdn, sizeof(__u32))) {
- err = -EFAULT;
- goto err_copy;
- }
-
- xrcd->cq = cq;
- xrcd->pd = pd;
- return &xrcd->ibxrcd;
-
-err_copy:
- mlx4_ib_destroy_cq(cq);
-err_cq:
- mlx4_ib_dealloc_pd(pd);
-err_pd:
- mlx4_xrcd_free(mdev->dev, xrcd->xrcdn);
-err_xrcd:
- kfree(xrcd);
- return ERR_PTR(err);
-}
-
-static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
-{
- struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
-
- mlx4_ib_destroy_cq(mxrcd->cq);
- mlx4_ib_dealloc_pd(mxrcd->pd);
- mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
- kfree(xrcd);
-
- return 0;
-}
-
-
static int init_node_data(struct mlx4_ib_dev *dev)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -844,8 +1267,10 @@ static int init_node_data(struct mlx4_ib_dev *dev)
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+ if (mlx4_is_master(dev->dev))
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
- err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -853,7 +1278,7 @@ static int init_node_data(struct mlx4_ib_dev *dev)
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
- err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -913,144 +1338,14 @@ static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_board_id
};
-/*
- * create show function and a device_attribute struct pointing to
- * the function for _name
- */
-#define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod) \
-static ssize_t show_rprt_##_name(struct device *dev, \
- struct device_attribute *attr, \
- char *buf){ \
- return show_diag_rprt(dev, buf, _offset, _op_mod); \
-} \
-static DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL);
-
-#define MLX4_DIAG_RPRT_CLEAR_DIAGS 3
-
-static size_t show_diag_rprt(struct device *device, char *buf,
- u32 offset, u8 op_modifier)
-{
- size_t ret;
- u32 counter_offset = offset;
- u32 diag_counter = 0;
- struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
- ib_dev.dev);
-
- ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier,
- &counter_offset, &diag_counter);
- if (ret)
- return ret;
-
- return sprintf(buf,"%d\n", diag_counter);
-}
-
-static ssize_t clear_diag_counters(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t length)
-{
- size_t ret;
- struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
- ib_dev.dev);
-
- ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS,
- NULL, NULL);
- if (ret)
- return ret;
-
- return length;
-}
-
-DEVICE_DIAG_RPRT_ATTR(rq_num_lle , 0x00, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_lle , 0x04, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe , 0x08, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe , 0x0C, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_leeoe , 0x10, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_leeoe , 0x14, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_lpe , 0x18, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_lpe , 0x1C, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe , 0x20, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe , 0x24, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe , 0x2C, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_bre , 0x34, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_lae , 0x38, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rire , 0x44, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_rire , 0x48, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rae , 0x4C, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_rae , 0x50, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_roe , 0x54, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_tree , 0x5C, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rree , 0x64, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_rnr , 0x68, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rnr , 0x6C, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rabrte , 0x7C, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_ieecne , 0x84, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_ieecse , 0x8C, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_oos , 0x100, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_oos , 0x104, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_mce , 0x108, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_rsync , 0x110, 2);
-DEVICE_DIAG_RPRT_ATTR(sq_num_rsync , 0x114, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd , 0x118, 2);
-DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd , 0x120, 2);
-DEVICE_DIAG_RPRT_ATTR(num_cqovf , 0x1A0, 2);
-DEVICE_DIAG_RPRT_ATTR(num_eqovf , 0x1A4, 2);
-DEVICE_DIAG_RPRT_ATTR(num_baddb , 0x1A8, 2);
-
-static DEVICE_ATTR(clear_diag, S_IWUGO, NULL, clear_diag_counters);
-
-static struct attribute *diag_rprt_attrs[] = {
- &dev_attr_rq_num_lle.attr,
- &dev_attr_sq_num_lle.attr,
- &dev_attr_rq_num_lqpoe.attr,
- &dev_attr_sq_num_lqpoe.attr,
- &dev_attr_rq_num_leeoe.attr,
- &dev_attr_sq_num_leeoe.attr,
- &dev_attr_rq_num_lpe.attr,
- &dev_attr_sq_num_lpe.attr,
- &dev_attr_rq_num_wrfe.attr,
- &dev_attr_sq_num_wrfe.attr,
- &dev_attr_sq_num_mwbe.attr,
- &dev_attr_sq_num_bre.attr,
- &dev_attr_rq_num_lae.attr,
- &dev_attr_sq_num_rire.attr,
- &dev_attr_rq_num_rire.attr,
- &dev_attr_sq_num_rae.attr,
- &dev_attr_rq_num_rae.attr,
- &dev_attr_sq_num_roe.attr,
- &dev_attr_sq_num_tree.attr,
- &dev_attr_sq_num_rree.attr,
- &dev_attr_rq_num_rnr.attr,
- &dev_attr_sq_num_rnr.attr,
- &dev_attr_sq_num_rabrte.attr,
- &dev_attr_sq_num_ieecne.attr,
- &dev_attr_sq_num_ieecse.attr,
- &dev_attr_rq_num_oos.attr,
- &dev_attr_sq_num_oos.attr,
- &dev_attr_rq_num_mce.attr,
- &dev_attr_rq_num_rsync.attr,
- &dev_attr_sq_num_rsync.attr,
- &dev_attr_rq_num_udsdprd.attr,
- &dev_attr_rq_num_ucsdprd.attr,
- &dev_attr_num_cqovf.attr,
- &dev_attr_num_eqovf.attr,
- &dev_attr_num_baddb.attr,
- &dev_attr_clear_diag.attr,
- NULL
-};
-
-struct attribute_group diag_counters_group = {
- .name = "diag_counters",
- .attrs = diag_rprt_attrs
-};
-
static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
{
#ifdef __linux__
memcpy(eui, dev->dev_addr, 3);
memcpy(eui + 5, dev->dev_addr + 3, 3);
#else
- memcpy(eui, IF_LLADDR(dev), 3);
- memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
+ memcpy(eui, IF_LLADDR(dev), 3);
+ memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
#endif
if (vlan_id < 0x1000) {
eui[3] = vlan_id >> 8;
@@ -1069,11 +1364,10 @@ static void update_gids_task(struct work_struct *work)
union ib_gid *gids;
int err;
struct mlx4_dev *dev = gw->dev->dev;
- struct ib_event event;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
- printk(KERN_WARNING "update gid table failed %ld\n", PTR_ERR(mailbox));
+ pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
return;
}
@@ -1081,25 +1375,19 @@ static void update_gids_task(struct work_struct *work)
memcpy(gids, gw->gids, sizeof gw->gids);
err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
- 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
if (err)
- printk(KERN_WARNING "set port command failed\n");
+ pr_warn("set port command failed\n");
else {
memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
- event.device = &gw->dev->ib_dev;
- event.element.port_num = gw->port;
- event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
+ mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
}
mlx4_free_cmd_mailbox(dev, mailbox);
kfree(gw);
}
-enum {
- MLX4_MAX_EFF_VLANS = 128 - MLX4_VLAN_REGULAR,
-};
-
static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
{
struct net_device *ndev = dev->iboe.netdevs[port - 1];
@@ -1107,40 +1395,42 @@ static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
struct net_device *tmp;
int i;
u8 *hits;
- int ret;
union ib_gid gid;
- int tofree;
+ int index_free;
int found;
int need_update = 0;
+ int max_gids;
u16 vid;
work = kzalloc(sizeof *work, GFP_ATOMIC);
if (!work)
return -ENOMEM;
- hits = kzalloc(MLX4_MAX_EFF_VLANS + 1, GFP_ATOMIC);
+ hits = kzalloc(128, GFP_ATOMIC);
if (!hits) {
- ret = -ENOMEM;
- goto out;
+ kfree(work);
+ return -ENOMEM;
}
+ max_gids = dev->dev->caps.gid_table_len[port];
+
#ifdef __linux__
- read_lock(&dev_base_lock);
- for_each_netdev(&init_net, tmp) {
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, tmp) {
#else
- IFNET_RLOCK();
- TAILQ_FOREACH(tmp, &V_ifnet, if_link) {
+ IFNET_RLOCK();
+ TAILQ_FOREACH(tmp, &V_ifnet, if_link) {
#endif
if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
vid = rdma_vlan_dev_vlan_id(tmp);
mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
found = 0;
- tofree = -1;
- for (i = 0; i < MLX4_MAX_EFF_VLANS + 1; ++i) {
- if (tofree < 0 &&
+ index_free = -1;
+ for (i = 0; i < max_gids; ++i) {
+ if (index_free < 0 &&
!memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
- tofree = i;
+ index_free = i;
if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
hits[i] = 1;
found = 1;
@@ -1149,33 +1439,36 @@ static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
}
if (!found) {
- if (tmp == ndev && (memcmp(&dev->iboe.gid_table[port - 1][0], &gid, sizeof gid) || !memcmp(&dev->iboe.gid_table[port - 1][0], &zgid, sizeof gid))) {
+ if (tmp == ndev &&
+ (memcmp(&dev->iboe.gid_table[port - 1][0],
+ &gid, sizeof gid) ||
+ !memcmp(&dev->iboe.gid_table[port - 1][0],
+ &zgid, sizeof gid))) {
dev->iboe.gid_table[port - 1][0] = gid;
++need_update;
hits[0] = 1;
- } else if (tofree >= 0) {
- dev->iboe.gid_table[port - 1][tofree] = gid;
- hits[tofree] = 1;
+ } else if (index_free >= 0) {
+ dev->iboe.gid_table[port - 1][index_free] = gid;
+ hits[index_free] = 1;
++need_update;
}
}
}
-#ifdef __linux__
- }
- read_unlock(&dev_base_lock);
+#ifdef __linux__
+ }
+ rcu_read_unlock();
#else
- }
- IFNET_RUNLOCK();
+ }
+ IFNET_RUNLOCK();
#endif
- for (i = 0; i < MLX4_MAX_EFF_VLANS + 1; ++i)
+ for (i = 0; i < max_gids; ++i)
if (!hits[i]) {
if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
++need_update;
dev->iboe.gid_table[port - 1][i] = zgid;
}
-
if (need_update) {
memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
INIT_WORK(&work->work, update_gids_task);
@@ -1187,10 +1480,6 @@ static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
kfree(hits);
return 0;
-
-out:
- kfree(work);
- return ret;
}
static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
@@ -1239,7 +1528,8 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event
spin_lock(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
oldnd = iboe->netdevs[port - 1];
- iboe->netdevs[port - 1] = mlx4_get_prot_dev(ibdev->dev, MLX4_PROT_EN, port);
+ iboe->netdevs[port - 1] =
+ mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
if (oldnd != iboe->netdevs[port - 1]) {
if (iboe->netdevs[port - 1])
netdev_added(ibdev, port);
@@ -1260,20 +1550,352 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event
return NOTIFY_DONE;
}
+static void init_pkeys(struct mlx4_ib_dev *ibdev)
+{
+ int port;
+ int slave;
+ int i;
+
+ if (mlx4_is_master(ibdev->dev)) {
+ for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
+ for (i = 0;
+ i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
+ ++i) {
+ ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
+ /* master has the identity virt2phys pkey mapping */
+ (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
+ ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
+ mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
+ ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
+ }
+ }
+ }
+ /* initialize pkey cache */
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
+ for (i = 0;
+ i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
+ ++i)
+ ibdev->pkeys.phys_pkey_cache[port-1][i] =
+ (i) ? 0 : 0xFFFF;
+ }
+ }
+}
+
+static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
+{
+ char name[32];
+ int eq_per_port = 0;
+ int added_eqs = 0;
+ int total_eqs = 0;
+ int i, j, eq;
+
+ /* Legacy mode or comp_pool is not large enough */
+ if (dev->caps.comp_pool == 0 ||
+ dev->caps.num_ports > dev->caps.comp_pool)
+ return;
+
+ eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
+ dev->caps.num_ports);
+
+ /* Init eq table */
+ added_eqs = 0;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ added_eqs += eq_per_port;
+
+ total_eqs = dev->caps.num_comp_vectors + added_eqs;
+
+ ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL);
+ if (!ibdev->eq_table)
+ return;
+
+ ibdev->eq_added = added_eqs;
+
+ eq = 0;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
+ for (j = 0; j < eq_per_port; j++) {
+ //sprintf(name, "mlx4-ib-%d-%d@%s",
+ // i, j, dev->pdev->bus->conf.pd_name);
+ /* Set IRQ for specific name (per ring) */
+ if (mlx4_assign_eq(dev, name,
+ &ibdev->eq_table[eq])) {
+ /* Use legacy (same as mlx4_en driver) */
+ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
+ ibdev->eq_table[eq] =
+ (eq % dev->caps.num_comp_vectors);
+ }
+ eq++;
+ }
+ }
+
+ /* Fill the reset of the vector with legacy EQ */
+ for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++)
+ ibdev->eq_table[eq++] = i;
+
+ /* Advertise the new number of EQs to clients */
+ ibdev->ib_dev.num_comp_vectors = total_eqs;
+}
+
+static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
+{
+ int i;
+
+ /* no additional eqs were added */
+ if (!ibdev->eq_table)
+ return;
+
+ /* Reset the advertised EQ number */
+ ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
+
+ /* Free only the added eqs */
+ for (i = 0; i < ibdev->eq_added; i++) {
+ /* Don't free legacy eqs if used */
+ if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors)
+ continue;
+ mlx4_release_eq(dev, ibdev->eq_table[i]);
+ }
+
+ kfree(ibdev->eq_table);
+}
+
+/*
+ * create show function and a device_attribute struct pointing to
+ * the function for _name
+ */
+#define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod) \
+static ssize_t show_rprt_##_name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf){ \
+ return show_diag_rprt(dev, buf, _offset, _op_mod); \
+} \
+static DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL);
+
+#define MLX4_DIAG_RPRT_CLEAR_DIAGS 3
+
+static size_t show_diag_rprt(struct device *device, char *buf,
+ u32 offset, u8 op_modifier)
+{
+ size_t ret;
+ u32 counter_offset = offset;
+ u32 diag_counter = 0;
+ struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
+ ib_dev.dev);
+
+ ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier,
+ &counter_offset, &diag_counter);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%d\n", diag_counter);
+}
+
+static ssize_t clear_diag_counters(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t length)
+{
+ size_t ret;
+ struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev,
+ ib_dev.dev);
+
+ ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS,
+ NULL, NULL);
+ if (ret)
+ return ret;
+
+ return length;
+}
+
+DEVICE_DIAG_RPRT_ATTR(rq_num_lle , 0x00, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lle , 0x04, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe , 0x08, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe , 0x0C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lpe , 0x18, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_lpe , 0x1C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe , 0x20, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe , 0x24, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe , 0x2C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_bre , 0x34, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_lae , 0x38, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rire , 0x44, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rire , 0x48, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rae , 0x4C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rae , 0x50, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_roe , 0x54, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_tree , 0x5C, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rree , 0x64, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_rnr , 0x68, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_rnr , 0x6C, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_oos , 0x100, 2);
+DEVICE_DIAG_RPRT_ATTR(sq_num_oos , 0x104, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_mce , 0x108, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd , 0x118, 2);
+DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd , 0x120, 2);
+DEVICE_DIAG_RPRT_ATTR(num_cqovf , 0x1A0, 2);
+DEVICE_DIAG_RPRT_ATTR(num_eqovf , 0x1A4, 2);
+DEVICE_DIAG_RPRT_ATTR(num_baddb , 0x1A8, 2);
+
+static DEVICE_ATTR(clear_diag, S_IWUSR, NULL, clear_diag_counters);
+
+static struct attribute *diag_rprt_attrs[] = {
+ &dev_attr_rq_num_lle.attr,
+ &dev_attr_sq_num_lle.attr,
+ &dev_attr_rq_num_lqpoe.attr,
+ &dev_attr_sq_num_lqpoe.attr,
+ &dev_attr_rq_num_lpe.attr,
+ &dev_attr_sq_num_lpe.attr,
+ &dev_attr_rq_num_wrfe.attr,
+ &dev_attr_sq_num_wrfe.attr,
+ &dev_attr_sq_num_mwbe.attr,
+ &dev_attr_sq_num_bre.attr,
+ &dev_attr_rq_num_lae.attr,
+ &dev_attr_sq_num_rire.attr,
+ &dev_attr_rq_num_rire.attr,
+ &dev_attr_sq_num_rae.attr,
+ &dev_attr_rq_num_rae.attr,
+ &dev_attr_sq_num_roe.attr,
+ &dev_attr_sq_num_tree.attr,
+ &dev_attr_sq_num_rree.attr,
+ &dev_attr_rq_num_rnr.attr,
+ &dev_attr_sq_num_rnr.attr,
+ &dev_attr_rq_num_oos.attr,
+ &dev_attr_sq_num_oos.attr,
+ &dev_attr_rq_num_mce.attr,
+ &dev_attr_rq_num_udsdprd.attr,
+ &dev_attr_rq_num_ucsdprd.attr,
+ &dev_attr_num_cqovf.attr,
+ &dev_attr_num_eqovf.attr,
+ &dev_attr_num_baddb.attr,
+ &dev_attr_clear_diag.attr,
+ NULL
+};
+
+static struct attribute_group diag_counters_group = {
+ .name = "diag_counters",
+ .attrs = diag_rprt_attrs
+};
+
+#ifdef __linux__
+static int mlx4_ib_proc_init(void)
+{
+ /* Creating procfs directories /proc/drivers/mlx4_ib/ &&
+ /proc/drivers/mlx4_ib/mrs for further use by the driver.
+ */
+ int err;
+
+ mlx4_ib_driver_dir_entry = proc_mkdir(MLX4_IB_DRIVER_PROC_DIR_NAME,
+ NULL);
+ if (!mlx4_ib_driver_dir_entry) {
+ pr_err("mlx4_ib_proc_init has failed for %s\n",
+ MLX4_IB_DRIVER_PROC_DIR_NAME);
+ err = -ENODEV;
+ goto error;
+ }
+
+ mlx4_mrs_dir_entry = proc_mkdir(MLX4_IB_MRS_PROC_DIR_NAME,
+ mlx4_ib_driver_dir_entry);
+ if (!mlx4_mrs_dir_entry) {
+ pr_err("mlx4_ib_proc_init has failed for %s\n",
+ MLX4_IB_MRS_PROC_DIR_NAME);
+ err = -ENODEV;
+ goto remove_entry;
+ }
+
+ return 0;
+
+remove_entry:
+ remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME,
+ NULL);
+error:
+ return err;
+}
+#endif
+
+static void init_dev_assign(void)
+{
+ int bus, slot, fn, ib_idx;
+ char *p = dev_assign_str, *t;
+ char curr_val[32] = {0};
+ int ret;
+ int j, i = 0;
+
+ memset(dr, 0, sizeof dr);
+
+ if (dev_assign_str[0] == 0)
+ return;
+
+ while (strlen(p)) {
+ ret = sscanf(p, "%02x:%02x.%x-%x", &bus, &slot, &fn, &ib_idx);
+ if (ret != 4 || ib_idx < 0)
+ goto err;
+
+ for (j = 0; j < i; j++)
+ if (dr[j].nr == ib_idx)
+ goto err;
+
+ dr[i].bus = bus;
+ dr[i].dev = slot;
+ dr[i].func = fn;
+ dr[i].nr = ib_idx;
+
+ t = strchr(p, ',');
+ sprintf(curr_val, "%02x:%02x.%x-%x", bus, slot, fn, ib_idx);
+ if ((!t) && strlen(p) == strlen(curr_val))
+ return;
+
+ if (!t || (t + 1) >= dev_assign_str + sizeof dev_assign_str)
+ goto err;
+
+ ++i;
+ if (i >= MAX_DR)
+ goto err;
+
+ p = t + 1;
+ }
+
+ return;
+err:
+ memset(dr, 0, sizeof dr);
+ printk(KERN_WARNING "mlx4_ib: The value of 'dev_assign_str' parameter "
+ "is incorrect. The parameter value is discarded!");
+}
+
+static int mlx4_ib_dev_idx(struct mlx4_dev *dev)
+{
+ int /*bus,*/ slot, fn;
+ int i;
+
+ if (!dev)
+ return -1;
+ else if (!dev->pdev)
+ return -1;
+ //else if (!dev->pdev->bus)
+ // return -1;
+
+ //bus = dev->pdev->bus->conf.pc_sel.pc_bus;
+ slot = PCI_SLOT(dev->pdev->devfn);
+ fn = PCI_FUNC(dev->pdev->devfn);
+
+ for (i = 0; i < MAX_DR; ++i) {
+ if (/*dr[i].bus == bus &&*/
+ dr[i].dev == slot &&
+ dr[i].func == fn) {
+ return dr[i].nr;
+ }
+ }
+
+ return -1;
+}
+
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
- static int mlx4_ib_version_printed;
struct mlx4_ib_dev *ibdev;
int num_ports = 0;
- int i;
+ int i, j;
int err;
struct mlx4_ib_iboe *iboe;
- int k;
+ int dev_idx;
- if (!mlx4_ib_version_printed) {
- printk(KERN_INFO "%s", mlx4_ib_version);
- ++mlx4_ib_version_printed;
- }
+ printk(KERN_INFO "%s", mlx4_ib_version);
mlx4_foreach_ib_transport_port(i, dev)
num_ports++;
@@ -1296,14 +1918,22 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
goto err_pd;
- ibdev->priv_uar.map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ ibdev->priv_uar.map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT,
+ PAGE_SIZE);
+
if (!ibdev->priv_uar.map)
goto err_uar;
+
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
ibdev->dev = dev;
- strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
+ dev_idx = mlx4_ib_dev_idx(dev);
+ if (dev_idx >= 0)
+ sprintf(ibdev->ib_dev.name, "mlx4_%d", dev_idx);
+ else
+ strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
+
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
@@ -1312,7 +1942,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->pdev->dev;
- ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+ if (dev->caps.userspace_caps)
+ ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+ else
+ ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -1334,6 +1968,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
+ (1ull << IB_USER_VERBS_CMD_OPEN_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_FLOW) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_FLOW) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
ibdev->ib_dev.query_device = mlx4_ib_query_device;
@@ -1345,7 +1984,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
+#ifdef __linux__
ibdev->ib_dev.mmap = mlx4_ib_mmap;
+ ibdev->ib_dev.get_unmapped_area = mlx4_ib_get_unmapped_area;
+#endif
ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
@@ -1376,87 +2018,139 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
+ ibdev->ib_dev.attach_flow = mlx4_ib_flow_attach;
+ ibdev->ib_dev.detach_flow = mlx4_ib_flow_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
- ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
- ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
- ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
- ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
+ if (!mlx4_is_slave(ibdev->dev)) {
+ ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
+ ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
+ ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
+ ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
+ }
+
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
- ibdev->ib_dev.create_xrc_srq = mlx4_ib_create_xrc_srq;
ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
- ibdev->ib_dev.create_xrc_rcv_qp = mlx4_ib_create_xrc_rcv_qp;
- ibdev->ib_dev.modify_xrc_rcv_qp = mlx4_ib_modify_xrc_rcv_qp;
- ibdev->ib_dev.query_xrc_rcv_qp = mlx4_ib_query_xrc_rcv_qp;
- ibdev->ib_dev.reg_xrc_rcv_qp = mlx4_ib_reg_xrc_rcv_qp;
- ibdev->ib_dev.unreg_xrc_rcv_qp = mlx4_ib_unreg_xrc_rcv_qp;
ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_CREATE_XRC_SRQ) |
- (1ull << IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN) |
- (1ull << IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP) |
- (1ull << IB_USER_VERBS_CMD_REG_XRC_RCV_QP) |
- (1ull << IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP);
+ (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
+ (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
+
if (init_node_data(ibdev))
goto err_map;
- for (k = 0; k < ibdev->num_ports; ++k) {
- err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[k]);
- if (err)
- ibdev->counters[k] = -1;
- else
- mlx4_set_iboe_counter(dev, ibdev->counters[k], k + 1);
+ for (i = 0; i < ibdev->num_ports; ++i) {
+ if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
+ IB_LINK_LAYER_ETHERNET) {
+ err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]);
+ if (err)
+ ibdev->counters[i] = -1;
+ } else
+ ibdev->counters[i] = -1;
}
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
- mutex_init(&ibdev->xrc_reg_mutex);
- if (ib_register_device(&ibdev->ib_dev))
- goto err_counter;
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ !mlx4_is_slave(dev)) {
+ ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
+ err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
+ MLX4_IB_UC_STEER_QPN_ALIGN, &ibdev->steer_qpn_base, 0);
+ if (err)
+ goto err_counter;
+
+ ibdev->ib_uc_qpns_bitmap =
+ kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
+ sizeof(long),
+ GFP_KERNEL);
+ if (!ibdev->ib_uc_qpns_bitmap) {
+ dev_err(&dev->pdev->dev, "bit map alloc failed\n");
+ goto err_steer_qp_release;
+ }
+
+ bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
+
+ err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_base + ibdev->steer_qpn_count - 1);
+ if (err)
+ goto err_steer_free_bitmap;
+ }
+
+ if (ib_register_device(&ibdev->ib_dev, NULL))
+ goto err_steer_free_bitmap;
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
+
+ if (mlx4_ib_init_sriov(ibdev))
+ goto err_mad;
+
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
if (err)
- goto err_reg;
+ goto err_sriov;
}
- for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) {
+
+ for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
if (device_create_file(&ibdev->ib_dev.dev,
- mlx4_class_attributes[i]))
+ mlx4_class_attributes[j]))
goto err_notif;
}
-
- if(sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group))
+ if (sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group))
goto err_notif;
- ibdev->ib_active = 1;
+ ibdev->ib_active = true;
+ if (mlx4_is_mfunc(ibdev->dev))
+ init_pkeys(ibdev);
+
+ /* create paravirt contexts for any VFs which are active */
+ if (mlx4_is_master(ibdev->dev)) {
+ for (j = 0; j < MLX4_MFUNC_MAX; j++) {
+ if (j == mlx4_master_func_num(ibdev->dev))
+ continue;
+ if (mlx4_is_slave_active(ibdev->dev, j))
+ do_slave_init(ibdev, j, 1);
+ }
+ }
return ibdev;
err_notif:
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
- printk(KERN_WARNING "failure unregistering notifier\n");
+ pr_warn("failure unregistering notifier\n");
flush_workqueue(wq);
+err_sriov:
+ mlx4_ib_close_sriov(ibdev);
+
+err_mad:
+ mlx4_ib_mad_cleanup(ibdev);
+
err_reg:
ib_unregister_device(&ibdev->ib_dev);
+err_steer_free_bitmap:
+ kfree(ibdev->ib_uc_qpns_bitmap);
+
+err_steer_qp_release:
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
err_counter:
- for (; k; --k)
- mlx4_counter_free(ibdev->dev, ibdev->counters[k - 1]);
+ for (; i; --i)
+ if (ibdev->counters[i - 1] != -1)
+ mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]);
err_map:
iounmap(ibdev->priv_uar.map);
+ mlx4_ib_free_eqs(dev, ibdev);
err_uar:
mlx4_uar_free(dev, &ibdev->priv_uar);
@@ -1470,73 +2164,215 @@ err_dealloc:
return NULL;
}
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
+{
+ int offset;
+
+ WARN_ON(!dev->ib_uc_qpns_bitmap);
+
+ offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
+ dev->steer_qpn_count,
+ get_count_order(count));
+ if (offset < 0)
+ return offset;
+
+ *qpn = dev->steer_qpn_base + offset;
+ return 0;
+}
+
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
+{
+ if (!qpn ||
+ dev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return;
+
+ BUG_ON(qpn < dev->steer_qpn_base);
+
+ bitmap_release_region(dev->ib_uc_qpns_bitmap,
+ qpn - dev->steer_qpn_base, get_count_order(count));
+}
+
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ int is_attach)
+{
+ struct ib_flow_spec spec = {
+ .type = IB_FLOW_IB_UC,
+ .l2_id.ib_uc.qpn = mqp->ibqp.qp_num,
+ };
+
+ return is_attach ?
+ __mlx4_ib_flow_attach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0)
+ : __mlx4_ib_flow_detach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0);
+}
+
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
- int p;
- int k;
+ int p,j;
+ mlx4_ib_close_sriov(ibdev);
sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group);
-
mlx4_ib_mad_cleanup(ibdev);
+
+ for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
+ device_remove_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j]);
+ }
+
ib_unregister_device(&ibdev->ib_dev);
- for (k = 0; k < ibdev->num_ports; ++k)
- mlx4_counter_free(ibdev->dev, ibdev->counters[k]);
+
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
+ kfree(ibdev->ib_uc_qpns_bitmap);
+ }
if (ibdev->iboe.nb.notifier_call) {
- unregister_netdevice_notifier(&ibdev->iboe.nb);
- flush_workqueue(wq);
+ if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+ pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
iounmap(ibdev->priv_uar.map);
-
+ for (p = 0; p < ibdev->num_ports; ++p)
+ if (ibdev->counters[p] != -1)
+ mlx4_counter_free(ibdev->dev, ibdev->counters[p]);
mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
mlx4_CLOSE_PORT(dev, p);
+ mlx4_ib_free_eqs(dev, ibdev);
+
mlx4_uar_free(dev, &ibdev->priv_uar);
mlx4_pd_free(dev, ibdev->priv_pdn);
ib_dealloc_device(&ibdev->ib_dev);
}
+static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
+{
+ struct mlx4_ib_demux_work **dm = NULL;
+ struct mlx4_dev *dev = ibdev->dev;
+ int i;
+ unsigned long flags;
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
+ if (!dm) {
+ pr_err("failed to allocate memory for tunneling qp update\n");
+ goto out;
+ }
+
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
+ if (!dm[i]) {
+ pr_err("failed to allocate memory for tunneling qp update work struct\n");
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (dm[i])
+ kfree(dm[i]);
+ }
+ goto out;
+ }
+ }
+ /* initialize or tear down tunnel QPs for the slave */
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
+ dm[i]->port = i + 1;
+ dm[i]->slave = slave;
+ dm[i]->do_init = do_init;
+ dm[i]->dev = ibdev;
+ spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
+ if (!ibdev->sriov.is_going_down)
+ queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
+ spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
+ }
+out:
+ if (dm)
+ kfree(dm);
+ return;
+}
+
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
- enum mlx4_dev_event event, int port)
+ enum mlx4_dev_event event, unsigned long param)
{
struct ib_event ibev;
struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
+ struct mlx4_eqe *eqe = NULL;
+ struct ib_event_work *ew;
+ int p = 0;
- if (port > ibdev->num_ports)
- return;
+ if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
+ eqe = (struct mlx4_eqe *)param;
+ else
+ p = (int) param;
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
+ if (p > ibdev->num_ports)
+ return;
+ if (mlx4_is_master(dev) &&
+ rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
+ IB_LINK_LAYER_INFINIBAND) {
+ mlx4_ib_invalidate_all_guid_record(ibdev, p);
+ }
+ mlx4_ib_info((struct ib_device *) ibdev_ptr,
+ "Port %d logical link is up\n", p);
ibev.event = IB_EVENT_PORT_ACTIVE;
break;
case MLX4_DEV_EVENT_PORT_DOWN:
+ if (p > ibdev->num_ports)
+ return;
+ mlx4_ib_info((struct ib_device *) ibdev_ptr,
+ "Port %d logical link is down\n", p);
ibev.event = IB_EVENT_PORT_ERR;
break;
case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
- ibdev->ib_active = 0;
+ ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
break;
+ case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
+ ew = kmalloc(sizeof *ew, GFP_ATOMIC);
+ if (!ew) {
+ pr_err("failed to allocate memory for events work\n");
+ break;
+ }
+
+ INIT_WORK(&ew->work, handle_port_mgmt_change_event);
+ memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
+ ew->ib_dev = ibdev;
+ /* need to queue only for port owner, which uses GEN_EQE */
+ if (mlx4_is_master(dev))
+ queue_work(wq, &ew->work);
+ else
+ handle_port_mgmt_change_event(&ew->work);
+ return;
+
+ case MLX4_DEV_EVENT_SLAVE_INIT:
+ /* here, p is the slave id */
+ do_slave_init(ibdev, p, 1);
+ return;
+
+ case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+ /* here, p is the slave id */
+ do_slave_init(ibdev, p, 0);
+ return;
+
default:
return;
}
ibev.device = ibdev_ptr;
- ibev.element.port_num = port;
+ ibev.element.port_num = (u8) p;
ib_dispatch_event(&ibev);
}
static struct mlx4_interface mlx4_ib_interface = {
- .add = mlx4_ib_add,
- .remove = mlx4_ib_remove,
- .event = mlx4_ib_event,
- .get_prot_dev = get_ibdev,
- .protocol = MLX4_PROT_IB,
+ .add = mlx4_ib_add,
+ .remove = mlx4_ib_remove,
+ .event = mlx4_ib_event,
+ .protocol = MLX4_PROT_IB_IPV6
};
static int __init mlx4_ib_init(void)
@@ -1547,22 +2383,55 @@ static int __init mlx4_ib_init(void)
if (!wq)
return -ENOMEM;
+#ifdef __linux__
+ err = mlx4_ib_proc_init();
+ if (err)
+ goto clean_wq;
+#endif
+
+ err = mlx4_ib_mcg_init();
+ if (err)
+ goto clean_proc;
+
+ init_dev_assign();
+
err = mlx4_register_interface(&mlx4_ib_interface);
- if (err) {
- destroy_workqueue(wq);
- return err;
- }
+ if (err)
+ goto clean_mcg;
return 0;
+
+clean_mcg:
+ mlx4_ib_mcg_destroy();
+
+clean_proc:
+#ifdef __linux__
+ remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME,
+ mlx4_ib_driver_dir_entry);
+ remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL);
+
+clean_wq:
+#endif
+ destroy_workqueue(wq);
+ return err;
}
static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
+ mlx4_ib_mcg_destroy();
destroy_workqueue(wq);
+
+ /* Remove proc entries */
+#ifdef __linux__
+ remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME,
+ mlx4_ib_driver_dir_entry);
+ remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL);
+#endif
+
}
-module_init_order(mlx4_ib_init, SI_ORDER_MIDDLE);
+module_init(mlx4_ib_init);
module_exit(mlx4_ib_cleanup);
#undef MODULE_VERSION
@@ -1572,9 +2441,12 @@ mlx4ib_evhand(module_t mod, int event, void *arg)
{
return (0);
}
+
static moduledata_t mlx4ib_mod = {
.name = "mlx4ib",
.evhand = mlx4ib_evhand,
};
+
DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_SMP, SI_ORDER_ANY);
MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1);
+MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1);
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c b/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c
new file mode 100644
index 0000000..5489323
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c
@@ -0,0 +1,1254 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_sa.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/delay.h>
+
+#include "mlx4_ib.h"
+
+#define MAX_VFS 80
+#define MAX_PEND_REQS_PER_FUNC 4
+#define MAD_TIMEOUT_MS 2000
+
+#define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg)
+#define mcg_error(fmt, arg...) pr_err(fmt, ##arg)
+#define mcg_warn_group(group, format, arg...) \
+ pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
+ (group)->name, group->demux->port, ## arg)
+
+#define mcg_error_group(group, format, arg...) \
+ pr_err(" %16s: " format, (group)->name, ## arg)
+
+static union ib_gid mgid0;
+
+static struct workqueue_struct *clean_wq;
+
+enum mcast_state {
+ MCAST_NOT_MEMBER = 0,
+ MCAST_MEMBER,
+};
+
+enum mcast_group_state {
+ MCAST_IDLE,
+ MCAST_JOIN_SENT,
+ MCAST_LEAVE_SENT,
+ MCAST_RESP_READY
+};
+
+struct mcast_member {
+ enum mcast_state state;
+ uint8_t join_state;
+ int num_pend_reqs;
+ struct list_head pending;
+};
+
+struct ib_sa_mcmember_data {
+ union ib_gid mgid;
+ union ib_gid port_gid;
+ __be32 qkey;
+ __be16 mlid;
+ u8 mtusel_mtu;
+ u8 tclass;
+ __be16 pkey;
+ u8 ratesel_rate;
+ u8 lifetmsel_lifetm;
+ __be32 sl_flowlabel_hoplimit;
+ u8 scope_join_state;
+ u8 proxy_join;
+ u8 reserved[2];
+};
+
+struct mcast_group {
+ struct ib_sa_mcmember_data rec;
+ struct rb_node node;
+ struct list_head mgid0_list;
+ struct mlx4_ib_demux_ctx *demux;
+ struct mcast_member func[MAX_VFS];
+ struct mutex lock;
+ struct work_struct work;
+ struct list_head pending_list;
+ int members[3];
+ enum mcast_group_state state;
+ enum mcast_group_state prev_state;
+ struct ib_sa_mad response_sa_mad;
+ __be64 last_req_tid;
+
+ char name[33]; /* MGID string */
+ struct device_attribute dentry;
+
+ /* refcount is the reference count for the following:
+ 1. Each queued request
+ 2. Each invocation of the worker thread
+ 3. Membership of the port at the SA
+ */
+ atomic_t refcount;
+
+ /* delayed work to clean pending SM request */
+ struct delayed_work timeout_work;
+ struct list_head cleanup_list;
+};
+
+struct mcast_req {
+ int func;
+ struct ib_sa_mad sa_mad;
+ struct list_head group_list;
+ struct list_head func_list;
+ struct mcast_group *group;
+ int clean;
+};
+
+
+#define safe_atomic_dec(ref) \
+ do {\
+ if (atomic_dec_and_test(ref)) \
+ mcg_warn_group(group, "did not expect to reach zero\n"); \
+ } while (0)
+
+static const char *get_state_string(enum mcast_group_state state)
+{
+ switch (state) {
+ case MCAST_IDLE:
+ return "MCAST_IDLE";
+ case MCAST_JOIN_SENT:
+ return "MCAST_JOIN_SENT";
+ case MCAST_LEAVE_SENT:
+ return "MCAST_LEAVE_SENT";
+ case MCAST_RESP_READY:
+ return "MCAST_RESP_READY";
+ }
+ return "Invalid State";
+}
+
+static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx,
+ union ib_gid *mgid)
+{
+ struct rb_node *node = ctx->mcg_table.rb_node;
+ struct mcast_group *group;
+ int ret;
+
+ while (node) {
+ group = rb_entry(node, struct mcast_group, node);
+ ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
+ if (!ret)
+ return group;
+
+ if (ret < 0)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+ return NULL;
+}
+
+static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
+ struct mcast_group *group)
+{
+ struct rb_node **link = &ctx->mcg_table.rb_node;
+ struct rb_node *parent = NULL;
+ struct mcast_group *cur_group;
+ int ret;
+
+ while (*link) {
+ parent = *link;
+ cur_group = rb_entry(parent, struct mcast_group, node);
+
+ ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
+ sizeof group->rec.mgid);
+ if (ret < 0)
+ link = &(*link)->rb_left;
+ else if (ret > 0)
+ link = &(*link)->rb_right;
+ else
+ return cur_group;
+ }
+ rb_link_node(&group->node, parent, link);
+ rb_insert_color(&group->node, &ctx->mcg_table);
+ return NULL;
+}
+
+static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
+{
+ struct mlx4_ib_dev *dev = ctx->dev;
+ struct ib_ah_attr ah_attr;
+
+ spin_lock(&dev->sm_lock);
+ if (!dev->sm_ah[ctx->port - 1]) {
+ /* port is not yet Active, sm_ah not ready */
+ spin_unlock(&dev->sm_lock);
+ return -EAGAIN;
+ }
+ mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
+ spin_unlock(&dev->sm_lock);
+ return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
+ IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
+}
+
+static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
+ struct ib_mad *mad)
+{
+ struct mlx4_ib_dev *dev = ctx->dev;
+ struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
+ struct ib_wc wc;
+ struct ib_ah_attr ah_attr;
+
+ /* Our agent might not yet be registered when mads start to arrive */
+ if (!agent)
+ return -EAGAIN;
+
+ ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
+
+ if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
+ return -EINVAL;
+ wc.sl = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = ctx->port;
+ wc.slid = ah_attr.dlid; /* opensm lid */
+ wc.src_qp = 1;
+ return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
+}
+
+static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad)
+{
+ struct ib_sa_mad mad;
+ struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data;
+ int ret;
+
+ /* we rely on a mad request as arrived from a VF */
+ memcpy(&mad, sa_mad, sizeof mad);
+
+ /* fix port GID to be the real one (slave 0) */
+ sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0];
+
+ /* assign our own TID */
+ mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
+ group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
+
+ ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
+ /* set timeout handler */
+ if (!ret) {
+ /* calls mlx4_ib_mcg_timeout_handler */
+ queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
+ msecs_to_jiffies(MAD_TIMEOUT_MS));
+ }
+
+ return ret;
+}
+
+static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
+{
+ struct ib_sa_mad mad;
+ struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
+ int ret;
+
+ memset(&mad, 0, sizeof mad);
+ mad.mad_hdr.base_version = 1;
+ mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ mad.mad_hdr.class_version = 2;
+ mad.mad_hdr.method = IB_SA_METHOD_DELETE;
+ mad.mad_hdr.status = cpu_to_be16(0);
+ mad.mad_hdr.class_specific = cpu_to_be16(0);
+ mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
+ group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
+ mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+ mad.mad_hdr.attr_mod = cpu_to_be32(0);
+ mad.sa_hdr.sm_key = 0x0;
+ mad.sa_hdr.attr_offset = cpu_to_be16(7);
+ mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID |
+ IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE;
+
+ *sa_data = group->rec;
+ sa_data->scope_join_state = join_state;
+
+ ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
+ if (ret)
+ group->state = MCAST_IDLE;
+
+ /* set timeout handler */
+ if (!ret) {
+ /* calls mlx4_ib_mcg_timeout_handler */
+ queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
+ msecs_to_jiffies(MAD_TIMEOUT_MS));
+ }
+
+ return ret;
+}
+
+static int send_reply_to_slave(int slave, struct mcast_group *group,
+ struct ib_sa_mad *req_sa_mad, u16 status)
+{
+ struct ib_sa_mad mad;
+ struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
+ struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data;
+ int ret;
+
+ memset(&mad, 0, sizeof mad);
+ mad.mad_hdr.base_version = 1;
+ mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ mad.mad_hdr.class_version = 2;
+ mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ mad.mad_hdr.status = cpu_to_be16(status);
+ mad.mad_hdr.class_specific = cpu_to_be16(0);
+ mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid;
+ *(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */
+ mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+ mad.mad_hdr.attr_mod = cpu_to_be32(0);
+ mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key;
+ mad.sa_hdr.attr_offset = cpu_to_be16(7);
+ mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */
+
+ *sa_data = group->rec;
+
+ /* reconstruct VF's requested join_state and port_gid */
+ sa_data->scope_join_state &= 0xf0;
+ sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f);
+ memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid);
+
+ ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad);
+ return ret;
+}
+
+static int check_selector(ib_sa_comp_mask comp_mask,
+ ib_sa_comp_mask selector_mask,
+ ib_sa_comp_mask value_mask,
+ u8 src_value, u8 dst_value)
+{
+ int err;
+ u8 selector = dst_value >> 6;
+ dst_value &= 0x3f;
+ src_value &= 0x3f;
+
+ if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
+ return 0;
+
+ switch (selector) {
+ case IB_SA_GT:
+ err = (src_value <= dst_value);
+ break;
+ case IB_SA_LT:
+ err = (src_value >= dst_value);
+ break;
+ case IB_SA_EQ:
+ err = (src_value != dst_value);
+ break;
+ default:
+ err = 0;
+ break;
+ }
+
+ return err;
+}
+
+static u16 cmp_rec(struct ib_sa_mcmember_data *src,
+ struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
+{
+ /* src is group record, dst is request record */
+ /* MGID must already match */
+ /* Port_GID we always replace to our Port_GID, so it is a match */
+
+#define MAD_STATUS_REQ_INVALID 0x0200
+ if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
+ return MAD_STATUS_REQ_INVALID;
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
+ IB_SA_MCMEMBER_REC_MTU,
+ src->mtusel_mtu, dst->mtusel_mtu))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
+ src->tclass != dst->tclass)
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
+ return MAD_STATUS_REQ_INVALID;
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
+ IB_SA_MCMEMBER_REC_RATE,
+ src->ratesel_rate, dst->ratesel_rate))
+ return MAD_STATUS_REQ_INVALID;
+ if (check_selector(comp_mask,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
+ src->lifetmsel_lifetm, dst->lifetmsel_lifetm))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SL &&
+ (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) !=
+ (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
+ (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) !=
+ (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
+ (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) !=
+ (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE &&
+ (src->scope_join_state & 0xf0) !=
+ (dst->scope_join_state & 0xf0))
+ return MAD_STATUS_REQ_INVALID;
+
+ /* join_state checked separately, proxy_join ignored */
+
+ return 0;
+}
+
+/* release group, return 1 if this was last release and group is destroyed
+ * timout work is canceled sync */
+static int release_group(struct mcast_group *group, int from_timeout_handler)
+{
+ struct mlx4_ib_demux_ctx *ctx = group->demux;
+ int nzgroup;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ mutex_lock(&group->lock);
+ if (atomic_dec_and_test(&group->refcount)) {
+ if (!from_timeout_handler) {
+ if (group->state != MCAST_IDLE &&
+ !cancel_delayed_work(&group->timeout_work)) {
+ atomic_inc(&group->refcount);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ return 0;
+ }
+ }
+
+ nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
+ if (nzgroup)
+ del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
+ if (!list_empty(&group->pending_list))
+ mcg_warn_group(group, "releasing a group with non empty pending list\n");
+ if (nzgroup)
+ rb_erase(&group->node, &ctx->mcg_table);
+ list_del_init(&group->mgid0_list);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ kfree(group);
+ return 1;
+ } else {
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ }
+ return 0;
+}
+
+static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
+{
+ int i;
+
+ for (i = 0; i < 3; i++, join_state >>= 1)
+ if (join_state & 0x1)
+ group->members[i] += inc;
+}
+
+static u8 get_leave_state(struct mcast_group *group)
+{
+ u8 leave_state = 0;
+ int i;
+
+ for (i = 0; i < 3; i++)
+ if (!group->members[i])
+ leave_state |= (1 << i);
+
+ return leave_state & (group->rec.scope_join_state & 7);
+}
+
+static int join_group(struct mcast_group *group, int slave, u8 join_mask)
+{
+ int ret = 0;
+ u8 join_state;
+
+ /* remove bits that slave is already member of, and adjust */
+ join_state = join_mask & (~group->func[slave].join_state);
+ adjust_membership(group, join_state, 1);
+ group->func[slave].join_state |= join_state;
+ if (group->func[slave].state != MCAST_MEMBER && join_state) {
+ group->func[slave].state = MCAST_MEMBER;
+ ret = 1;
+ }
+ return ret;
+}
+
+static int leave_group(struct mcast_group *group, int slave, u8 leave_state)
+{
+ int ret = 0;
+
+ adjust_membership(group, leave_state, -1);
+ group->func[slave].join_state &= ~leave_state;
+ if (!group->func[slave].join_state) {
+ group->func[slave].state = MCAST_NOT_MEMBER;
+ ret = 1;
+ }
+ return ret;
+}
+
+static int check_leave(struct mcast_group *group, int slave, u8 leave_mask)
+{
+ if (group->func[slave].state != MCAST_MEMBER)
+ return MAD_STATUS_REQ_INVALID;
+
+ /* make sure we're not deleting unset bits */
+ if (~group->func[slave].join_state & leave_mask)
+ return MAD_STATUS_REQ_INVALID;
+
+ if (!leave_mask)
+ return MAD_STATUS_REQ_INVALID;
+
+ return 0;
+}
+
+static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct mcast_group *group;
+ struct mcast_req *req = NULL;
+
+ group = container_of(delay, typeof(*group), timeout_work);
+
+ mutex_lock(&group->lock);
+ if (group->state == MCAST_JOIN_SENT) {
+ if (!list_empty(&group->pending_list)) {
+ req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ --group->func[req->func].num_pend_reqs;
+ mutex_unlock(&group->lock);
+ kfree(req);
+ if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) {
+ if (release_group(group, 1))
+ return;
+ } else {
+ kfree(group);
+ return;
+ }
+ mutex_lock(&group->lock);
+ } else
+ mcg_warn_group(group, "DRIVER BUG\n");
+ } else if (group->state == MCAST_LEAVE_SENT) {
+ if (group->rec.scope_join_state & 7)
+ group->rec.scope_join_state &= 0xf8;
+ group->state = MCAST_IDLE;
+ mutex_unlock(&group->lock);
+ if (release_group(group, 1))
+ return;
+ mutex_lock(&group->lock);
+ } else
+ mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
+ group->state = MCAST_IDLE;
+ atomic_inc(&group->refcount);
+ queue_work(group->demux->mcg_wq, &group->work);
+ safe_atomic_dec(&group->refcount);
+
+ mutex_unlock(&group->lock);
+}
+
+static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
+ struct mcast_req *req)
+{
+ u16 status;
+
+ if (req->clean)
+ leave_mask = group->func[req->func].join_state;
+
+ status = check_leave(group, req->func, leave_mask);
+ if (!status)
+ leave_group(group, req->func, leave_mask);
+
+ if (!req->clean)
+ send_reply_to_slave(req->func, group, &req->sa_mad, status);
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ return 1;
+}
+
+static int handle_join_req(struct mcast_group *group, u8 join_mask,
+ struct mcast_req *req)
+{
+ u8 group_join_state = group->rec.scope_join_state & 7;
+ int ref = 0;
+ u16 status;
+ struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
+
+ if (join_mask == (group_join_state & join_mask)) {
+ /* port's membership need not change */
+ status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask);
+ if (!status)
+ join_group(group, req->func, join_mask);
+
+ --group->func[req->func].num_pend_reqs;
+ send_reply_to_slave(req->func, group, &req->sa_mad, status);
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ ++ref;
+ } else {
+ /* port's membership needs to be updated */
+ group->prev_state = group->state;
+ if (send_join_to_wire(group, &req->sa_mad)) {
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ ref = 1;
+ group->state = group->prev_state;
+ } else
+ group->state = MCAST_JOIN_SENT;
+ }
+
+ return ref;
+}
+
+static void mlx4_ib_mcg_work_handler(struct work_struct *work)
+{
+ struct mcast_group *group;
+ struct mcast_req *req = NULL;
+ struct ib_sa_mcmember_data *sa_data;
+ u8 req_join_state;
+ int rc = 1; /* release_count - this is for the scheduled work */
+ u16 status;
+ u8 method;
+
+ group = container_of(work, typeof(*group), work);
+
+ mutex_lock(&group->lock);
+
+ /* First, let's see if a response from SM is waiting regarding this group.
+ * If so, we need to update the group's REC. If this is a bad response, we
+ * may need to send a bad response to a VF waiting for it. If VF is waiting
+ * and this is a good response, the VF will be answered later in this func. */
+ if (group->state == MCAST_RESP_READY) {
+ /* cancels mlx4_ib_mcg_timeout_handler */
+ cancel_delayed_work(&group->timeout_work);
+ status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
+ method = group->response_sa_mad.mad_hdr.method;
+ if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
+ mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
+ (long long unsigned int)be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
+ (long long unsigned int)be64_to_cpu(group->last_req_tid));
+ group->state = group->prev_state;
+ goto process_requests;
+ }
+ if (status) {
+ if (!list_empty(&group->pending_list))
+ req = list_first_entry(&group->pending_list,
+ struct mcast_req, group_list);
+ if ((method == IB_MGMT_METHOD_GET_RESP)) {
+ if (req) {
+ send_reply_to_slave(req->func, group, &req->sa_mad, status);
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ ++rc;
+ } else
+ mcg_warn_group(group, "no request for failed join\n");
+ } else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
+ ++rc;
+ } else {
+ u8 resp_join_state;
+ u8 cur_join_state;
+
+ resp_join_state = ((struct ib_sa_mcmember_data *)
+ group->response_sa_mad.data)->scope_join_state & 7;
+ cur_join_state = group->rec.scope_join_state & 7;
+
+ if (method == IB_MGMT_METHOD_GET_RESP) {
+ /* successfull join */
+ if (!cur_join_state && resp_join_state)
+ --rc;
+ } else if (!resp_join_state)
+ ++rc;
+ memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec);
+ }
+ group->state = MCAST_IDLE;
+ }
+
+process_requests:
+ /* We should now go over pending join/leave requests, as long as we are idle. */
+ while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
+ req = list_first_entry(&group->pending_list, struct mcast_req,
+ group_list);
+ sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
+ req_join_state = sa_data->scope_join_state & 0x7;
+
+ /* For a leave request, we will immediately answer the VF, and
+ * update our internal counters. The actual leave will be sent
+ * to SM later, if at all needed. We dequeue the request now. */
+ if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE)
+ rc += handle_leave_req(group, req_join_state, req);
+ else
+ rc += handle_join_req(group, req_join_state, req);
+ }
+
+ /* Handle leaves */
+ if (group->state == MCAST_IDLE) {
+ req_join_state = get_leave_state(group);
+ if (req_join_state) {
+ group->rec.scope_join_state &= ~req_join_state;
+ group->prev_state = group->state;
+ if (send_leave_to_wire(group, req_join_state)) {
+ group->state = group->prev_state;
+ ++rc;
+ } else
+ group->state = MCAST_LEAVE_SENT;
+ }
+ }
+
+ if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE)
+ goto process_requests;
+ mutex_unlock(&group->lock);
+
+ while (rc--)
+ release_group(group, 0);
+}
+
+static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx,
+ __be64 tid,
+ union ib_gid *new_mgid)
+{
+ struct mcast_group *group = NULL, *cur_group;
+ struct mcast_req *req;
+ struct list_head *pos;
+ struct list_head *n;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) {
+ group = list_entry(pos, struct mcast_group, mgid0_list);
+ mutex_lock(&group->lock);
+ if (group->last_req_tid == tid) {
+ if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
+ group->rec.mgid = *new_mgid;
+ sprintf(group->name, "%016llx%016llx",
+ (long long unsigned int)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
+ (long long unsigned int)be64_to_cpu(group->rec.mgid.global.interface_id));
+ list_del_init(&group->mgid0_list);
+ cur_group = mcast_insert(ctx, group);
+ if (cur_group) {
+ /* A race between our code and SM. Silently cleaning the new one */
+ req = list_first_entry(&group->pending_list,
+ struct mcast_req, group_list);
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ release_group(group, 0);
+ return NULL;
+ }
+
+ atomic_inc(&group->refcount);
+ add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ return group;
+ } else {
+ struct mcast_req *tmp1, *tmp2;
+
+ list_del(&group->mgid0_list);
+ if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE)
+ cancel_delayed_work_sync(&group->timeout_work);
+
+ list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) {
+ list_del(&tmp1->group_list);
+ kfree(tmp1);
+ }
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ kfree(group);
+ return NULL;
+ }
+ }
+ mutex_unlock(&group->lock);
+ }
+ mutex_unlock(&ctx->mcg_table_lock);
+
+ return NULL;
+}
+
+static ssize_t sysfs_show_group(struct device *dev,
+ struct device_attribute *attr, char *buf);
+
+static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
+ union ib_gid *mgid, int create,
+ gfp_t gfp_mask)
+{
+ struct mcast_group *group, *cur_group;
+ int is_mgid0;
+ int i;
+
+ is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
+ if (!is_mgid0) {
+ group = mcast_find(ctx, mgid);
+ if (group)
+ goto found;
+ }
+
+ if (!create)
+ return ERR_PTR(-ENOENT);
+
+ group = kzalloc(sizeof *group, gfp_mask);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
+ group->demux = ctx;
+ group->rec.mgid = *mgid;
+ INIT_LIST_HEAD(&group->pending_list);
+ INIT_LIST_HEAD(&group->mgid0_list);
+ for (i = 0; i < MAX_VFS; ++i)
+ INIT_LIST_HEAD(&group->func[i].pending);
+ INIT_WORK(&group->work, mlx4_ib_mcg_work_handler);
+ INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
+ mutex_init(&group->lock);
+ sprintf(group->name, "%016llx%016llx",
+ (long long unsigned int)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
+ (long long unsigned int)be64_to_cpu(group->rec.mgid.global.interface_id));
+ sysfs_attr_init(&group->dentry.attr);
+ group->dentry.show = sysfs_show_group;
+ group->dentry.store = NULL;
+ group->dentry.attr.name = group->name;
+ group->dentry.attr.mode = 0400;
+ group->state = MCAST_IDLE;
+
+ if (is_mgid0) {
+ list_add(&group->mgid0_list, &ctx->mcg_mgid0_list);
+ goto found;
+ }
+
+ cur_group = mcast_insert(ctx, group);
+ if (cur_group) {
+ mcg_warn("group just showed up %s - confused\n", cur_group->name);
+ kfree(group);
+ return ERR_PTR(-EINVAL);
+ }
+
+ add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
+
+found:
+ atomic_inc(&group->refcount);
+ return group;
+}
+
+static void queue_req(struct mcast_req *req)
+{
+ struct mcast_group *group = req->group;
+
+ atomic_inc(&group->refcount); /* for the request */
+ atomic_inc(&group->refcount); /* for scheduling the work */
+ list_add_tail(&req->group_list, &group->pending_list);
+ list_add_tail(&req->func_list, &group->func[req->func].pending);
+ /* calls mlx4_ib_mcg_work_handler */
+ queue_work(group->demux->mcg_wq, &group->work);
+ safe_atomic_dec(&group->refcount);
+}
+
+int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *mad)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data;
+ struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
+ struct mcast_group *group;
+
+ switch (mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET_RESP:
+ case IB_SA_METHOD_DELETE_RESP:
+ mutex_lock(&ctx->mcg_table_lock);
+ group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL);
+ mutex_unlock(&ctx->mcg_table_lock);
+ if (IS_ERR(group)) {
+ if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
+ __be64 tid = mad->mad_hdr.tid;
+ *(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */
+ group = search_relocate_mgid0_group(ctx, tid, &rec->mgid);
+ } else
+ group = NULL;
+ }
+
+ if (!group)
+ return 1;
+
+ mutex_lock(&group->lock);
+ group->response_sa_mad = *mad;
+ group->prev_state = group->state;
+ group->state = MCAST_RESP_READY;
+ /* calls mlx4_ib_mcg_work_handler */
+ atomic_inc(&group->refcount);
+ queue_work(ctx->mcg_wq, &group->work);
+ safe_atomic_dec(&group->refcount);
+ mutex_unlock(&group->lock);
+ release_group(group, 0);
+ return 1; /* consumed */
+ case IB_MGMT_METHOD_SET:
+ case IB_SA_METHOD_GET_TABLE:
+ case IB_SA_METHOD_GET_TABLE_RESP:
+ case IB_SA_METHOD_DELETE:
+ return 0; /* not consumed, pass-through to guest over tunnel */
+ default:
+ mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n",
+ port, mad->mad_hdr.method);
+ return 1; /* consumed */
+ }
+}
+
+int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
+ int slave, struct ib_sa_mad *sa_mad)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data;
+ struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
+ struct mcast_group *group;
+ struct mcast_req *req;
+ int may_create = 0;
+
+ if (ctx->flushing)
+ return -EAGAIN;
+
+ switch (sa_mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_SET:
+ may_create = 1;
+ case IB_SA_METHOD_DELETE:
+ req = kzalloc(sizeof *req, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ req->func = slave;
+ req->sa_mad = *sa_mad;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL);
+ mutex_unlock(&ctx->mcg_table_lock);
+ if (IS_ERR(group)) {
+ kfree(req);
+ return PTR_ERR(group);
+ }
+ mutex_lock(&group->lock);
+ if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
+ mutex_unlock(&group->lock);
+ mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
+ port, slave, MAX_PEND_REQS_PER_FUNC);
+ release_group(group, 0);
+ kfree(req);
+ return -ENOMEM;
+ }
+ ++group->func[slave].num_pend_reqs;
+ req->group = group;
+ queue_req(req);
+ mutex_unlock(&group->lock);
+ release_group(group, 0);
+ return 1; /* consumed */
+ case IB_SA_METHOD_GET_TABLE:
+ case IB_MGMT_METHOD_GET_RESP:
+ case IB_SA_METHOD_GET_TABLE_RESP:
+ case IB_SA_METHOD_DELETE_RESP:
+ return 0; /* not consumed, pass-through */
+ default:
+ mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n",
+ port, slave, sa_mad->mad_hdr.method);
+ return 1; /* consumed */
+ }
+}
+
+static ssize_t sysfs_show_group(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct mcast_group *group =
+ container_of(attr, struct mcast_group, dentry);
+ struct mcast_req *req = NULL;
+ char pending_str[40];
+ char state_str[40];
+ ssize_t len = 0;
+ int f;
+
+ if (group->state == MCAST_IDLE)
+ sprintf(state_str, "%s", get_state_string(group->state));
+ else
+ sprintf(state_str, "%s(TID=0x%llx)",
+ get_state_string(group->state),
+ (long long unsigned int)be64_to_cpu(group->last_req_tid));
+ if (list_empty(&group->pending_list)) {
+ sprintf(pending_str, "No");
+ } else {
+ req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
+ sprintf(pending_str, "Yes(TID=0x%llx)",
+ (long long unsigned int)be64_to_cpu(req->sa_mad.mad_hdr.tid));
+ }
+ len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
+ group->rec.scope_join_state & 0xf,
+ group->members[2], group->members[1], group->members[0],
+ atomic_read(&group->refcount),
+ pending_str,
+ state_str);
+ for (f = 0; f < MAX_VFS; ++f)
+ if (group->func[f].state == MCAST_MEMBER)
+ len += sprintf(buf + len, "%d[%1x] ",
+ f, group->func[f].join_state);
+
+ len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
+ "%4x %4x %2x %2x)\n",
+ be16_to_cpu(group->rec.pkey),
+ be32_to_cpu(group->rec.qkey),
+ (group->rec.mtusel_mtu & 0xc0) >> 6,
+ group->rec.mtusel_mtu & 0x3f,
+ group->rec.tclass,
+ (group->rec.ratesel_rate & 0xc0) >> 6,
+ group->rec.ratesel_rate & 0x3f,
+ (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
+ (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
+ be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
+ group->rec.proxy_join);
+
+ return len;
+}
+
+int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx)
+{
+ char name[20];
+
+ atomic_set(&ctx->tid, 0);
+ sprintf(name, "mlx4_ib_mcg%d", ctx->port);
+ ctx->mcg_wq = create_singlethread_workqueue(name);
+ if (!ctx->mcg_wq)
+ return -ENOMEM;
+
+ mutex_init(&ctx->mcg_table_lock);
+ ctx->mcg_table = RB_ROOT;
+ INIT_LIST_HEAD(&ctx->mcg_mgid0_list);
+ ctx->flushing = 0;
+
+ return 0;
+}
+
+static void force_clean_group(struct mcast_group *group)
+{
+ struct mcast_req *req, *tmp
+ ;
+ list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) {
+ list_del(&req->group_list);
+ kfree(req);
+ }
+ del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr);
+ rb_erase(&group->node, &group->demux->mcg_table);
+ kfree(group);
+}
+
+static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
+{
+ int i;
+ struct rb_node *p;
+ struct mcast_group *group;
+ unsigned long end;
+ int count;
+
+ for (i = 0; i < MAX_VFS; ++i)
+ clean_vf_mcast(ctx, i);
+
+ end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
+ do {
+ count = 0;
+ mutex_lock(&ctx->mcg_table_lock);
+ for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p))
+ ++count;
+ mutex_unlock(&ctx->mcg_table_lock);
+ if (!count)
+ break;
+
+ msleep(1);
+ } while (time_after(end, jiffies));
+
+ flush_workqueue(ctx->mcg_wq);
+ if (destroy_wq)
+ destroy_workqueue(ctx->mcg_wq);
+
+ mutex_lock(&ctx->mcg_table_lock);
+ while ((p = rb_first(&ctx->mcg_table)) != NULL) {
+ group = rb_entry(p, struct mcast_group, node);
+ if (atomic_read(&group->refcount))
+ mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
+
+ force_clean_group(group);
+ }
+ mutex_unlock(&ctx->mcg_table_lock);
+}
+
+struct clean_work {
+ struct work_struct work;
+ struct mlx4_ib_demux_ctx *ctx;
+ int destroy_wq;
+};
+
+static void mcg_clean_task(struct work_struct *work)
+{
+ struct clean_work *cw = container_of(work, struct clean_work, work);
+
+ _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq);
+ cw->ctx->flushing = 0;
+ kfree(cw);
+}
+
+void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
+{
+ struct clean_work *work;
+
+ if (ctx->flushing)
+ return;
+
+ ctx->flushing = 1;
+
+ if (destroy_wq) {
+ _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq);
+ ctx->flushing = 0;
+ return;
+ }
+
+ work = kmalloc(sizeof *work, GFP_KERNEL);
+ if (!work) {
+ ctx->flushing = 0;
+ mcg_warn("failed allocating work for cleanup\n");
+ return;
+ }
+
+ work->ctx = ctx;
+ work->destroy_wq = destroy_wq;
+ INIT_WORK(&work->work, mcg_clean_task);
+ queue_work(clean_wq, &work->work);
+}
+
+static void build_leave_mad(struct mcast_req *req)
+{
+ struct ib_sa_mad *mad = &req->sa_mad;
+
+ mad->mad_hdr.method = IB_SA_METHOD_DELETE;
+}
+
+
+static void clear_pending_reqs(struct mcast_group *group, int vf)
+{
+ struct mcast_req *req, *tmp, *group_first = NULL;
+ int clear;
+ int pend = 0;
+
+ if (!list_empty(&group->pending_list))
+ group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list);
+
+ list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) {
+ clear = 1;
+ if (group_first == req &&
+ (group->state == MCAST_JOIN_SENT ||
+ group->state == MCAST_LEAVE_SENT)) {
+ clear = cancel_delayed_work(&group->timeout_work);
+ pend = !clear;
+ group->state = MCAST_IDLE;
+ }
+ if (clear) {
+ --group->func[vf].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ atomic_dec(&group->refcount);
+ }
+ }
+
+ if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) {
+ mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n",
+ list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs);
+ }
+}
+
+static int push_deleteing_req(struct mcast_group *group, int slave)
+{
+ struct mcast_req *req;
+ struct mcast_req *pend_req;
+
+ if (!group->func[slave].join_state)
+ return 0;
+
+ req = kzalloc(sizeof *req, GFP_KERNEL);
+ if (!req) {
+ mcg_warn_group(group, "failed allocation - may leave stall groups\n");
+ return -ENOMEM;
+ }
+
+ if (!list_empty(&group->func[slave].pending)) {
+ pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list);
+ if (pend_req->clean) {
+ kfree(req);
+ return 0;
+ }
+ }
+
+ req->clean = 1;
+ req->func = slave;
+ req->group = group;
+ ++group->func[slave].num_pend_reqs;
+ build_leave_mad(req);
+ queue_req(req);
+ return 0;
+}
+
+void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave)
+{
+ struct mcast_group *group;
+ struct rb_node *p;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) {
+ group = rb_entry(p, struct mcast_group, node);
+ mutex_lock(&group->lock);
+ if (atomic_read(&group->refcount)) {
+ /* clear pending requests of this VF */
+ clear_pending_reqs(group, slave);
+ push_deleteing_req(group, slave);
+ }
+ mutex_unlock(&group->lock);
+ }
+ mutex_unlock(&ctx->mcg_table_lock);
+}
+
+
+int mlx4_ib_mcg_init(void)
+{
+ clean_wq = create_singlethread_workqueue("mlx4_ib_mcg");
+ if (!clean_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_ib_mcg_destroy(void)
+{
+ destroy_workqueue(clean_wq);
+}
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
index b8f6996..ffd2936 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -37,38 +37,50 @@
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/mutex.h>
+#include <linux/idr.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_sa.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
-
+#include <linux/rbtree.h>
#define MLX4_IB_DRV_NAME "mlx4_ib"
-#ifdef CONFIG_MLX4_DEBUG
-extern int mlx4_ib_debug_level;
-
-#define mlx4_ib_dbg(format, arg...) \
- do { \
- if (mlx4_ib_debug_level) \
- printk(KERN_DEBUG "<" MLX4_IB_DRV_NAME "> %s: " format "\n",\
- __func__, ## arg); \
- } while (0)
-
-#else /* CONFIG_MLX4_DEBUG */
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__
-#define mlx4_ib_dbg(format, arg...) do {} while (0)
+#define mlx4_ib_warn(ibdev, format, arg...) \
+ dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
-#endif /* CONFIG_MLX4_DEBUG */
+#define mlx4_ib_info(ibdev, format, arg...) \
+ dev_info((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
enum {
- MLX4_IB_SQ_MIN_WQE_SHIFT = 6
+ MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
+ MLX4_IB_MAX_HEADROOM = 2048
};
-#define MLX4_IB_SQ_HEADROOM(shift) ((2048 >> (shift)) + 1)
-#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
+#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)
+#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
+
+/*module param to indicate if SM assigns the alias_GUID*/
+extern int mlx4_ib_sm_guid_assign;
+#ifdef __linux__
+extern struct proc_dir_entry *mlx4_mrs_dir_entry;
+#endif
+
+#define MLX4_IB_UC_STEER_QPN_ALIGN 1
+#define MLX4_IB_UC_MAX_NUM_QPS (256 * 1024)
+
+
+#define MLX4_IB_MMAP_CMD_MASK 0xFF
+#define MLX4_IB_MMAP_CMD_BITS 8
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
@@ -83,15 +95,16 @@ struct mlx4_ib_pd {
};
struct mlx4_ib_xrcd {
- struct ib_xrcd ibxrcd;
- u32 xrcdn;
- struct ib_pd *pd;
- struct ib_cq *cq;
+ struct ib_xrcd ibxrcd;
+ u32 xrcdn;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
};
struct mlx4_ib_cq_buf {
struct mlx4_buf buf;
struct mlx4_mtt mtt;
+ int entry_size;
};
struct mlx4_ib_cq_resize {
@@ -99,6 +112,11 @@ struct mlx4_ib_cq_resize {
int cqe;
};
+struct mlx4_shared_mr_info {
+ int mr_id;
+ struct ib_umem *umem;
+};
+
struct mlx4_ib_cq {
struct ib_cq ibcq;
struct mlx4_cq mcq;
@@ -115,6 +133,7 @@ struct mlx4_ib_mr {
struct ib_mr ibmr;
struct mlx4_mr mmr;
struct ib_umem *umem;
+ struct mlx4_shared_mr_info *smr_info;
};
struct mlx4_ib_fast_reg_page_list {
@@ -141,18 +160,127 @@ struct mlx4_ib_wq {
};
enum mlx4_ib_qp_flags {
- MLX4_IB_QP_LSO = 1 << 0,
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
- MLX4_IB_XRC_RCV = 1 << 2,
+ MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+ MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+ MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
+ MLX4_IB_SRIOV_SQP = 1 << 31,
};
-struct gid_entry {
+struct mlx4_ib_gid_entry {
struct list_head list;
union ib_gid gid;
int added;
u8 port;
};
+enum mlx4_ib_mmap_cmd {
+ MLX4_IB_MMAP_UAR_PAGE = 0,
+ MLX4_IB_MMAP_BLUE_FLAME_PAGE = 1,
+ MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES = 2,
+};
+
+enum mlx4_ib_qp_type {
+ /*
+ * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
+ * here (and in that order) since the MAD layer uses them as
+ * indices into a 2-entry table.
+ */
+ MLX4_IB_QPT_SMI = IB_QPT_SMI,
+ MLX4_IB_QPT_GSI = IB_QPT_GSI,
+
+ MLX4_IB_QPT_RC = IB_QPT_RC,
+ MLX4_IB_QPT_UC = IB_QPT_UC,
+ MLX4_IB_QPT_UD = IB_QPT_UD,
+ MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6,
+ MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE,
+ MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET,
+ MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI,
+ MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT,
+
+ MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16,
+ MLX4_IB_QPT_PROXY_SMI = 1 << 17,
+ MLX4_IB_QPT_PROXY_GSI = 1 << 18,
+ MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19,
+ MLX4_IB_QPT_TUN_SMI = 1 << 20,
+ MLX4_IB_QPT_TUN_GSI = 1 << 21,
+};
+
+#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
+ MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
+
+enum mlx4_ib_mad_ifc_flags {
+ MLX4_MAD_IFC_IGNORE_MKEY = 1,
+ MLX4_MAD_IFC_IGNORE_BKEY = 2,
+ MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY |
+ MLX4_MAD_IFC_IGNORE_BKEY),
+ MLX4_MAD_IFC_NET_VIEW = 4,
+};
+
+enum {
+ MLX4_NUM_TUNNEL_BUFS = 256,
+};
+
+struct mlx4_ib_tunnel_header {
+ struct mlx4_av av;
+ __be32 remote_qpn;
+ __be32 qkey;
+ __be16 vlan;
+ u8 mac[6];
+ __be16 pkey_index;
+ u8 reserved[6];
+};
+
+struct mlx4_ib_buf {
+ void *addr;
+ dma_addr_t map;
+};
+
+struct mlx4_rcv_tunnel_hdr {
+ __be32 flags_src_qp; /* flags[6:5] is defined for VLANs:
+ * 0x0 - no vlan was in the packet
+ * 0x01 - C-VLAN was in the packet */
+ u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */
+ u8 reserved;
+ __be16 pkey_index;
+ __be16 sl_vid;
+ __be16 slid_mac_47_32;
+ __be32 mac_31_0;
+};
+
+struct mlx4_ib_proxy_sqp_hdr {
+ struct ib_grh grh;
+ struct mlx4_rcv_tunnel_hdr tun;
+} __packed;
+
+struct mlx4_roce_smac_vlan_info {
+ u64 smac;
+ int smac_index;
+ int smac_port;
+ u64 candidate_smac;
+ int candidate_smac_index;
+ int candidate_smac_port;
+ u16 vid;
+ int vlan_index;
+ int vlan_port;
+ u16 candidate_vid;
+ int candidate_vlan_index;
+ int candidate_vlan_port;
+ int update_vid;
+};
+
+struct mlx4_ib_qpg_data {
+ unsigned long *tss_bitmap;
+ unsigned long *rss_bitmap;
+ struct mlx4_ib_qp *qpg_parent;
+ int tss_qpn_base;
+ int rss_qpn_base;
+ u32 tss_child_count;
+ u32 rss_child_count;
+ u32 qpg_tss_mask_sz;
+};
+
struct mlx4_ib_qp {
struct ib_qp ibqp;
struct mlx4_qp mqp;
@@ -168,14 +296,13 @@ struct mlx4_ib_qp {
int sq_spare_wqes;
struct mlx4_ib_wq sq;
+ enum mlx4_ib_qp_type mlx4_ib_qp_type;
struct ib_umem *umem;
struct mlx4_mtt mtt;
int buf_size;
struct mutex mutex;
- u32 flags;
- struct list_head xrc_reg_list;
- spinlock_t xrc_reg_list_lock;
u16 xrcdn;
+ u32 flags;
u8 port;
u8 alt_port;
u8 atomic_rd_en;
@@ -183,9 +310,16 @@ struct mlx4_ib_qp {
u8 sq_no_prefetch;
u8 state;
int mlx_type;
+ enum ib_qpg_type qpg_type;
+ struct mlx4_ib_qpg_data *qpg_data;
struct list_head gid_list;
- int max_inline_data;
- struct mlx4_bf bf;
+ struct list_head steering_rules;
+ struct mlx4_ib_buf *sqp_proxy_rcv;
+ struct mlx4_roce_smac_vlan_info pri;
+ struct mlx4_roce_smac_vlan_info alt;
+ struct list_head rules_list;
+ int max_inline_data;
+ struct mlx4_bf bf;
};
struct mlx4_ib_srq {
@@ -208,6 +342,138 @@ struct mlx4_ib_ah {
union mlx4_ext_av av;
};
+/****************************************/
+/* alias guid support */
+/****************************************/
+#define NUM_PORT_ALIAS_GUID 2
+#define NUM_ALIAS_GUID_IN_REC 8
+#define NUM_ALIAS_GUID_REC_IN_PORT 16
+#define GUID_REC_SIZE 8
+#define NUM_ALIAS_GUID_PER_PORT 128
+#define MLX4_NOT_SET_GUID (0x00LL)
+#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
+
+enum mlx4_guid_alias_rec_status {
+ MLX4_GUID_INFO_STATUS_IDLE,
+ MLX4_GUID_INFO_STATUS_SET,
+ MLX4_GUID_INFO_STATUS_PENDING,
+};
+
+enum mlx4_guid_alias_rec_ownership {
+ MLX4_GUID_DRIVER_ASSIGN,
+ MLX4_GUID_SYSADMIN_ASSIGN,
+ MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
+};
+
+enum mlx4_guid_alias_rec_method {
+ MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
+ MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE,
+};
+
+struct mlx4_sriov_alias_guid_info_rec_det {
+ u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
+ ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
+ enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
+ u8 method; /*set or delete*/
+ enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
+};
+
+struct mlx4_sriov_alias_guid_port_rec_det {
+ struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
+ struct workqueue_struct *wq;
+ struct delayed_work alias_guid_work;
+ u8 port;
+ struct mlx4_sriov_alias_guid *parent;
+ struct list_head cb_list;
+};
+
+struct mlx4_sriov_alias_guid {
+ struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS];
+ spinlock_t ag_work_lock;
+ struct ib_sa_client *sa_client;
+};
+
+struct mlx4_ib_demux_work {
+ struct work_struct work;
+ struct mlx4_ib_dev *dev;
+ int slave;
+ int do_init;
+ u8 port;
+
+};
+
+struct mlx4_ib_tun_tx_buf {
+ struct mlx4_ib_buf buf;
+ struct ib_ah *ah;
+};
+
+struct mlx4_ib_demux_pv_qp {
+ struct ib_qp *qp;
+ enum ib_qp_type proxy_qpt;
+ struct mlx4_ib_buf *ring;
+ struct mlx4_ib_tun_tx_buf *tx_ring;
+ spinlock_t tx_lock;
+ unsigned tx_ix_head;
+ unsigned tx_ix_tail;
+};
+
+enum mlx4_ib_demux_pv_state {
+ DEMUX_PV_STATE_DOWN,
+ DEMUX_PV_STATE_STARTING,
+ DEMUX_PV_STATE_ACTIVE,
+ DEMUX_PV_STATE_DOWNING,
+};
+
+struct mlx4_ib_demux_pv_ctx {
+ int port;
+ int slave;
+ enum mlx4_ib_demux_pv_state state;
+ int has_smi;
+ struct ib_device *ib_dev;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ struct work_struct work;
+ struct workqueue_struct *wq;
+ struct mlx4_ib_demux_pv_qp qp[2];
+};
+
+struct mlx4_ib_demux_ctx {
+ struct ib_device *ib_dev;
+ int port;
+ struct workqueue_struct *wq;
+ struct workqueue_struct *ud_wq;
+ spinlock_t ud_lock;
+ __be64 subnet_prefix;
+ __be64 guid_cache[128];
+ struct mlx4_ib_dev *dev;
+ /* the following lock protects both mcg_table and mcg_mgid0_list */
+ struct mutex mcg_table_lock;
+ struct rb_root mcg_table;
+ struct list_head mcg_mgid0_list;
+ struct workqueue_struct *mcg_wq;
+ struct mlx4_ib_demux_pv_ctx **tun;
+ atomic_t tid;
+ int flushing; /* flushing the work queue */
+};
+
+struct mlx4_ib_sriov {
+ struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS];
+ struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS];
+ /* when using this spinlock you should use "irq" because
+ * it may be called from interrupt context.*/
+ spinlock_t going_down_lock;
+ int is_going_down;
+
+ struct mlx4_sriov_alias_guid alias_guid;
+
+ /* CM paravirtualization fields */
+ struct list_head cm_list;
+ spinlock_t id_map_lock;
+ struct rb_root sl_id_map;
+ struct idr pv_id_table;
+};
+
struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
@@ -215,6 +481,42 @@ struct mlx4_ib_iboe {
union ib_gid gid_table[MLX4_MAX_PORTS][128];
};
+struct pkey_mgt {
+ u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ struct list_head pkey_port_list[MLX4_MFUNC_MAX];
+ struct kobject *device_parent[MLX4_MFUNC_MAX];
+};
+
+struct mlx4_ib_iov_sysfs_attr {
+ void *ctx;
+ struct kobject *kobj;
+ unsigned long data;
+ u32 entry_num;
+ char name[15];
+ struct device_attribute dentry;
+ struct device *dev;
+};
+
+struct mlx4_ib_iov_sysfs_attr_ar {
+ struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1];
+};
+
+struct mlx4_ib_iov_port {
+ char name[100];
+ u8 num;
+ struct mlx4_ib_dev *dev;
+ struct list_head list;
+ struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar;
+ struct ib_port_attr attr;
+ struct kobject *cur_port;
+ struct kobject *admin_alias_parent;
+ struct kobject *gids_parent;
+ struct kobject *pkeys_parent;
+ struct kobject *mcgs_parent;
+ struct mlx4_ib_iov_sysfs_attr mcg_dentry;
+};
+
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
@@ -226,12 +528,35 @@ struct mlx4_ib_dev {
struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
struct ib_ah *sm_ah[MLX4_MAX_PORTS];
spinlock_t sm_lock;
+ struct mlx4_ib_sriov sriov;
struct mutex cap_mask_mutex;
- struct mutex xrc_reg_mutex;
- int ib_active;
+ bool ib_active;
struct mlx4_ib_iboe iboe;
int counters[MLX4_MAX_PORTS];
+ int *eq_table;
+ int eq_added;
+ struct kobject *iov_parent;
+ struct kobject *ports_parent;
+ struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
+ struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
+ struct pkey_mgt pkeys;
+ unsigned long *ib_uc_qpns_bitmap;
+ int steer_qpn_count;
+ int steer_qpn_base;
+};
+
+struct ib_event_work {
+ struct work_struct work;
+ struct mlx4_ib_dev *ib_dev;
+ struct mlx4_eqe ib_eqe;
+};
+
+struct mlx4_ib_qp_tunnel_init_attr {
+ struct ib_qp_init_attr init_attr;
+ int slave;
+ enum ib_qp_type proxy_qp_type;
+ u8 port;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
@@ -303,6 +628,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
+int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
+void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
+
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db);
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@@ -310,9 +638,12 @@ void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db)
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem);
+int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
+ u64 start_va,
+ int *num_of_mtts);
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
- struct ib_udata *udata);
+ struct ib_udata *udata, int mr_id);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
@@ -322,6 +653,7 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
+int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq);
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
struct ib_ucontext *context,
struct ib_udata *udata);
@@ -338,11 +670,6 @@ int mlx4_ib_destroy_ah(struct ib_ah *ah);
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
-struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
- struct ib_cq *xrc_cq,
- struct ib_xrcd *xrcd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
@@ -364,7 +691,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad);
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -379,20 +706,20 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
u64 iova);
int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
-int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
- u32 *qp_num);
-int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
- struct ib_qp_attr *attr, int attr_mask);
-int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
- struct ib_qp_attr *attr, int attr_mask,
- struct ib_qp_init_attr *init_attr);
-int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
-int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
+int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view);
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey, int netw_view);
+int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid, int netw_view);
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port);
+int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index,
+ union mlx4_counter *counter, u8 clear);
+
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
@@ -403,7 +730,73 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
return !!(ah->av.ib.g_slid & 0x80);
}
+int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
+void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
+void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
+int mlx4_ib_mcg_init(void);
+void mlx4_ib_mcg_destroy(void);
+
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
+
+int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *sa_mad);
+int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *mad);
+
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid);
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+ enum ib_event_type type);
+
+void mlx4_ib_tunnels_update_work(struct work_struct *work);
+
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type qpt, struct ib_wc *wc,
+ struct ib_grh *grh, struct ib_mad *mad);
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
+ u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
+__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
+
+int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
+ struct ib_mad *mad, int is_eth);
+
+int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
+ struct ib_mad *mad);
+
+void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev);
+void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id);
+
+/* alias guid support */
+void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port);
+int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev);
+void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev);
+void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port);
+
+void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
+ int block_num,
+ u8 port_num, u8 *p_data);
+
+void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev,
+ int block_num, u8 port_num,
+ u8 *p_data);
+
+int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr);
+void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr);
+ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
+
+int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
+
+void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
+
+__be64 mlx4_ib_gen_node_guid(void);
+
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ int is_attach);
+
#endif /* MLX4_IB_H */
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mr.c b/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
index c49b460..24d9520 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/mr.c
@@ -31,6 +31,15 @@
* SOFTWARE.
*/
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+#ifdef __linux__
+#include <linux/proc_fs.h>
+#include <linux/cred.h>
+#endif
+
#include "mlx4_ib.h"
static u32 convert_access(int acc)
@@ -41,13 +50,67 @@ static u32 convert_access(int acc)
(acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
MLX4_PERM_LOCAL_READ;
}
+#ifdef __linux__
+static ssize_t shared_mr_proc_read(struct file *file,
+ char __user *buffer,
+ size_t len,
+ loff_t *offset)
+{
+
+ return -ENOSYS;
+
+}
+
+static ssize_t shared_mr_proc_write(struct file *file,
+ const char __user *buffer,
+ size_t len,
+ loff_t *offset)
+{
+
+ return -ENOSYS;
+}
+
+static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+
+ struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
+ struct mlx4_shared_mr_info *smr_info =
+ (struct mlx4_shared_mr_info *)pde->data;
+
+ /* Prevent any mapping not on start of area */
+ if (vma->vm_pgoff != 0)
+ return -EINVAL;
+
+ return ib_umem_map_to_vma(smr_info->umem,
+ vma);
+
+}
+
+static const struct file_operations shared_mr_proc_ops = {
+ .owner = THIS_MODULE,
+ .read = shared_mr_proc_read,
+ .write = shared_mr_proc_write,
+ .mmap = shared_mr_mmap
+};
+
+static mode_t convert_shared_access(int acc)
+{
+ return (acc & IB_ACCESS_SHARED_MR_USER_READ ? S_IRUSR : 0) |
+ (acc & IB_ACCESS_SHARED_MR_USER_WRITE ? S_IWUSR : 0) |
+ (acc & IB_ACCESS_SHARED_MR_GROUP_READ ? S_IRGRP : 0) |
+ (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
+ (acc & IB_ACCESS_SHARED_MR_OTHER_READ ? S_IROTH : 0) |
+ (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);
+
+}
+#endif
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx4_ib_mr *mr;
int err;
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -74,118 +137,350 @@ err_free:
return ERR_PTR(err);
}
+static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
+ struct mlx4_mtt *mtt,
+ u64 mtt_size,
+ u64 mtt_shift,
+ u64 len,
+ u64 cur_start_addr,
+ u64 *pages,
+ int *start_index,
+ int *npages)
+{
+ int k;
+ int err = 0;
+ u64 mtt_entries;
+ u64 cur_end_addr = cur_start_addr + len;
+ u64 cur_end_addr_aligned = 0;
+
+ len += (cur_start_addr & (mtt_size-1ULL));
+ cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
+ len += (cur_end_addr_aligned - cur_end_addr);
+ if (len & (mtt_size-1ULL)) {
+ WARN(1 ,
+ "write_block: len %llx is not aligned to mtt_size %llx\n",
+ len, mtt_size);
+ return -EINVAL;
+ }
+
+
+ mtt_entries = (len >> mtt_shift);
+
+ /* Align the MTT start address to
+ the mtt_size.
+ Required to handle cases when the MR
+ starts in the middle of an MTT record.
+ Was not required in old code since
+ the physical addresses provided by
+ the dma subsystem were page aligned,
+ which was also the MTT size.
+ */
+ cur_start_addr = round_down(cur_start_addr, mtt_size);
+ /* A new block is started ...*/
+ for (k = 0; k < mtt_entries; ++k) {
+ pages[*npages] = cur_start_addr + (mtt_size * k);
+ (*npages)++;
+ /*
+ * Be friendly to mlx4_write_mtt() and
+ * pass it chunks of appropriate size.
+ */
+ if (*npages == PAGE_SIZE / sizeof(u64)) {
+ err = mlx4_write_mtt(dev->dev,
+ mtt, *start_index,
+ *npages, pages);
+ if (err)
+ return err;
+
+ (*start_index) += *npages;
+ *npages = 0;
+ }
+ }
+
+ return 0;
+}
+
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
struct ib_umem_chunk *chunk;
- int i, j, k;
- int n;
- int len;
+ int j;
+ u64 len = 0;
int err = 0;
+ u64 mtt_size;
+ u64 cur_start_addr = 0;
+ u64 mtt_shift;
+ int start_index = 0;
+ int npages = 0;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
return -ENOMEM;
- i = n = 0;
+ mtt_shift = mtt->page_shift;
+ mtt_size = 1ULL << mtt_shift;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- umem->page_size * k;
- /*
- * Be friendly to mlx4_write_mtt() and
- * pass it chunks of appropriate size.
- */
- if (i == PAGE_SIZE / sizeof (u64)) {
- err = mlx4_write_mtt(dev->dev, mtt, n,
- i, pages);
- if (err)
- goto out;
- n += i;
- i = 0;
- }
+ if (cur_start_addr + len ==
+ sg_dma_address(&chunk->page_list[j])) {
+ /* still the same block */
+ len += sg_dma_len(&chunk->page_list[j]);
+ continue;
}
+ /* A new block is started ...*/
+ /* If len is malaligned, write an extra mtt entry to
+ cover the misaligned area (round up the division)
+ */
+ err = mlx4_ib_umem_write_mtt_block(dev,
+ mtt, mtt_size, mtt_shift,
+ len, cur_start_addr,
+ pages,
+ &start_index,
+ &npages);
+ if (err)
+ goto out;
+
+ cur_start_addr =
+ sg_dma_address(&chunk->page_list[j]);
+ len = sg_dma_len(&chunk->page_list[j]);
}
- if (i)
- err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
+ /* Handle the last block */
+ if (len > 0) {
+ /* If len is malaligned, write an extra mtt entry to cover
+ the misaligned area (round up the division)
+ */
+ err = mlx4_ib_umem_write_mtt_block(dev,
+ mtt, mtt_size, mtt_shift,
+ len, cur_start_addr,
+ pages,
+ &start_index,
+ &npages);
+ if (err)
+ goto out;
+ }
+
+
+ if (npages)
+ err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
out:
free_page((unsigned long) pages);
return err;
}
-static int handle_hugetlb_user_mr(struct ib_pd *pd, struct mlx4_ib_mr *mr,
- u64 start, u64 virt_addr, int access_flags)
+static inline u64 alignment_of(u64 ptr)
{
-#if defined(CONFIG_HUGETLB_PAGE) && !defined(__powerpc__) && !defined(__ia64__)
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct ib_umem_chunk *chunk;
- unsigned dsize;
- dma_addr_t daddr;
- unsigned cur_size = 0;
- dma_addr_t uninitialized_var(cur_addr);
- int n;
- struct ib_umem *umem = mr->umem;
- u64 *arr;
- int err = 0;
- int i;
- int j = 0;
- int off = start & (HPAGE_SIZE - 1);
+ return ilog2(ptr & (~(ptr-1)));
+}
- n = DIV_ROUND_UP(off + umem->length, HPAGE_SIZE);
- arr = kmalloc(n * sizeof *arr, GFP_KERNEL);
- if (!arr)
- return -ENOMEM;
+static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
+ u64 current_block_end,
+ u64 block_shift)
+{
+ /* Check whether the alignment of the new block
+ is aligned as well as the previous block.
+ Block address must start with zeros till size of entity_size.
+ */
+ if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
+ /* It is not as well aligned as the
+ previous block-reduce the mtt size
+ accordingly.
+ Here we take the last right bit
+ which is 1.
+ */
+ block_shift = alignment_of(next_block_start);
+
+ /* Check whether the alignment of the
+ end of previous block - is it aligned
+ as well as the start of the block
+ */
+ if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
+ /* It is not as well aligned as
+ the start of the block - reduce the
+ mtt size accordingly.
+ */
+ block_shift = alignment_of(current_block_end);
+
+ return block_shift;
+}
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (i = 0; i < chunk->nmap; ++i) {
- daddr = sg_dma_address(&chunk->page_list[i]);
- dsize = sg_dma_len(&chunk->page_list[i]);
- if (!cur_size) {
- cur_addr = daddr;
- cur_size = dsize;
- } else if (cur_addr + cur_size != daddr) {
- err = -EINVAL;
- goto out;
- } else
- cur_size += dsize;
+/* Calculate optimal mtt size based on contiguous pages.
+* Function will return also the number of pages that are not aligned to the
+ calculated mtt_size to be added to total number
+ of pages. For that we should check the first chunk length & last chunk
+ length and if not aligned to mtt_size we should increment
+ the non_aligned_pages number.
+ All chunks in the middle already handled as part of mtt shift calculation
+ for both their start & end addresses.
+*/
+int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
+ u64 start_va,
+ int *num_of_mtts)
+{
+ struct ib_umem_chunk *chunk;
+ int j;
+ u64 block_shift = MLX4_MAX_MTT_SHIFT;
+ u64 current_block_len = 0;
+ u64 current_block_start = 0;
+ u64 misalignment_bits;
+ u64 first_block_start = 0;
+ u64 last_block_end = 0;
+ u64 total_len = 0;
+ u64 last_block_aligned_end = 0;
+ u64 min_shift = ilog2(umem->page_size);
+
+ list_for_each_entry(chunk, &umem->chunk_list, list) {
+ /* Initialization - save the first chunk start as
+ the current_block_start - block means contiguous pages.
+ */
+ if (current_block_len == 0 && current_block_start == 0) {
+ first_block_start = current_block_start =
+ sg_dma_address(&chunk->page_list[0]);
+ /* Find the bits that are different between
+ the physical address and the virtual
+ address for the start of the MR.
+ */
+ /* umem_get aligned the start_va to a page
+ boundry. Therefore, we need to align the
+ start va to the same boundry */
+ /* misalignment_bits is needed to handle the
+ case of a single memory region. In this
+ case, the rest of the logic will not reduce
+ the block size. If we use a block size
+ which is bigger than the alignment of the
+ misalignment bits, we might use the virtual
+ page number instead of the physical page
+ number, resulting in access to the wrong
+ data. */
+ misalignment_bits =
+ (start_va & (~(((u64)(umem->page_size))-1ULL)))
+ ^ current_block_start;
+ block_shift = min(alignment_of(misalignment_bits)
+ , block_shift);
+ }
- if (cur_size > HPAGE_SIZE) {
- err = -EINVAL;
- goto out;
- } else if (cur_size == HPAGE_SIZE) {
- cur_size = 0;
- arr[j++] = cur_addr;
+ /* Go over the scatter entries in the current chunk, check
+ if they continue the previous scatter entry.
+ */
+ for (j = 0; j < chunk->nmap; ++j) {
+ u64 next_block_start =
+ sg_dma_address(&chunk->page_list[j]);
+ u64 current_block_end = current_block_start
+ + current_block_len;
+ /* If we have a split (non-contig.) between two block*/
+ if (current_block_end != next_block_start) {
+ block_shift = mlx4_ib_umem_calc_block_mtt(
+ next_block_start,
+ current_block_end,
+ block_shift);
+
+ /* If we reached the minimum shift for 4k
+ page we stop the loop.
+ */
+ if (block_shift <= min_shift)
+ goto end;
+
+ /* If not saved yet we are in first block -
+ we save the length of first block to
+ calculate the non_aligned_pages number at
+ * the end.
+ */
+ total_len += current_block_len;
+
+ /* Start a new block */
+ current_block_start = next_block_start;
+ current_block_len =
+ sg_dma_len(&chunk->page_list[j]);
+ continue;
}
+ /* The scatter entry is another part of
+ the current block, increase the block size
+ * An entry in the scatter can be larger than
+ 4k (page) as of dma mapping
+ which merge some blocks together.
+ */
+ current_block_len +=
+ sg_dma_len(&chunk->page_list[j]);
}
+ }
- if (cur_size) {
- arr[j++] = cur_addr;
+ /* Account for the last block in the total len */
+ total_len += current_block_len;
+ /* Add to the first block the misalignment that it suffers from.*/
+ total_len += (first_block_start & ((1ULL<<block_shift)-1ULL));
+ last_block_end = current_block_start+current_block_len;
+ last_block_aligned_end = round_up(last_block_end, 1<<block_shift);
+ total_len += (last_block_aligned_end - last_block_end);
+
+ WARN((total_len & ((1ULL<<block_shift)-1ULL)),
+ " misaligned total length detected (%llu, %llu)!",
+ total_len, block_shift);
+
+ *num_of_mtts = total_len >> block_shift;
+end:
+ if (block_shift < min_shift) {
+ /* If shift is less than the min we set a WARN and
+ return the min shift.
+ */
+ WARN(1,
+ "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
+ block_shift);
+
+ block_shift = min_shift;
}
+ return block_shift;
+}
- err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, umem->length,
- convert_access(access_flags), n, HPAGE_SHIFT, &mr->mmr);
- if (err)
- goto out;
+#ifdef __linux__
+static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
+{
+ struct proc_dir_entry *mr_proc_entry;
+ mode_t mode = S_IFREG;
+ char name_buff[16];
+
+ mode |= convert_shared_access(access_flags);
+ sprintf(name_buff, "%X", mr_id);
+ mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
+ mr->smr_info->mr_id = mr_id;
+ mr->smr_info->umem = mr->umem;
+
+ mr_proc_entry = proc_create_data(name_buff, mode,
+ mlx4_mrs_dir_entry,
+ &shared_mr_proc_ops,
+ mr->smr_info);
+
+ if (!mr_proc_entry) {
+ pr_err("prepare_shared_mr failed via proc\n");
+ kfree(mr->smr_info);
+ return -ENODEV;
+ }
- err = mlx4_write_mtt(dev->dev, &mr->mmr.mtt, 0, n, arr);
+ current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
+ mr_proc_entry->size = mr->umem->length;
+ return 0;
-out:
- kfree(arr);
- return err;
-#else
- return -ENOSYS;
-#endif
}
+static int is_shared_mr(int access_flags)
+{
+ /* We should check whether IB_ACCESS_SHARED_MR_USER_READ or
+ other shared bits were turned on.
+ */
+ return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ |
+ IB_ACCESS_SHARED_MR_USER_WRITE |
+ IB_ACCESS_SHARED_MR_GROUP_READ |
+ IB_ACCESS_SHARED_MR_GROUP_WRITE |
+ IB_ACCESS_SHARED_MR_OTHER_READ |
+ IB_ACCESS_SHARED_MR_OTHER_WRITE));
+
+}
+#endif
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
- struct ib_udata *udata)
+ struct ib_udata *udata,
+ int mr_id)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
@@ -193,38 +488,49 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int err;
int n;
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
+ access_flags, 0);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
}
- if (!mr->umem->hugetlb ||
- handle_hugetlb_user_mr(pd, mr, start, virt_addr, access_flags)) {
- n = ib_umem_page_count(mr->umem);
- shift = ilog2(mr->umem->page_size);
-
- err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
- convert_access(access_flags), n, shift, &mr->mmr);
- if (err)
- goto err_umem;
+ n = ib_umem_page_count(mr->umem);
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start,
+ &n);
+ err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
+ convert_access(access_flags), n, shift, &mr->mmr);
+ if (err)
+ goto err_umem;
- err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
- if (err)
- goto err_mr;
- }
+ err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
+ if (err)
+ goto err_mr;
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+#ifdef __linux__
+ /* Check whether MR should be shared */
+ if (is_shared_mr(access_flags)) {
+ /* start address and length must be aligned to page size in order
+ to map a full page and preventing leakage of data */
+ if (mr->umem->offset || (length & ~PAGE_MASK)) {
+ err = -EINVAL;
+ goto err_mr;
+ }
+ err = prepare_shared_mr(mr, access_flags, mr_id);
+ if (err)
+ goto err_mr;
+ }
+#endif
return &mr->ibmr;
err_mr:
@@ -239,13 +545,36 @@ err_free:
return ERR_PTR(err);
}
+
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
+ if (mr->smr_info) {
+ /* When master/parent shared mr is dereged there is
+ no ability to share this mr any more - its mr_id will be
+ returned to the kernel as part of ib_uverbs_dereg_mr
+ and may be allocated again as part of other reg_mr.
+ */
+ char name_buff[16];
+
+ sprintf(name_buff, "%X", mr->smr_info->mr_id);
+ /* Remove proc entry is checking internally that no operation
+ was strated on that proc fs file and if in the middle
+ current process will wait till end of operation.
+ That's why no sync mechanism is needed when we release
+ below the shared umem.
+ */
+#ifdef __linux__
+ remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
+ kfree(mr->smr_info);
+#endif
+ }
+
if (mr->umem)
ib_umem_release(mr->umem);
+
kfree(mr);
return 0;
@@ -258,7 +587,7 @@ struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
struct mlx4_ib_mr *mr;
int err;
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@@ -291,7 +620,7 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device
struct mlx4_ib_fast_reg_page_list *mfrpl;
int size = page_list_len * sizeof (u64);
- if (page_list_len > MAX_FAST_REG_PAGES)
+ if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
return ERR_PTR(-EINVAL);
mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
@@ -403,7 +732,7 @@ int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
err = mlx4_SYNC_TPT(mdev);
if (err)
- printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when "
+ pr_warn("SYNC_TPT error %d when "
"unmapping FMRs\n", err);
return 0;
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/qp.c b/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
index 8958c1e..980d121 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/qp.c
@@ -32,15 +32,24 @@
*/
#include <linux/log2.h>
+#include <linux/slab.h>
#include <linux/netdevice.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib_mad.h>
#include <linux/mlx4/qp.h>
+#include <linux/mlx4/driver.h>
#include <linux/io.h>
+#ifndef __linux__
+#define asm __asm
+#endif
+
#include "mlx4_ib.h"
#include "user.h"
@@ -52,25 +61,22 @@ enum {
MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
MLX4_IB_LINK_TYPE_IB = 0,
- MLX4_IB_LINK_TYPE_ETH = 1,
+ MLX4_IB_LINK_TYPE_ETH = 1
};
enum {
/*
- * Largest possible UD header: send with GRH and immediate data.
- * 4 bytes added to accommodate for eth header instead of lrh
+ * Largest possible UD header: send with GRH and immediate
+ * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
+ * tag. (LRH would only use 8 bytes, so Ethernet is the
+ * biggest case)
*/
- MLX4_IB_UD_HEADER_SIZE = 76,
- MLX4_IB_MAX_RAW_ETY_HDR_SIZE = 12
+ MLX4_IB_UD_HEADER_SIZE = 82,
+ MLX4_IB_LSO_HEADER_SPARE = 128,
};
enum {
- MLX4_IBOE_ETHERTYPE = 0x8915
-};
-
-struct mlx4_ib_xrc_reg_entry {
- struct list_head list;
- void *context;
+ MLX4_IB_IBOE_ETHERTYPE = 0x8915
};
struct mlx4_ib_sqp {
@@ -83,7 +89,13 @@ struct mlx4_ib_sqp {
};
enum {
- MLX4_IB_MIN_SQ_STRIDE = 6
+ MLX4_IB_MIN_SQ_STRIDE = 6,
+ MLX4_IB_CACHE_LINE_SIZE = 64,
+};
+
+enum {
+ MLX4_RAW_QP_MTU = 7,
+ MLX4_RAW_QP_MSGMAX = 31,
};
static const __be32 mlx4_ib_opcode[] = {
@@ -104,32 +116,77 @@ static const __be32 mlx4_ib_opcode[] = {
#ifndef wc_wmb
#if defined(__i386__)
- #define wc_wmb() __asm volatile("lock; addl $0,0(%%esp) " ::: "memory")
+ #define wc_wmb() asm volatile("lock; addl $0,0(%%esp) " ::: "memory")
#elif defined(__x86_64__)
- #define wc_wmb() __asm volatile("sfence" ::: "memory")
+ #define wc_wmb() asm volatile("sfence" ::: "memory")
#elif defined(__ia64__)
- #define wc_wmb() __asm volatile("fwb" ::: "memory")
+ #define wc_wmb() asm volatile("fwb" ::: "memory")
#else
#define wc_wmb() wmb()
#endif
#endif
-
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
return container_of(mqp, struct mlx4_ib_sqp, qp);
}
+static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ if (!mlx4_is_master(dev->dev))
+ return 0;
+
+ return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
+ qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
+ 8 * MLX4_MFUNC_MAX;
+}
+
static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
- return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
- qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
+ int proxy_sqp = 0;
+ int real_sqp = 0;
+ int i;
+ /* PPF or Native -- real SQP */
+ real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
+ qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
+ qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
+ if (real_sqp)
+ return 1;
+ /* VF or PF -- proxy SQP */
+ if (mlx4_is_mfunc(dev->dev)) {
+ for (i = 0; i < dev->dev->caps.num_ports; i++) {
+ if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
+ qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
+ proxy_sqp = 1;
+ break;
+ }
+ }
+ }
+ return proxy_sqp;
}
+/* used for INIT/CLOSE port logic */
static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
- return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
- qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
+ int proxy_qp0 = 0;
+ int real_qp0 = 0;
+ int i;
+ /* PPF or Native -- real QP0 */
+ real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
+ qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
+ qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
+ if (real_qp0)
+ return 1;
+ /* VF or PF -- proxy QP0 */
+ if (mlx4_is_mfunc(dev->dev)) {
+ for (i = 0; i < dev->dev->caps.num_ports; i++) {
+ if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
+ proxy_qp0 = 1;
+ break;
+ }
+ }
+ }
+ return proxy_qp0;
}
static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
@@ -237,16 +294,14 @@ static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
struct ib_event event;
- struct mlx4_ib_qp *mqp = to_mibqp(qp);
- struct ib_qp *ibqp = &mqp->ibqp;
- struct mlx4_ib_xrc_reg_entry *ctx_entry;
- unsigned long flags;
+ struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
if (type == MLX4_EVENT_TYPE_PATH_MIG)
to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
if (ibqp->event_handler) {
event.device = ibqp->device;
+ event.element.qp = ibqp;
switch (type) {
case MLX4_EVENT_TYPE_PATH_MIG:
event.event = IB_EVENT_PATH_MIG;
@@ -273,27 +328,16 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
event.event = IB_EVENT_QP_ACCESS_ERR;
break;
default:
- printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ pr_warn("Unexpected event type %d "
"on QP %06x\n", type, qp->qpn);
return;
}
- if (unlikely(ibqp->qp_type == IB_QPT_XRC &&
- mqp->flags & MLX4_IB_XRC_RCV)) {
- event.event |= IB_XRC_QP_EVENT_FLAG;
- event.element.xrc_qp_num = ibqp->qp_num;
- spin_lock_irqsave(&mqp->xrc_reg_list_lock, flags);
- list_for_each_entry(ctx_entry, &mqp->xrc_reg_list, list)
- ibqp->event_handler(&event, ctx_entry->context);
- spin_unlock_irqrestore(&mqp->xrc_reg_list_lock, flags);
- return;
- }
- event.element.qp = ibqp;
ibqp->event_handler(&event, ibqp->qp_context);
}
}
-static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
+static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
{
/*
* UD WQEs must have a datagram segment.
@@ -302,20 +346,29 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
* header and space for the ICRC).
*/
switch (type) {
- case IB_QPT_UD:
+ case MLX4_IB_QPT_UD:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg) +
- ((flags & MLX4_IB_QP_LSO) ? 128 : 0);
- case IB_QPT_UC:
+ ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
+ case MLX4_IB_QPT_PROXY_SMI_OWNER:
+ case MLX4_IB_QPT_PROXY_SMI:
+ case MLX4_IB_QPT_PROXY_GSI:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_datagram_seg) + 64;
+ case MLX4_IB_QPT_TUN_SMI_OWNER:
+ case MLX4_IB_QPT_TUN_GSI:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_datagram_seg);
+
+ case MLX4_IB_QPT_UC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
- case IB_QPT_XRC:
- case IB_QPT_RC:
+ case MLX4_IB_QPT_RC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
- sizeof (struct mlx4_wqe_atomic_seg) +
+ sizeof (struct mlx4_wqe_masked_atomic_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
- case IB_QPT_SMI:
- case IB_QPT_GSI:
+ case MLX4_IB_QPT_SMI:
+ case MLX4_IB_QPT_GSI:
return sizeof (struct mlx4_wqe_ctrl_seg) +
ALIGN(MLX4_IB_UD_HEADER_SIZE +
DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
@@ -325,44 +378,28 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
ALIGN(4 +
sizeof (struct mlx4_wqe_inline_seg),
sizeof (struct mlx4_wqe_data_seg));
- case IB_QPT_RAW_ETY:
- return sizeof(struct mlx4_wqe_ctrl_seg) +
- ALIGN(MLX4_IB_MAX_RAW_ETY_HDR_SIZE +
- sizeof(struct mlx4_wqe_inline_seg),
- sizeof(struct mlx4_wqe_data_seg));
-
default:
return sizeof (struct mlx4_wqe_ctrl_seg);
}
}
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- int is_user, int has_srq_or_is_xrc, struct mlx4_ib_qp *qp)
+ int is_user, int has_rq, struct mlx4_ib_qp *qp)
{
/* Sanity check RQ size before proceeding */
if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
- cap->max_recv_sge >
- min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)) {
- mlx4_ib_dbg("Requested RQ size (sge or wr) too large");
+ cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
return -EINVAL;
- }
- if (has_srq_or_is_xrc) {
- /* QPs attached to an SRQ should have no RQ */
- if (cap->max_recv_wr) {
- mlx4_ib_dbg("non-zero RQ size for QP using SRQ");
+ if (!has_rq) {
+ if (cap->max_recv_wr)
return -EINVAL;
- }
qp->rq.wqe_cnt = qp->rq.max_gs = 0;
} else {
/* HW requires >= 1 RQ entry with >= 1 gather entry */
- if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
- mlx4_ib_dbg("user QP RQ has 0 wr's or 0 sge's "
- "(wr: 0x%x, sge: 0x%x)", cap->max_recv_wr,
- cap->max_recv_sge);
+ if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
return -EINVAL;
- }
qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
@@ -378,44 +415,32 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
cap->max_recv_sge = min(qp->rq.max_gs,
min(dev->dev->caps.max_sq_sg,
- dev->dev->caps.max_rq_sg));
+ dev->dev->caps.max_rq_sg));
}
- /* We don't support inline sends for kernel QPs (yet) */
-
return 0;
}
static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- enum ib_qp_type type, struct mlx4_ib_qp *qp)
+ enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
{
int s;
/* Sanity check SQ size before proceeding */
- if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
- cap->max_send_sge >
- min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
+ if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
+ cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
- sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) {
- mlx4_ib_dbg("Requested SQ resources exceed device maxima");
+ sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
return -EINVAL;
- }
/*
* For MLX transport we need 2 extra S/G entries:
* one for the header and one for the checksum at the end
*/
- if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
- cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) {
- mlx4_ib_dbg("No space for SQP hdr/csum sge's");
- return -EINVAL;
- }
-
- if (type == IB_QPT_RAW_ETY &&
- cap->max_send_sge + 1 > dev->dev->caps.max_sq_sg) {
- mlx4_ib_dbg("No space for RAW ETY hdr");
+ if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
+ type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
+ cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
return -EINVAL;
- }
s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
@@ -434,7 +459,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
* anymore, so we do this only if selective signaling is off.
*
* Further, on 32-bit platforms, we can't use vmap() to make
- * the QP buffer virtually contigious. Thus we have to use
+ * the QP buffer virtually contiguous. Thus we have to use
* constant-sized WRs to make sure a WR is always fully within
* a single page-sized chunk.
*
@@ -457,7 +482,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
*/
if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
qp->sq_signal_bits && BITS_PER_LONG == 64 &&
- type != IB_QPT_SMI && type != IB_QPT_GSI && type != IB_QPT_RAW_ETY)
+ type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
+ !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
+ MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
qp->sq.wqe_shift = ilog2(64);
else
qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
@@ -516,10 +543,8 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
ucmd->log_sq_stride >
ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
- ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) {
- mlx4_ib_dbg("Requested max wqes or wqe stride exceeds max");
+ ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
return -EINVAL;
- }
qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
qp->sq.wqe_shift = ucmd->log_sq_stride;
@@ -530,30 +555,398 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
return 0;
}
+static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
+{
+ int i;
+
+ qp->sqp_proxy_rcv =
+ kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
+ GFP_KERNEL);
+ if (!qp->sqp_proxy_rcv)
+ return -ENOMEM;
+ for (i = 0; i < qp->rq.wqe_cnt; i++) {
+ qp->sqp_proxy_rcv[i].addr =
+ kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ GFP_KERNEL);
+ if (!qp->sqp_proxy_rcv[i].addr)
+ goto err;
+ qp->sqp_proxy_rcv[i].map =
+ ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ }
+ return 0;
+
+err:
+ while (i > 0) {
+ --i;
+ ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ kfree(qp->sqp_proxy_rcv[i].addr);
+ }
+ kfree(qp->sqp_proxy_rcv);
+ qp->sqp_proxy_rcv = NULL;
+ return -ENOMEM;
+}
+
+static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
+{
+ int i;
+
+ for (i = 0; i < qp->rq.wqe_cnt; i++) {
+ ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ kfree(qp->sqp_proxy_rcv[i].addr);
+ }
+ kfree(qp->sqp_proxy_rcv);
+}
+
+static int qp_has_rq(struct ib_qp_init_attr *attr)
+{
+ if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
+ return 0;
+
+ return !attr->srq;
+}
+
+static int init_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp,
+ struct ib_qp_init_attr *attr, int *qpn)
+{
+ struct mlx4_ib_qpg_data *qpg_data;
+ int tss_num, rss_num;
+ int tss_align_num, rss_align_num;
+ int tss_base, rss_base = 0;
+ int err;
+
+ /* Parent is part of the TSS range (in SW TSS ARP is sent via parent) */
+ tss_num = 1 + attr->parent_attrib.tss_child_count;
+ tss_align_num = roundup_pow_of_two(tss_num);
+ rss_num = attr->parent_attrib.rss_child_count;
+ rss_align_num = roundup_pow_of_two(rss_num);
+
+ if (rss_num > 1) {
+ /* RSS is requested */
+ if (!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS))
+ return -ENOSYS;
+ if (rss_align_num > dev->dev->caps.max_rss_tbl_sz)
+ return -EINVAL;
+ /* We must work with power of two */
+ attr->parent_attrib.rss_child_count = rss_align_num;
+ }
+
+ qpg_data = kzalloc(sizeof *qpg_data, GFP_KERNEL);
+ if (!qpg_data)
+ return -ENOMEM;
+
+ if(pqp->flags & MLX4_IB_QP_NETIF)
+ err = mlx4_ib_steer_qp_alloc(dev, tss_align_num, &tss_base);
+ else
+ err = mlx4_qp_reserve_range(dev->dev, tss_align_num,
+ tss_align_num, &tss_base, 1);
+ if (err)
+ goto err1;
+
+ if (tss_num > 1) {
+ u32 alloc = BITS_TO_LONGS(tss_align_num) * sizeof(long);
+ qpg_data->tss_bitmap = kzalloc(alloc, GFP_KERNEL);
+ if (qpg_data->tss_bitmap == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+ bitmap_fill(qpg_data->tss_bitmap, tss_num);
+ /* Note parent takes first index */
+ clear_bit(0, qpg_data->tss_bitmap);
+ }
+
+ if (rss_num > 1) {
+ u32 alloc = BITS_TO_LONGS(rss_align_num) * sizeof(long);
+ err = mlx4_qp_reserve_range(dev->dev, rss_align_num,
+ 1, &rss_base, 0);
+ if (err)
+ goto err3;
+ qpg_data->rss_bitmap = kzalloc(alloc, GFP_KERNEL);
+ if (qpg_data->rss_bitmap == NULL) {
+ err = -ENOMEM;
+ goto err4;
+ }
+ bitmap_fill(qpg_data->rss_bitmap, rss_align_num);
+ }
+
+ qpg_data->tss_child_count = attr->parent_attrib.tss_child_count;
+ qpg_data->rss_child_count = attr->parent_attrib.rss_child_count;
+ qpg_data->qpg_parent = pqp;
+ qpg_data->qpg_tss_mask_sz = ilog2(tss_align_num);
+ qpg_data->tss_qpn_base = tss_base;
+ qpg_data->rss_qpn_base = rss_base;
+
+ pqp->qpg_data = qpg_data;
+ *qpn = tss_base;
+
+ return 0;
+
+err4:
+ mlx4_qp_release_range(dev->dev, rss_base, rss_align_num);
+
+err3:
+ if (tss_num > 1)
+ kfree(qpg_data->tss_bitmap);
+
+err2:
+ if(pqp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, tss_base, tss_align_num);
+ else
+ mlx4_qp_release_range(dev->dev, tss_base, tss_align_num);
+
+err1:
+ kfree(qpg_data);
+ return err;
+}
+
+static void free_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp)
+{
+ struct mlx4_ib_qpg_data *qpg_data = pqp->qpg_data;
+ int align_num;
+
+ if (qpg_data->tss_child_count > 1)
+ kfree(qpg_data->tss_bitmap);
+
+ align_num = roundup_pow_of_two(1 + qpg_data->tss_child_count);
+ if(pqp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, qpg_data->tss_qpn_base, align_num);
+ else
+ mlx4_qp_release_range(dev->dev, qpg_data->tss_qpn_base, align_num);
+
+ if (qpg_data->rss_child_count > 1) {
+ kfree(qpg_data->rss_bitmap);
+ align_num = roundup_pow_of_two(qpg_data->rss_child_count);
+ mlx4_qp_release_range(dev->dev, qpg_data->rss_qpn_base,
+ align_num);
+ }
+
+ kfree(qpg_data);
+}
+
+static int alloc_qpg_qpn(struct ib_qp_init_attr *init_attr,
+ struct mlx4_ib_qp *pqp, int *qpn)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(init_attr->qpg_parent);
+ struct mlx4_ib_qpg_data *qpg_data = mqp->qpg_data;
+ u32 idx, old;
+
+ switch (init_attr->qpg_type) {
+ case IB_QPG_CHILD_TX:
+ if (qpg_data->tss_child_count == 0)
+ return -EINVAL;
+ do {
+ /* Parent took index 0 */
+ idx = find_first_bit(qpg_data->tss_bitmap,
+ qpg_data->tss_child_count + 1);
+ if (idx >= qpg_data->tss_child_count + 1)
+ return -ENOMEM;
+ old = test_and_clear_bit(idx, qpg_data->tss_bitmap);
+ } while (old == 0);
+ idx += qpg_data->tss_qpn_base;
+ break;
+ case IB_QPG_CHILD_RX:
+ if (qpg_data->rss_child_count == 0)
+ return -EINVAL;
+ do {
+ idx = find_first_bit(qpg_data->rss_bitmap,
+ qpg_data->rss_child_count);
+ if (idx >= qpg_data->rss_child_count)
+ return -ENOMEM;
+ old = test_and_clear_bit(idx, qpg_data->rss_bitmap);
+ } while (old == 0);
+ idx += qpg_data->rss_qpn_base;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pqp->qpg_data = qpg_data;
+ *qpn = idx;
+
+ return 0;
+}
+
+static void free_qpg_qpn(struct mlx4_ib_qp *mqp, int qpn)
+{
+ struct mlx4_ib_qpg_data *qpg_data = mqp->qpg_data;
+
+ switch (mqp->qpg_type) {
+ case IB_QPG_CHILD_TX:
+ /* Do range check */
+ qpn -= qpg_data->tss_qpn_base;
+ set_bit(qpn, qpg_data->tss_bitmap);
+ break;
+ case IB_QPG_CHILD_RX:
+ qpn -= qpg_data->rss_qpn_base;
+ set_bit(qpn, qpg_data->rss_bitmap);
+ break;
+ default:
+ /* error */
+ pr_warn("wrong qpg type (%d)\n", mqp->qpg_type);
+ break;
+ }
+}
+
+static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *attr, int *qpn)
+{
+ int err = 0;
+
+ switch (attr->qpg_type) {
+ case IB_QPG_NONE:
+ /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE
+ * BlueFlame setup flow wrongly causes VLAN insertion. */
+ if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 1);
+ } else {
+ if(qp->flags & MLX4_IB_QP_NETIF)
+ err = mlx4_ib_steer_qp_alloc(dev, 1, qpn);
+ else
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 0);
+ }
+ break;
+ case IB_QPG_PARENT:
+ err = init_qpg_parent(dev, qp, attr, qpn);
+ break;
+ case IB_QPG_CHILD_TX:
+ case IB_QPG_CHILD_RX:
+ err = alloc_qpg_qpn(attr, qp, qpn);
+ break;
+ default:
+ qp->qpg_type = IB_QPG_NONE;
+ err = -EINVAL;
+ break;
+ }
+ if (err)
+ return err;
+ qp->qpg_type = attr->qpg_type;
+ return 0;
+}
+
+static void free_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ enum ib_qpg_type qpg_type, int qpn)
+{
+ switch (qpg_type) {
+ case IB_QPG_NONE:
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, qpn, 1);
+ else
+ mlx4_qp_release_range(dev->dev, qpn, 1);
+ break;
+ case IB_QPG_PARENT:
+ free_qpg_parent(dev, qp);
+ break;
+ case IB_QPG_CHILD_TX:
+ case IB_QPG_CHILD_RX:
+ free_qpg_qpn(qp, qpn);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Revert allocation on create_qp_common */
+static void unalloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *attr, int qpn)
+{
+ free_qpn_common(dev, qp, attr->qpg_type, qpn);
+}
+
+static void release_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ free_qpn_common(dev, qp, qp->qpg_type, qp->mqp.qpn);
+}
+
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
+ struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
{
int qpn;
int err;
+ struct mlx4_ib_sqp *sqp;
+ struct mlx4_ib_qp *qp;
+ enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
+
+ /* When tunneling special qps, we use a plain UD qp */
+ if (sqpn) {
+ if (mlx4_is_mfunc(dev->dev) &&
+ (!mlx4_is_master(dev->dev) ||
+ !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
+ if (init_attr->qp_type == IB_QPT_GSI)
+ qp_type = MLX4_IB_QPT_PROXY_GSI;
+ else if (mlx4_is_master(dev->dev))
+ qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
+ else
+ qp_type = MLX4_IB_QPT_PROXY_SMI;
+ }
+ qpn = sqpn;
+ /* add extra sg entry for tunneling */
+ init_attr->cap.max_recv_sge++;
+ } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
+ struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
+ container_of(init_attr,
+ struct mlx4_ib_qp_tunnel_init_attr, init_attr);
+ if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
+ tnl_init->proxy_qp_type != IB_QPT_GSI) ||
+ !mlx4_is_master(dev->dev))
+ return -EINVAL;
+ if (tnl_init->proxy_qp_type == IB_QPT_GSI)
+ qp_type = MLX4_IB_QPT_TUN_GSI;
+ else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
+ qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
+ else
+ qp_type = MLX4_IB_QPT_TUN_SMI;
+ /* we are definitely in the PPF here, since we are creating
+ * tunnel QPs. base_tunnel_sqpn is therefore valid. */
+ qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
+ + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
+ sqpn = qpn;
+ }
+
+ if (!*caller_qp) {
+ if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
+ (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
+ sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
+ if (!sqp)
+ return -ENOMEM;
+ qp = &sqp->qp;
+ qp->pri.vid = qp->alt.vid = 0xFFFF;
+ } else {
+ qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
+ if (!qp)
+ return -ENOMEM;
+ qp->pri.vid = qp->alt.vid = 0xFFFF;
+ }
+ } else
+ qp = *caller_qp;
+
+ qp->mlx4_ib_qp_type = qp_type;
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
- spin_lock_init(&qp->xrc_reg_list_lock);
INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
+ INIT_LIST_HEAD(&qp->rules_list);
qp->state = IB_QPS_RESET;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
- !!init_attr->srq || !!init_attr->xrc_domain , qp);
+ err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
if (err)
goto err;
if (pd->uobject) {
struct mlx4_ib_create_qp ucmd;
+ int shift;
+ int n;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
err = -EFAULT;
@@ -570,30 +963,25 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->buf_size, 0, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
- mlx4_ib_dbg("ib_umem_get error (%d)", err);
goto err;
}
- err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
- ilog2(qp->umem->page_size), &qp->mtt);
- if (err) {
- mlx4_ib_dbg("mlx4_mtt_init error (%d)", err);
+ n = ib_umem_page_count(qp->umem);
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+
+ if (err)
goto err_buf;
- }
err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
- if (err) {
- mlx4_ib_dbg("mlx4_ib_umem_write_mtt error (%d)", err);
+ if (err)
goto err_mtt;
- }
- if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC) {
+ if (qp_has_rq(init_attr)) {
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &qp->db);
- if (err) {
- mlx4_ib_dbg("mlx4_ib_db_map_user error (%d)", err);
+ if (err)
goto err_mtt;
- }
}
} else {
qp->sq_no_prefetch = 0;
@@ -604,11 +992,17 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
- err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP &&
+ dev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ !mlx4_is_mfunc(dev->dev))
+ qp->flags |= MLX4_IB_QP_NETIF;
+
+ err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
if (err)
goto err;
- if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC) {
+ if (qp_has_rq(init_attr)) {
err = mlx4_db_alloc(dev->dev, &qp->db, 0);
if (err)
goto err;
@@ -617,9 +1011,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
if (qp->max_inline_data) {
- err = mlx4_bf_alloc(dev->dev, &qp->bf);
+ err = mlx4_bf_alloc(dev->dev, &qp->bf, 0);
if (err) {
- mlx4_ib_dbg("failed to allocate blue flame register (%d)", err);
+ pr_debug("failed to allocate blue flame"
+ " register (%d)", err);
qp->bf.uar = &dev->priv_uar;
}
} else
@@ -632,16 +1027,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
&qp->mtt);
- if (err) {
- mlx4_ib_dbg("kernel qp mlx4_mtt_init error (%d)", err);
+ if (err)
goto err_buf;
- }
err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
- if (err) {
- mlx4_ib_dbg("mlx4_buf_write_mtt error (%d)", err);
+ if (err)
goto err_mtt;
- }
qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
@@ -653,18 +1044,24 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
if (sqpn) {
- qpn = sqpn;
+ if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
+ if (alloc_proxy_bufs(pd->device, qp)) {
+ err = -ENOMEM;
+ goto err_wrid;
+ }
+ }
} else {
- err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
+ err = alloc_qpn_common(dev, qp, init_attr, &qpn);
if (err)
- goto err_wrid;
+ goto err_proxy;
}
err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
if (err)
goto err_qpn;
- if (init_attr->qp_type == IB_QPT_XRC)
+ if (init_attr->qp_type == IB_QPT_XRC_TGT)
qp->mqp.qpn |= (1 << 23);
/*
@@ -675,18 +1072,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
qp->mqp.event = mlx4_ib_qp_event;
-
+ if (!*caller_qp)
+ *caller_qp = qp;
return 0;
err_qpn:
- if (!sqpn)
- mlx4_qp_release_range(dev->dev, qpn, 1);
+ unalloc_qpn_common(dev, qp, init_attr, qpn);
+err_proxy:
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
+ free_proxy_bufs(pd->device, qp);
err_wrid:
if (pd->uobject) {
- if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC)
- mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context),
- &qp->db);
+ if (qp_has_rq(init_attr))
+ mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
@@ -702,13 +1101,15 @@ err_buf:
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
err_db:
- if (!pd->uobject && !init_attr->srq && init_attr->qp_type != IB_QPT_XRC)
+ if (!pd->uobject && qp_has_rq(init_attr))
mlx4_db_free(dev->dev, &qp->db);
if (qp->max_inline_data)
mlx4_bf_free(dev->dev, &qp->bf);
err:
+ if (!*caller_qp)
+ kfree(qp);
return err;
}
@@ -727,10 +1128,12 @@ static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
}
static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
spin_lock_irq(&send_cq->lock);
- else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ __acquire(&recv_cq->lock);
+ } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
@@ -740,10 +1143,12 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv
}
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+ __releases(&send_cq->lock) __releases(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
+ __release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
- else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else {
@@ -754,7 +1159,7 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re
static void del_gid_entries(struct mlx4_ib_qp *qp)
{
- struct gid_entry *ge, *tmp;
+ struct mlx4_ib_gid_entry *ge, *tmp;
list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
list_del(&ge->list);
@@ -762,19 +1167,66 @@ static void del_gid_entries(struct mlx4_ib_qp *qp)
}
}
+static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
+{
+ if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
+ return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
+ else
+ return to_mpd(qp->ibqp.pd);
+}
+
+static void get_cqs(struct mlx4_ib_qp *qp,
+ struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
+{
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_XRC_TGT:
+ *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
+ *recv_cq = *send_cq;
+ break;
+ case IB_QPT_XRC_INI:
+ *send_cq = to_mcq(qp->ibqp.send_cq);
+ *recv_cq = *send_cq;
+ break;
+ default:
+ *send_cq = to_mcq(qp->ibqp.send_cq);
+ *recv_cq = to_mcq(qp->ibqp.recv_cq);
+ break;
+ }
+}
+
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
int is_user)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
- if (qp->state != IB_QPS_RESET)
+ if (qp->state != IB_QPS_RESET) {
if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
- printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
+ pr_warn("modify QP %06x to RESET failed.\n",
qp->mqp.qpn);
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = 0;
+ }
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = 0;
+ }
+ if (qp->pri.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
+ qp->pri.vid = 0xFFFF;
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+ if (qp->alt.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
+ qp->alt.vid = 0xFFFF;
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
+ }
- send_cq = to_mcq(qp->ibqp.send_cq);
- recv_cq = to_mcq(qp->ibqp.recv_cq);
+ get_cqs(qp, &send_cq, &recv_cq);
mlx4_ib_lock_cqs(send_cq, recv_cq);
@@ -791,106 +1243,201 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_qp_free(dev->dev, &qp->mqp);
- if (!is_sqp(dev, qp))
- mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+ if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
+ release_qpn_common(dev, qp);
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
if (is_user) {
- if (!qp->ibqp.srq && qp->ibqp.qp_type != IB_QPT_XRC)
+ if (qp->rq.wqe_cnt)
mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
&qp->db);
ib_umem_release(qp->umem);
} else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
+ if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
+ free_proxy_bufs(&dev->ib_dev, qp);
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
if (qp->max_inline_data)
mlx4_bf_free(dev->dev, &qp->bf);
- if (!qp->ibqp.srq && qp->ibqp.qp_type != IB_QPT_XRC)
+
+ if (qp->rq.wqe_cnt)
mlx4_db_free(dev->dev, &qp->db);
}
del_gid_entries(qp);
}
+static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
+{
+ /* Native or PPF */
+ if (!mlx4_is_mfunc(dev->dev) ||
+ (mlx4_is_master(dev->dev) &&
+ attr->create_flags & MLX4_IB_SRIOV_SQP)) {
+ return dev->dev->phys_caps.base_sqpn +
+ (attr->qp_type == IB_QPT_SMI ? 0 : 2) +
+ attr->port_num - 1;
+ }
+ /* PF or VF -- creating proxies */
+ if (attr->qp_type == IB_QPT_SMI)
+ return dev->dev->caps.qp0_proxy[attr->port_num - 1];
+ else
+ return dev->dev->caps.qp1_proxy[attr->port_num - 1];
+}
+
+static int check_qpg_attr(struct mlx4_ib_dev *dev,
+ struct ib_qp_init_attr *attr)
+{
+ if (attr->qpg_type == IB_QPG_NONE)
+ return 0;
+
+ if (attr->qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ if (attr->qpg_type == IB_QPG_PARENT) {
+ if (attr->parent_attrib.tss_child_count == 1)
+ return -EINVAL; /* Doesn't make sense */
+ if (attr->parent_attrib.rss_child_count == 1)
+ return -EINVAL; /* Doesn't make sense */
+ if ((attr->parent_attrib.tss_child_count == 0) &&
+ (attr->parent_attrib.rss_child_count == 0))
+ /* Should be called with IP_QPG_NONE */
+ return -EINVAL;
+ if (attr->parent_attrib.rss_child_count > 1) {
+ int rss_align_num;
+ if (!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS))
+ return -ENOSYS;
+ rss_align_num = roundup_pow_of_two(
+ attr->parent_attrib.rss_child_count);
+ if (rss_align_num > dev->dev->caps.max_rss_tbl_sz)
+ return -EINVAL;
+ }
+ } else {
+ struct mlx4_ib_qpg_data *qpg_data;
+ if (attr->qpg_parent == NULL)
+ return -EINVAL;
+ if (IS_ERR(attr->qpg_parent))
+ return -EINVAL;
+ qpg_data = to_mqp(attr->qpg_parent)->qpg_data;
+ if (qpg_data == NULL)
+ return -EINVAL;
+ if (attr->qpg_type == IB_QPG_CHILD_TX &&
+ !qpg_data->tss_child_count)
+ return -EINVAL;
+ if (attr->qpg_type == IB_QPG_CHILD_RX &&
+ !qpg_data->rss_child_count)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#define RESERVED_FLAGS_MASK ((((unsigned int)IB_QP_CREATE_RESERVED_END - 1) | IB_QP_CREATE_RESERVED_END) \
+ & ~(IB_QP_CREATE_RESERVED_START - 1))
+
+static enum mlx4_ib_qp_flags to_mlx4_ib_qp_flags(enum ib_qp_create_flags ib_qp_flags)
+{
+ enum mlx4_ib_qp_flags mlx4_ib_qp_flags = 0;
+
+ if (ib_qp_flags & IB_QP_CREATE_IPOIB_UD_LSO)
+ mlx4_ib_qp_flags |= MLX4_IB_QP_LSO;
+
+ if (ib_qp_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ mlx4_ib_qp_flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
+ if (ib_qp_flags & IB_QP_CREATE_NETIF_QP)
+ mlx4_ib_qp_flags |= MLX4_IB_QP_NETIF;
+
+ /* reserved flags */
+ mlx4_ib_qp_flags |= (ib_qp_flags & RESERVED_FLAGS_MASK);
+
+ return mlx4_ib_qp_flags;
+}
+
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_sqp *sqp;
- struct mlx4_ib_qp *qp;
+ struct mlx4_ib_qp *qp = NULL;
int err;
+ u16 xrcdn = 0;
+ enum mlx4_ib_qp_flags mlx4_qp_flags = to_mlx4_ib_qp_flags(init_attr->create_flags);
+ struct ib_device *device;
+ /* see ib_core::ib_create_qp same handling */
+ device = pd ? pd->device : init_attr->xrcd->device;
/*
- * We only support LSO and multicast loopback blocking, and
- * only for kernel UD QPs.
+ * We only support LSO, vendor flag1, and multicast loopback blocking,
+ * and only for kernel UD QPs.
*/
- if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
+ if (mlx4_qp_flags & ~(MLX4_IB_QP_LSO |
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
+ MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_NETIF))
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+ if (init_attr->qp_type != IB_QPT_UD)
+ return ERR_PTR(-EINVAL);
+ }
+
if (init_attr->create_flags &&
- (pd->uobject || init_attr->qp_type != IB_QPT_UD))
+ (udata ||
+ ((mlx4_qp_flags & ~MLX4_IB_SRIOV_SQP) &&
+ init_attr->qp_type != IB_QPT_UD) ||
+ ((mlx4_qp_flags & MLX4_IB_SRIOV_SQP) &&
+ init_attr->qp_type > IB_QPT_GSI)))
return ERR_PTR(-EINVAL);
+ err = check_qpg_attr(to_mdev(device), init_attr);
+ if (err)
+ return ERR_PTR(err);
+
switch (init_attr->qp_type) {
- case IB_QPT_XRC:
- if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ case IB_QPT_XRC_TGT:
+ pd = to_mxrcd(init_attr->xrcd)->pd;
+ xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
+ init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
+ /* fall through */
+ case IB_QPT_XRC_INI:
+ if (!(to_mdev(device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
return ERR_PTR(-ENOSYS);
+ init_attr->recv_cq = init_attr->send_cq;
+ /* fall through */
case IB_QPT_RC:
case IB_QPT_UC:
- case IB_QPT_UD:
- case IB_QPT_RAW_ETH:
- {
+ case IB_QPT_RAW_PACKET:
qp = kzalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
-
- err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
+ qp->pri.vid = qp->alt.vid = 0xFFFF;
+ /* fall through */
+ case IB_QPT_UD:
+ {
+ err = create_qp_common(to_mdev(device), pd, init_attr, udata, 0, &qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
}
- if (init_attr->qp_type == IB_QPT_XRC)
- qp->xrcdn = to_mxrcd(init_attr->xrc_domain)->xrcdn;
- else
- qp->xrcdn = 0;
-
qp->ibqp.qp_num = qp->mqp.qpn;
+ qp->xrcdn = xrcdn;
break;
}
- case IB_QPT_RAW_ETY:
- if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY))
- return ERR_PTR(-ENOSYS);
case IB_QPT_SMI:
case IB_QPT_GSI:
{
/* Userspace is not allowed to create special QPs: */
- if (pd->uobject) {
- mlx4_ib_dbg("Userspace is not allowed to create special QPs");
+ if (udata)
return ERR_PTR(-EINVAL);
- }
- sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
- if (!sqp)
- return ERR_PTR(-ENOMEM);
-
- qp = &sqp->qp;
-
- err = create_qp_common(dev, pd, init_attr, udata,
- dev->dev->caps.sqp_start +
- (init_attr->qp_type == IB_QPT_RAW_ETY ? 4 :
- (init_attr->qp_type == IB_QPT_SMI ? 0 : 2)) +
- init_attr->port_num - 1,
- qp);
- if (err) {
- kfree(sqp);
+ err = create_qp_common(to_mdev(device), pd, init_attr, udata,
+ get_sqp_num(to_mdev(device), init_attr),
+ &qp);
+ if (err)
return ERR_PTR(err);
- }
qp->port = init_attr->port_num;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
@@ -898,8 +1445,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
break;
}
default:
- mlx4_ib_dbg("Invalid QP type requested for create_qp (%d)",
- init_attr->qp_type);
+ /* Don't support raw QPs */
return ERR_PTR(-EINVAL);
}
@@ -910,11 +1456,13 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
+ struct mlx4_ib_pd *pd;
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
- destroy_qp_common(dev, mqp, !!qp->pd->uobject);
+ pd = get_pd(mqp);
+ destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
if (is_sqp(dev, mqp))
kfree(to_msqp(mqp));
@@ -924,18 +1472,27 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
return 0;
}
-static int to_mlx4_st(enum ib_qp_type type)
+static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
{
switch (type) {
- case IB_QPT_RC: return MLX4_QP_ST_RC;
- case IB_QPT_UC: return MLX4_QP_ST_UC;
- case IB_QPT_UD: return MLX4_QP_ST_UD;
- case IB_QPT_XRC: return MLX4_QP_ST_XRC;
- case IB_QPT_RAW_ETY:
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- case IB_QPT_RAW_ETH: return MLX4_QP_ST_MLX;
- default: return -1;
+ case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC;
+ case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC;
+ case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD;
+ case MLX4_IB_QPT_XRC_INI:
+ case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC;
+ case MLX4_IB_QPT_SMI:
+ case MLX4_IB_QPT_GSI:
+ case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
+
+ case MLX4_IB_QPT_PROXY_SMI_OWNER:
+ case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
+ MLX4_QP_ST_MLX : -1);
+ case MLX4_IB_QPT_PROXY_SMI:
+ case MLX4_IB_QPT_TUN_SMI:
+ case MLX4_IB_QPT_PROXY_GSI:
+ case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ?
+ MLX4_QP_ST_UD : -1);
+ default: return -1;
}
}
@@ -986,8 +1543,10 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
}
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
- struct mlx4_qp_path *path, u8 port)
+ struct mlx4_ib_qp *qp, struct mlx4_qp_path *path,
+ u8 port, int is_primary)
{
+ struct net_device *ndev;
int err;
int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET;
@@ -995,6 +1554,10 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
int is_mcast;
u16 vlan_tag;
int vidx;
+ int smac_index;
+ u64 u64_mac;
+ u8 *smac;
+ struct mlx4_roce_smac_vlan_info *smac_info;
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
@@ -1008,7 +1571,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
- printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
+ pr_err("sgid_index (%u) too large. max is %d\n",
ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
return -1;
}
@@ -1023,29 +1586,96 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
}
if (is_eth) {
- path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 0x7) << 3) | ((ah->sl & 8) >> 1);
-
if (!(ah->ah_flags & IB_AH_GRH))
return -1;
+ path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
+ ((port - 1) << 6) | ((ah->sl & 7) << 3);
+
+ if (is_primary)
+ smac_info = &qp->pri;
+ else
+ smac_info = &qp->alt;
+
+ vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
+ if (vlan_tag < 0x1000) {
+ if (smac_info->vid < 0x1000) {
+ /* both valid vlan ids */
+ if (smac_info->vid != vlan_tag) {
+ /* different VIDs. unreg old and reg new */
+ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
+ if (err)
+ return err;
+ smac_info->candidate_vid = vlan_tag;
+ smac_info->candidate_vlan_index = vidx;
+ smac_info->candidate_vlan_port = port;
+ smac_info->update_vid = 1;
+ path->vlan_index = vidx;
+ path->fl = 1 << 6;
+ } else {
+ path->vlan_index = smac_info->vlan_index;
+ path->fl = 1 << 6;
+ }
+ } else {
+ /* no current vlan tag in qp */
+ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
+ if (err)
+ return err;
+ smac_info->candidate_vid = vlan_tag;
+ smac_info->candidate_vlan_index = vidx;
+ smac_info->candidate_vlan_port = port;
+ smac_info->update_vid = 1;
+ path->vlan_index = vidx;
+ path->fl = 1 << 6;
+ }
+ } else {
+ /* have current vlan tag. unregister it at modify-qp success */
+ if (smac_info->vid < 0x1000) {
+ smac_info->candidate_vid = 0xFFFF;
+ smac_info->update_vid = 1;
+ }
+ }
+
err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
if (err)
return err;
+ /* get smac_index for RoCE use.
+ * If no smac was yet assigned, register one.
+ * If one was already assigned, but the new mac differs,
+ * unregister the old one and register the new one.
+ */
+ spin_lock(&dev->iboe.lock);
+ ndev = dev->iboe.netdevs[port - 1];
+ if (ndev) {
+#ifdef __linux__
+ smac = ndev->dev_addr; /* fixme: cache this value */
+#else
+ smac = IF_LLADDR(ndev); /* fixme: cache this value */
+#endif
+
+ u64_mac = mlx4_mac_to_u64(smac);
+ } else
+ u64_mac = dev->dev->caps.def_mac[port];
+ spin_unlock(&dev->iboe.lock);
+
+ if (!smac_info->smac || smac_info->smac != u64_mac) {
+ /* register candidate now, unreg if needed, after success */
+ smac_index = mlx4_register_mac(dev->dev, port, u64_mac);
+ if (smac_index >= 0) {
+ smac_info->candidate_smac_index = smac_index;
+ smac_info->candidate_smac = u64_mac;
+ smac_info->candidate_smac_port = port;
+ } else
+ return -EINVAL;
+ } else
+ smac_index = smac_info->smac_index;
+
memcpy(path->dmac, mac, 6);
path->ackto = MLX4_IB_LINK_TYPE_ETH;
- /* use index 0 into MAC table for IBoE */
- path->grh_mylmc &= 0x80;
-
- vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
- if (vlan_tag < 0x1000) {
- if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx))
- return -ENOENT;
+ /* put MAC table smac index for IBoE */
+ path->grh_mylmc = (u8) (smac_index) | 0x80 ;
- path->vlan_index = vidx;
- path->fl = 1 << 6;
- }
} else
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 0xf) << 2);
@@ -1055,7 +1685,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
- struct gid_entry *ge, *tmp;
+ struct mlx4_ib_gid_entry *ge, *tmp;
list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
@@ -1065,23 +1695,59 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
+static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ struct mlx4_qp_context *context)
+{
+ struct net_device *ndev;
+ u64 u64_mac;
+ u8 *smac;
+ int smac_index;
+
+ ndev = dev->iboe.netdevs[qp->port - 1];
+ if (ndev) {
+#ifdef __linux__
+ smac = ndev->dev_addr; /* fixme: cache this value */
+#else
+ smac = IF_LLADDR(ndev); /* fixme: cache this value */
+#endif
+ u64_mac = mlx4_mac_to_u64(smac);
+ } else
+ u64_mac = dev->dev->caps.def_mac[qp->port];
+
+ context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
+ if (!qp->pri.smac) {
+ smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
+ if (smac_index >= 0) {
+ qp->pri.candidate_smac_index = smac_index;
+ qp->pri.candidate_smac = u64_mac;
+ qp->pri.candidate_smac_port = qp->port;
+ context->pri_path.grh_mylmc = 0x80 | (u8) smac_index;
+ } else
+ return -ENOENT;
+ }
+ return 0;
+}
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct mlx4_ib_pd *pd;
+ struct mlx4_ib_cq *send_cq, *recv_cq;
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
int sqd_event;
+ int steer_qp = 0;
int err = -EINVAL;
+ int is_eth = -1;
context = kzalloc(sizeof *context, GFP_KERNEL);
if (!context)
return -ENOMEM;
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
- (to_mlx4_st(ibqp->qp_type) << 16));
+ (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1099,11 +1765,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
break;
}
}
- if (ibqp->qp_type == IB_QPT_RAW_ETH)
- context->mtu_msgmax = 0xff;
- else if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
- ibqp->qp_type == IB_QPT_RAW_ETY)
+
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+ else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+ context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
else if (ibqp->qp_type == IB_QPT_UD) {
if (qp->flags & MLX4_IB_QP_LSO)
context->mtu_msgmax = (IB_MTU_4096 << 5) |
@@ -1112,7 +1778,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
} else if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
- printk(KERN_ERR "path MTU (%u) is invalid\n",
+ pr_err("path MTU (%u) is invalid\n",
attr->path_mtu);
goto out;
}
@@ -1130,8 +1796,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
- if (ibqp->qp_type == IB_QPT_XRC)
- context->xrcd = cpu_to_be32((u32) qp->xrcdn);
+ context->xrcd = cpu_to_be32((u32) qp->xrcdn);
+ context->param3 |= cpu_to_be32(1 << 30);
}
if (qp->ibqp.uobject)
@@ -1150,63 +1816,67 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR &&
- dev->counters[qp->port - 1] != -1) {
- context->pri_path.counter_index = dev->counters[qp->port - 1];
- optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ if (dev->counters[qp->port - 1] != -1) {
+ context->pri_path.counter_index =
+ dev->counters[qp->port - 1];
+ optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+ } else
+ context->pri_path.counter_index = 0xff;
+
+ if (qp->flags & MLX4_IB_QP_NETIF &&
+ (qp->qpg_type == IB_QPG_NONE || qp->qpg_type == IB_QPG_PARENT)) {
+ mlx4_ib_steer_qp_reg(dev, qp, 1);
+ steer_qp = 1;
+ }
}
if (attr_mask & IB_QP_PKEY_INDEX) {
+ if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
+ context->pri_path.disable_pkey_check = 0x40;
context->pri_path.pkey_index = attr->pkey_index;
optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
}
if (attr_mask & IB_QP_AV) {
- if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
- attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) {
- mlx4_ib_dbg("qpn 0x%x: could not set pri path params",
- ibqp->qp_num);
+ if (mlx4_set_path(dev, &attr->ah_attr, qp, &context->pri_path,
+ attr_mask & IB_QP_PORT ?
+ attr->port_num : qp->port, 1))
goto out;
- }
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX4_QP_OPTPAR_SCHED_QUEUE);
}
if (attr_mask & IB_QP_TIMEOUT) {
- context->pri_path.ackto |= (attr->timeout << 3);
+ context->pri_path.ackto |= attr->timeout << 3;
optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
}
if (attr_mask & IB_QP_ALT_PATH) {
if (attr->alt_port_num == 0 ||
- attr->alt_port_num > dev->num_ports) {
- mlx4_ib_dbg("qpn 0x%x: invalid alternate port num (%d)",
- ibqp->qp_num, attr->alt_port_num);
+ attr->alt_port_num > dev->dev->caps.num_ports)
goto out;
- }
if (attr->alt_pkey_index >=
- dev->dev->caps.pkey_table_len[attr->alt_port_num]) {
- mlx4_ib_dbg("qpn 0x%x: invalid alt pkey index (0x%x)",
- ibqp->qp_num, attr->alt_pkey_index);
+ dev->dev->caps.pkey_table_len[attr->alt_port_num])
goto out;
- }
- if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
- attr->alt_port_num)) {
- mlx4_ib_dbg("qpn 0x%x: could not set alt path params",
- ibqp->qp_num);
+ if (mlx4_set_path(dev, &attr->alt_ah_attr, qp, &context->alt_path,
+ attr->alt_port_num, 0))
goto out;
- }
context->alt_path.pkey_index = attr->alt_pkey_index;
context->alt_path.ackto = attr->alt_timeout << 3;
optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
}
- context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
- context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
+ pd = get_pd(qp);
+ get_cqs(qp, &send_cq, &recv_cq);
+ context->pd = cpu_to_be32(pd->pdn);
+ context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
+ context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
+ context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
/* Set "fast registration enabled" for all kernel QPs */
if (!qp->ibqp.uobject)
@@ -1232,8 +1902,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_SQ_PSN)
context->next_send_psn = cpu_to_be32(attr->sq_psn);
- context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
-
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
if (attr->max_dest_rd_atomic)
context->params2 |=
@@ -1246,6 +1914,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
}
+ if (attr_mask & IB_M_EXT_CLASS_1)
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_MASTER);
+
+ /* for now we enable also sqe on send */
+ if (attr_mask & IB_M_EXT_CLASS_2) {
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_SQ);
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_MASTER);
+ }
+
+ if (attr_mask & IB_M_EXT_CLASS_3)
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_RQ);
+
if (ibqp->srq)
context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
@@ -1256,30 +1936,65 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_RQ_PSN)
context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
- context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
-
+ /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
if (attr_mask & IB_QP_QKEY) {
- context->qkey = cpu_to_be32(attr->qkey);
+ if (qp->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
+ context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
+ else {
+ if (mlx4_is_mfunc(dev->dev) &&
+ !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
+ (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
+ MLX4_RESERVED_QKEY_BASE) {
+ pr_err("Cannot use reserved QKEY"
+ " 0x%x (range 0xffff0000..0xffffffff"
+ " is reserved)\n", attr->qkey);
+ err = -EINVAL;
+ goto out;
+ }
+ context->qkey = cpu_to_be32(attr->qkey);
+ }
optpar |= MLX4_QP_OPTPAR_Q_KEY;
}
if (ibqp->srq)
context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
- if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC &&
- cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->db_rec_addr = cpu_to_be64(qp->db.dma);
if (cur_state == IB_QPS_INIT &&
new_state == IB_QPS_RTR &&
(ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
- ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_RAW_ETY ||
- ibqp->qp_type == IB_QPT_RAW_ETH)) {
+ ibqp->qp_type == IB_QPT_UD ||
+ ibqp->qp_type == IB_QPT_RAW_PACKET)) {
context->pri_path.sched_queue = (qp->port - 1) << 6;
- if (is_qp0(dev, qp))
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
+ qp->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
- else
+ if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
+ context->pri_path.fl = 0x80;
+ } else {
+ if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
+ context->pri_path.fl = 0x80;
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
+ }
+ is_eth = rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
+ IB_LINK_LAYER_ETHERNET;
+ if (is_eth) {
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI)
+ context->pri_path.feup = 1 << 7; /* don't fsm */
+ /* handle smac_index */
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
+ err = handle_eth_ud_smac_index(dev, qp, context);
+ if (err)
+ return -EINVAL;
+ }
+ }
}
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
@@ -1291,6 +2006,43 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->rlkey |= (1 << 4);
+ if ((attr_mask & IB_QP_GROUP_RSS) &&
+ (qp->qpg_data->rss_child_count > 1)) {
+ struct mlx4_ib_qpg_data *qpg_data = qp->qpg_data;
+ void *rss_context_base = &context->pri_path;
+ struct mlx4_rss_context *rss_context =
+ (struct mlx4_rss_context *) (rss_context_base
+ + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH);
+
+ context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET);
+
+ /* This should be tbl_sz_base_qpn */
+ rss_context->base_qpn = cpu_to_be32(qpg_data->rss_qpn_base |
+ (ilog2(qpg_data->rss_child_count) << 24));
+ rss_context->default_qpn = cpu_to_be32(qpg_data->rss_qpn_base);
+ /* This should be flags_hash_fn */
+ rss_context->flags = MLX4_RSS_TCP_IPV6 |
+ MLX4_RSS_TCP_IPV4;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS) {
+ rss_context->base_qpn_udp = rss_context->default_qpn;
+ rss_context->flags |= MLX4_RSS_IPV6 |
+ MLX4_RSS_IPV4 |
+ MLX4_RSS_UDP_IPV6 |
+ MLX4_RSS_UDP_IPV4;
+ }
+ if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP) {
+ static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B,
+ 0x1983A2FC, 0x943E1ADB, 0xD9389E6B, 0xD1039C2C,
+ 0xA74499AD, 0x593D56D9, 0xF3253C06, 0x2ADC1FFC};
+ rss_context->hash_fn = MLX4_RSS_HASH_TOP;
+ memcpy(rss_context->rss_key, rsskey,
+ sizeof(rss_context->rss_key));
+ } else {
+ rss_context->hash_fn = MLX4_RSS_HASH_XOR;
+ memset(rss_context->rss_key, 0,
+ sizeof(rss_context->rss_key));
+ }
+ }
/*
* Before passing a kernel QP to the HW, make sure that the
* ownership bits of the send queue are set and the SQ
@@ -1333,6 +2085,29 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (is_sqp(dev, qp))
store_sqp_attrs(to_msqp(qp), attr, attr_mask);
+ /* Set 'ignore_cq_overrun' bits for collectives offload */
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ if (attr_mask & (IB_M_EXT_CLASS_2 | IB_M_EXT_CLASS_3)) {
+ err = mlx4_ib_ignore_overrun_cq(ibqp->send_cq);
+ if (err) {
+ pr_err("Failed to set ignore CQ "
+ "overrun for QP 0x%x's send CQ\n",
+ ibqp->qp_num);
+ goto out;
+ }
+
+ if (ibqp->recv_cq != ibqp->send_cq) {
+ err = mlx4_ib_ignore_overrun_cq(ibqp->recv_cq);
+ if (err) {
+ pr_err("Failed to set ignore "
+ "CQ overrun for QP 0x%x's recv "
+ "CQ\n", ibqp->qp_num);
+ goto out;
+ }
+ }
+ }
+ }
+
/*
* If we moved QP0 to RTR, bring the IB link up; if we moved
* QP0 to RESET or ERROR, bring the link back down.
@@ -1340,7 +2115,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (is_qp0(dev, qp)) {
if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
if (mlx4_INIT_PORT(dev->dev, qp->port))
- printk(KERN_WARNING "INIT_PORT failed for port %d\n",
+ pr_warn("INIT_PORT failed for port %d\n",
qp->port);
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
@@ -1352,23 +2127,120 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
- ibqp->srq ? to_msrq(ibqp->srq): NULL);
- if (ibqp->send_cq != ibqp->recv_cq)
- mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
+ if (new_state == IB_QPS_RESET) {
+ if (!ibqp->uobject) {
+ mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ if (send_cq != recv_cq)
+ mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ qp->sq_next_wqe = 0;
+ if (qp->rq.wqe_cnt)
+ *qp->db.db = 0;
+
+ if (qp->flags & MLX4_IB_QP_NETIF &&
+ (qp->qpg_type == IB_QPG_NONE ||
+ qp->qpg_type == IB_QPG_PARENT))
+ mlx4_ib_steer_qp_reg(dev, qp, 0);
+ }
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = 0;
+ }
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = 0;
+ }
+ if (qp->pri.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
+ qp->pri.vid = 0xFFFF;
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
- qp->rq.head = 0;
- qp->rq.tail = 0;
- qp->sq.head = 0;
- qp->sq.tail = 0;
- qp->sq_next_wqe = 0;
- if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC)
- *qp->db.db = 0;
+ if (qp->alt.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
+ qp->alt.vid = 0xFFFF;
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
}
out:
+ if (err && steer_qp)
+ mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
+ if (qp->pri.candidate_smac) {
+ if (err)
+ mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac);
+ else {
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ }
+ qp->pri.smac = qp->pri.candidate_smac;
+ qp->pri.smac_index = qp->pri.candidate_smac_index;
+ qp->pri.smac_port = qp->pri.candidate_smac_port;
+
+ }
+ qp->pri.candidate_smac = 0;
+ qp->pri.candidate_smac_index = 0;
+ qp->pri.candidate_smac_port = 0;
+ }
+ if (qp->alt.candidate_smac) {
+ if (err)
+ mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->pri.candidate_smac);
+ else {
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ }
+ qp->alt.smac = qp->alt.candidate_smac;
+ qp->alt.smac_index = qp->alt.candidate_smac_index;
+ qp->alt.smac_port = qp->alt.candidate_smac_port;
+
+ }
+ qp->pri.candidate_smac = 0;
+ qp->pri.candidate_smac_index = 0;
+ qp->pri.candidate_smac_port = 0;
+ }
+
+ if (qp->pri.update_vid) {
+ if (err) {
+ if (qp->pri.candidate_vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port,
+ qp->pri.candidate_vid);
+ } else {
+ if (qp->pri.vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port,
+ qp->pri.vid);
+ qp->pri.vid = qp->pri.candidate_vid;
+ qp->pri.vlan_port = qp->pri.candidate_vlan_port;
+ qp->pri.vlan_index = qp->pri.candidate_vlan_index;
+ }
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+
+ if (qp->alt.update_vid) {
+ if (err) {
+ if (qp->alt.candidate_vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port,
+ qp->alt.candidate_vid);
+ } else {
+ if (qp->alt.vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port,
+ qp->alt.vid);
+ qp->alt.vid = qp->alt.candidate_vid;
+ qp->alt.vlan_port = qp->alt.candidate_vlan_port;
+ qp->alt.vlan_index = qp->alt.candidate_vlan_index;
+ }
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
+
return err;
}
@@ -1385,59 +2257,62 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
- mlx4_ib_dbg("qpn 0x%x: invalid attribute mask specified "
- "for transition %d to %d. qp_type %d, attr_mask 0x%x",
- ibqp->qp_num, cur_state, new_state,
- ibqp->qp_type, attr_mask);
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask & ~IB_M_QP_MOD_VEND_MASK)) {
+ pr_debug("qpn 0x%x: invalid attribute mask specified "
+ "for transition %d to %d. qp_type %d,"
+ " attr_mask 0x%x\n",
+ ibqp->qp_num, cur_state, new_state,
+ ibqp->qp_type, attr_mask);
goto out;
}
- if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type != IB_QPT_RAW_ETH) &&
- (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
- mlx4_ib_dbg("qpn 0x%x: invalid port number (%d) specified "
- "for transition %d to %d. qp_type %d",
- ibqp->qp_num, attr->port_num, cur_state,
- new_state, ibqp->qp_type);
+ if ((attr_mask & IB_M_QP_MOD_VEND_MASK) && !dev->dev->caps.sync_qp) {
+ pr_err("extended verbs are not supported by %s\n",
+ dev->ib_dev.name);
goto out;
}
- if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_ETH) &&
- (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num)
- != IB_LINK_LAYER_ETHERNET)) {
- mlx4_ib_dbg("qpn 0x%x: invalid port (%d) specified (not RDMAoE)"
- "for transition %d to %d. qp_type %d",
- ibqp->qp_num, attr->port_num, cur_state,
- new_state, ibqp->qp_type);
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
+ pr_debug("qpn 0x%x: invalid port number (%d) specified "
+ "for transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->port_num, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
+ if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
+ (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
+ IB_LINK_LAYER_ETHERNET))
+ goto out;
+
if (attr_mask & IB_QP_PKEY_INDEX) {
int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
- mlx4_ib_dbg("qpn 0x%x: invalid pkey index (%d) specified "
- "for transition %d to %d. qp_type %d",
- ibqp->qp_num, attr->pkey_index, cur_state,
- new_state, ibqp->qp_type);
+ pr_debug("qpn 0x%x: invalid pkey index (%d) specified "
+ "for transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->pkey_index, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
- mlx4_ib_dbg("qpn 0x%x: max_rd_atomic (%d) too large. "
- "Transition %d to %d. qp_type %d",
- ibqp->qp_num, attr->max_rd_atomic, cur_state,
- new_state, ibqp->qp_type);
+ pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. "
+ "Transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->max_rd_atomic, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
- mlx4_ib_dbg("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
- "Transition %d to %d. qp_type %d",
- ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
- new_state, ibqp->qp_type);
+ pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
+ "Transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
@@ -1453,85 +2328,163 @@ out:
return err;
}
-static int build_raw_ety_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
- void *wqe, unsigned *mlx_seg_len)
+static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
+ struct ib_send_wr *wr,
+ void *wqe, unsigned *mlx_seg_len)
{
- int payload = 0;
- int header_size, packet_length;
+ struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
+ struct ib_device *ib_dev = &mdev->ib_dev;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
- u32 *lrh = wqe + sizeof *mlx + sizeof *inl;
+ struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ u16 pkey;
+ u32 qkey;
+ int send_size;
+ int header_size;
+ int spc;
int i;
- /* Only IB_WR_SEND is supported */
if (wr->opcode != IB_WR_SEND)
return -EINVAL;
+ send_size = 0;
+
for (i = 0; i < wr->num_sge; ++i)
- payload += wr->sg_list[i].length;
+ send_size += wr->sg_list[i].length;
+
+ /* for proxy-qp0 sends, need to add in size of tunnel header */
+ /* for tunnel-qp0 sends, tunnel header is already in s/g list */
+ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
+ send_size += sizeof (struct mlx4_ib_tunnel_header);
- header_size = IB_LRH_BYTES + 4; /* LRH + RAW_HEADER (32 bits) */
+ ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
- /* headers + payload and round up */
- packet_length = (header_size + payload + 3) / 4;
+ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
+ sqp->ud_header.lrh.service_level =
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
+ sqp->ud_header.lrh.destination_lid =
+ cpu_to_be16(ah->av.ib.g_slid & 0x7f);
+ sqp->ud_header.lrh.source_lid =
+ cpu_to_be16(ah->av.ib.g_slid & 0x7f);
+ }
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_ICRC |
- (wr->wr.raw_ety.lrh->service_level << 8));
+ /* force loopback */
+ mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
+ mlx->rlid = sqp->ud_header.lrh.destination_lid;
- mlx->rlid = wr->wr.raw_ety.lrh->destination_lid;
+ sqp->ud_header.lrh.virtual_lane = 0;
+ sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+ sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
+ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ else
+ sqp->ud_header.bth.destination_qpn =
+ cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
- wr->wr.raw_ety.lrh->packet_length = cpu_to_be16(packet_length);
+ sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
+ if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ return -EINVAL;
+ sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
- ib_lrh_header_pack(wr->wr.raw_ety.lrh, lrh);
- lrh += IB_LRH_BYTES / 4; /* LRH size is a dword multiple */
- *lrh = cpu_to_be32(wr->wr.raw_ety.eth_type);
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ sqp->ud_header.immediate_present = 0;
- inl->byte_count = cpu_to_be32(1 << 31 | header_size);
+ header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
- *mlx_seg_len =
- ALIGN(sizeof(struct mlx4_wqe_inline_seg) + header_size, 16);
+ /*
+ * Inline data segments may not cross a 64 byte boundary. If
+ * our UD header is bigger than the space available up to the
+ * next 64 byte boundary in the WQE, use two inline data
+ * segments to hold the UD header.
+ */
+ spc = MLX4_INLINE_ALIGN -
+ ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
+ if (header_size <= spc) {
+ inl->byte_count = cpu_to_be32(1 << 31 | header_size);
+ memcpy(inl + 1, sqp->header_buf, header_size);
+ i = 1;
+ } else {
+ inl->byte_count = cpu_to_be32(1 << 31 | spc);
+ memcpy(inl + 1, sqp->header_buf, spc);
+ inl = (void *) (inl + 1) + spc;
+ memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
+ /*
+ * Need a barrier here to make sure all the data is
+ * visible before the byte_count field is set.
+ * Otherwise the HCA prefetcher could grab the 64-byte
+ * chunk with this inline segment and get a valid (!=
+ * 0xffffffff) byte count but stale data, and end up
+ * generating a packet with bad headers.
+ *
+ * The first inline segment's byte_count field doesn't
+ * need a barrier, because it comes after a
+ * control/MLX segment and therefore is at an offset
+ * of 16 mod 64.
+ */
+ wmb();
+ inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
+ i = 2;
+ }
+
+ *mlx_seg_len =
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
return 0;
}
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
+ struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
+ struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ union ib_gid sgid;
u16 pkey;
int send_size;
int header_size;
int spc;
int i;
- union ib_gid sgid;
int is_eth;
- int is_grh;
int is_vlan = 0;
- int err;
- u16 vlan;
+ int is_grh;
+ u16 vlan = 0;
+ int err = 0;
- vlan = 0;
send_size = 0;
for (i = 0; i < wr->num_sge; ++i)
send_size += wr->sg_list[i].length;
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
- err = ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &sgid);
- if (err)
- return err;
if (is_eth) {
- is_vlan = rdma_get_vlan_id(&sgid) < 0x1000;
+ if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+ /* When multi-function is enabled, the ib_core gid
+ * indexes don't necessarily match the hw ones, so
+ * we must use our own cache */
+ err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &sgid.raw[0]);
+ if (err)
+ return err;
+ } else {
+ err = ib_get_cached_gid(ib_dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &sgid);
+ if (err)
+ return err;
+ }
+
vlan = rdma_get_vlan_id(&sgid);
+ is_vlan = vlan < 0x1000;
}
-
ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
+
if (!is_eth) {
sqp->ud_header.lrh.service_level =
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
@@ -1545,8 +2498,25 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
sqp->ud_header.grh.flow_label =
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
- ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid);
+ if (is_eth)
+ memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
+ else {
+ if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+ /* When multi-function is enabled, the ib_core gid
+ * indexes don't necessarily match the hw ones, so
+ * we must use our own cache */
+ sqp->ud_header.grh.source_gid.global.subnet_prefix =
+ to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+ subnet_prefix;
+ sqp->ud_header.grh.source_gid.global.interface_id =
+ to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+ guid_cache[ah->av.ib.gid_index];
+ } else
+ ib_get_cached_gid(ib_dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index,
+ &sqp->ud_header.grh.source_gid);
+ }
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
}
@@ -1558,16 +2528,18 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
(sqp->ud_header.lrh.destination_lid ==
IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
(sqp->ud_header.lrh.service_level << 8));
+ if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
+ mlx->flags |= cpu_to_be32(0x1); /* force loopback */
mlx->rlid = sqp->ud_header.lrh.destination_lid;
}
switch (wr->opcode) {
case IB_WR_SEND:
- sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
break;
case IB_WR_SEND_WITH_IMM:
- sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
sqp->ud_header.immediate_present = 1;
sqp->ud_header.immediate_data = wr->ex.imm_data;
break;
@@ -1576,24 +2548,26 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
}
if (is_eth) {
- u8 *smac;
+ u8 smac[6];
+ struct in6_addr in6;
+
+ u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
+
+ mlx->sched_prio = cpu_to_be16(pcp);
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
-#ifdef __linux__
- smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr; /* fixme: cache this value */
-#else
- smac = IF_LLADDR(to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]); /* fixme: cache this value */
-#endif
+ /* FIXME: cache smac value? */
+ memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
+ memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
+ memcpy(&in6, sgid.raw, sizeof(in6));
+ rdma_get_ll_mac(&in6, smac);
memcpy(sqp->ud_header.eth.smac_h, smac, 6);
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
- if (!is_vlan)
- sqp->ud_header.eth.type = cpu_to_be16(MLX4_IBOE_ETHERTYPE);
- else {
- u16 pcp;
-
- sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IBOE_ETHERTYPE);
- pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 27 & 3) << 13;
+ if (!is_vlan) {
+ sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ } else {
+ sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
@@ -1616,16 +2590,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
if (0) {
- printk(KERN_ERR "built UD header of size %d:\n", header_size);
+ pr_err("built UD header of size %d:\n", header_size);
for (i = 0; i < header_size / 4; ++i) {
if (i % 8 == 0)
- printk(" [%02x] ", i * 4);
- printk(" %08x",
- be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
+ pr_err(" [%02x] ", i * 4);
+ pr_cont(" %08x",
+ be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
if ((i + 1) % 8 == 0)
- printk("\n");
+ pr_cont("\n");
}
- printk("\n");
+ pr_err("\n");
}
/*
@@ -1635,7 +2609,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
* segments to hold the UD header.
*/
spc = MLX4_INLINE_ALIGN -
- ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
+ ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
if (header_size <= spc) {
inl->byte_count = cpu_to_be32(1 << 31 | header_size);
memcpy(inl + 1, sqp->header_buf, header_size);
@@ -1665,7 +2639,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
}
*mlx_seg_len =
- ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
return 0;
}
@@ -1757,14 +2731,70 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
}
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
- struct ib_send_wr *wr, __be16 *vlan)
+ struct ib_send_wr *wr)
{
memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
- *vlan = dseg->vlan;
+}
+
+static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
+ struct mlx4_wqe_datagram_seg *dseg,
+ struct ib_send_wr *wr, enum ib_qp_type qpt)
+{
+ union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
+ struct mlx4_av sqp_av = {0};
+ int port = *((u8 *) &av->ib.port_pd) & 0x3;
+
+ /* force loopback */
+ sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
+ sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */
+ sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
+ cpu_to_be32(0xf0000000);
+
+ memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
+ /* This function used only for sending on QP1 proxies */
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ /* Use QKEY from the QP context, which is set by master */
+ dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
+}
+
+static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
+{
+ struct mlx4_wqe_inline_seg *inl = wqe;
+ struct mlx4_ib_tunnel_header hdr;
+ struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ int spc;
+ int i;
+
+ memcpy(&hdr.av, &ah->av, sizeof hdr.av);
+ hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
+ hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+
+ spc = MLX4_INLINE_ALIGN -
+ ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
+ if (sizeof (hdr) <= spc) {
+ memcpy(inl + 1, &hdr, sizeof (hdr));
+ wmb();
+ inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
+ i = 1;
+ } else {
+ memcpy(inl + 1, &hdr, spc);
+ wmb();
+ inl->byte_count = cpu_to_be32(1 << 31 | spc);
+
+ inl = (void *) (inl + 1) + spc;
+ memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
+ wmb();
+ inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
+ i = 2;
+ }
+
+ *mlx_seg_len =
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
}
static void set_mlx_icrc_seg(void *dseg)
@@ -1814,11 +2844,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
- __be32 *lso_hdr_sz, int *blh)
+ __be32 *lso_hdr_sz, __be32 *blh)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
- *blh = unlikely(halign > 64) ? 1 : 0;
+ if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
+ *blh = cpu_to_be32(1 << 6);
if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1847,6 +2878,13 @@ static __be32 send_ieth(struct ib_send_wr *wr)
}
}
+static void add_zero_len_inline(void *wqe)
+{
+ struct mlx4_wqe_inline_seg *inl = wqe;
+ memset(wqe, 0, 16);
+ inl->byte_count = cpu_to_be32(1 << 31);
+}
+
static int lay_inline_data(struct mlx4_ib_qp *qp, struct ib_send_wr *wr,
void *wqe, int *sz)
{
@@ -1923,7 +2961,8 @@ static int lay_inline_data(struct mlx4_ib_qp *qp, struct ib_send_wr *wr,
* implementations may use move-string-buffer assembler instructions,
* which do not guarantee order of copying.
*/
-static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt)
+static void mlx4_bf_copy(unsigned long *dst, unsigned long *src,
+ unsigned bytecnt)
{
__iowrite64_copy(dst, src, bytecnt / 8);
}
@@ -1933,7 +2972,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
void *wqe;
- struct mlx4_wqe_ctrl_seg *ctrl;
+ struct mlx4_wqe_ctrl_seg *uninitialized_var(ctrl);
struct mlx4_wqe_data_seg *dseg;
unsigned long flags;
int nreq;
@@ -1945,29 +2984,24 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
__be32 dummy;
__be32 *lso_wqe;
__be32 uninitialized_var(lso_hdr_sz);
+ __be32 blh;
int i;
- int blh = 0;
- __be16 vlan = 0;
int inl = 0;
-
- ctrl = NULL;
spin_lock_irqsave(&qp->sq.lock, flags);
ind = qp->sq_next_wqe;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
lso_wqe = &dummy;
+ blh = 0;
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
- mlx4_ib_dbg("QP 0x%x: WQE overflow", ibqp->qp_num);
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
if (unlikely(wr->num_sge > qp->sq.max_gs)) {
- mlx4_ib_dbg("QP 0x%x: too many sg entries (%d)",
- ibqp->qp_num, wr->num_sge);
err = -EINVAL;
*bad_wr = wr;
goto out;
@@ -1992,13 +3026,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wqe += sizeof *ctrl;
size = sizeof *ctrl / 16;
- switch (ibqp->qp_type) {
- case IB_QPT_XRC:
- ctrl->srcrb_flags |=
- cpu_to_be32(wr->xrc_remote_srq_num << 8);
- /* fall thru */
- case IB_QPT_RC:
- case IB_QPT_UC:
+ switch (qp->mlx4_ib_qp_type) {
+ case MLX4_IB_QPT_RC:
+ case MLX4_IB_QPT_UC:
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
@@ -2059,8 +3089,26 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
break;
- case IB_QPT_UD:
- set_datagram_seg(wqe, wr, &vlan);
+ case MLX4_IB_QPT_TUN_SMI_OWNER:
+ err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += seglen;
+ size += seglen / 16;
+ break;
+ case MLX4_IB_QPT_TUN_SMI:
+ case MLX4_IB_QPT_TUN_GSI:
+ /* this is a UD qp used in MAD responses to slaves. */
+ set_datagram_seg(wqe, wr);
+ /* set the forced-loopback bit in the data seg av */
+ *(__be32 *) wqe |= cpu_to_be32(0x80000000);
+ wqe += sizeof (struct mlx4_wqe_datagram_seg);
+ size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+ break;
+ case MLX4_IB_QPT_UD:
+ set_datagram_seg(wqe, wr);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
@@ -2076,20 +3124,48 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
break;
- case IB_QPT_SMI:
- case IB_QPT_GSI:
- err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
+ case MLX4_IB_QPT_PROXY_SMI_OWNER:
+ if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
+ err = -ENOSYS;
+ *bad_wr = wr;
+ goto out;
+ }
+ err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
}
wqe += seglen;
size += seglen / 16;
+ /* to start tunnel header on a cache-line boundary */
+ add_zero_len_inline(wqe);
+ wqe += 16;
+ size++;
+ build_tunnel_header(wr, wqe, &seglen);
+ wqe += seglen;
+ size += seglen / 16;
+ break;
+ case MLX4_IB_QPT_PROXY_SMI:
+ /* don't allow QP0 sends on guests */
+ err = -ENOSYS;
+ *bad_wr = wr;
+ goto out;
+ case MLX4_IB_QPT_PROXY_GSI:
+ /* If we are tunneling special qps, this is a UD qp.
+ * In this case we first add a UD segment targeting
+ * the tunnel qp, and then add a header with address
+ * information */
+ set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
+ wqe += sizeof (struct mlx4_wqe_datagram_seg);
+ size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+ build_tunnel_header(wr, wqe, &seglen);
+ wqe += seglen;
+ size += seglen / 16;
break;
- case IB_QPT_RAW_ETY:
- err = build_raw_ety_header(to_msqp(qp), wr, ctrl,
- &seglen);
+ case MLX4_IB_QPT_SMI:
+ case MLX4_IB_QPT_GSI:
+ err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -2108,13 +3184,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
* cacheline. This avoids issues with WQE
* prefetching.
*/
-
dseg = wqe;
dseg += wr->num_sge - 1;
/* Add one more inline data segment for ICRC for MLX sends */
- if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)) {
+ if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
+ qp->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
set_mlx_icrc_seg(dseg + 1);
size += sizeof (struct mlx4_wqe_data_seg) / 16;
}
@@ -2127,7 +3204,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sz;
}
} else {
- size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
+ size += wr->num_sge *
+ (sizeof(struct mlx4_wqe_data_seg) / 16);
for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
set_data_seg(dseg, wr->sg_list + i);
}
@@ -2139,15 +3217,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
*/
wmb();
*lso_wqe = lso_hdr_sz;
-
ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
MLX4_WQE_CTRL_FENCE : 0) | size;
- if (vlan) {
- ctrl->ins_vlan = 1 << 6;
- ctrl->vlan_tag = vlan;
- }
-
/*
* Make sure descriptor is fully written before
* setting ownership bit (because HW can start
@@ -2155,14 +3227,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
*/
wmb();
- if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
+ if (wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
+ *bad_wr = wr;
err = -EINVAL;
goto out;
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
- (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) |
- (blh ? cpu_to_be32(1 << 6) : 0);
+ (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
stamp = ind + qp->sq_spare_wqes;
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
@@ -2185,6 +3257,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
out:
if (nreq == 1 && inl && size > 1 && size < qp->bf.buf_size / 16) {
ctrl->owner_opcode |= htonl((qp->sq_next_wqe & 0xffff) << 8);
+ /* We set above doorbell_qpn bits to 0 as part of vlan
+ * tag initialization, so |= should be correct.
+ */
*(u32 *) (&ctrl->vlan_tag) |= qp->doorbell_qpn;
/*
* Make sure that descriptor is written to memory
@@ -2239,23 +3314,22 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int err = 0;
int nreq;
int ind;
+ int max_gs;
int i;
+ max_gs = qp->rq.max_gs;
spin_lock_irqsave(&qp->rq.lock, flags);
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
- mlx4_ib_dbg("QP 0x%x: WQE overflow", ibqp->qp_num);
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
if (unlikely(wr->num_sge > qp->rq.max_gs)) {
- mlx4_ib_dbg("QP 0x%x: too many sg entries (%d)",
- ibqp->qp_num, wr->num_sge);
err = -EINVAL;
*bad_wr = wr;
goto out;
@@ -2263,10 +3337,25 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
scat = get_recv_wqe(qp, ind);
+ if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
+ ib_dma_sync_single_for_device(ibqp->device,
+ qp->sqp_proxy_rcv[ind].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ scat->byte_count =
+ cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
+ /* use dma lkey from upper layer entry */
+ scat->lkey = cpu_to_be32(wr->sg_list->lkey);
+ scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
+ scat++;
+ max_gs--;
+ }
+
for (i = 0; i < wr->num_sge; ++i)
__set_data_seg(scat + i, wr->sg_list + i);
- if (i < qp->rq.max_gs) {
+ if (i < max_gs) {
scat[i].byte_count = 0;
scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
scat[i].addr = 0;
@@ -2334,10 +3423,10 @@ static int to_ib_qp_access_flags(int mlx4_flags)
return ib_flags;
}
-static void to_ib_ah_attr(struct mlx4_ib_dev *ib_dev, struct ib_ah_attr *ib_ah_attr,
- struct mlx4_qp_path *path)
+static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
+ struct mlx4_qp_path *path)
{
- struct mlx4_dev *dev = ib_dev->dev;
+ struct mlx4_dev *dev = ibdev->dev;
int is_eth;
memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
@@ -2346,7 +3435,7 @@ static void to_ib_ah_attr(struct mlx4_ib_dev *ib_dev, struct ib_ah_attr *ib_ah_a
if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
return;
- is_eth = rdma_port_get_link_layer(&ib_dev->ib_dev, ib_ah_attr->port_num) ==
+ is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
IB_LINK_LAYER_ETHERNET;
if (is_eth)
ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
@@ -2355,7 +3444,6 @@ static void to_ib_ah_attr(struct mlx4_ib_dev *ib_dev, struct ib_ah_attr *ib_ah_a
ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
ib_ah_attr->dlid = be16_to_cpu(path->rlid);
-
ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
@@ -2407,8 +3495,7 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
qp_attr->qp_access_flags =
to_ib_qp_access_flags(be32_to_cpu(context.params2));
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
- qp->ibqp.qp_type == IB_QPT_XRC) {
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
@@ -2463,308 +3550,21 @@ done:
if (qp->flags & MLX4_IB_QP_LSO)
qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
-out:
- mutex_unlock(&qp->mutex);
- return err;
-}
-
-int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
- u32 *qp_num)
-{
- struct mlx4_ib_dev *dev = to_mdev(init_attr->xrc_domain->device);
- struct mlx4_ib_xrcd *xrcd = to_mxrcd(init_attr->xrc_domain);
- struct mlx4_ib_qp *qp;
- struct ib_qp *ibqp;
- struct mlx4_ib_xrc_reg_entry *ctx_entry;
- unsigned long flags;
- int err;
-
- if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return -ENOSYS;
-
- if (init_attr->qp_type != IB_QPT_XRC)
- return -EINVAL;
-
- ctx_entry = kmalloc(sizeof *ctx_entry, GFP_KERNEL);
- if (!ctx_entry)
- return -ENOMEM;
-
- qp = kzalloc(sizeof *qp, GFP_KERNEL);
- if (!qp) {
- kfree(ctx_entry);
- return -ENOMEM;
- }
- mutex_lock(&dev->xrc_reg_mutex);
- qp->flags = MLX4_IB_XRC_RCV;
- qp->xrcdn = to_mxrcd(init_attr->xrc_domain)->xrcdn;
- INIT_LIST_HEAD(&qp->xrc_reg_list);
- err = create_qp_common(dev, xrcd->pd, init_attr, NULL, 0, qp);
- if (err) {
- mutex_unlock(&dev->xrc_reg_mutex);
- kfree(ctx_entry);
- kfree(qp);
- return err;
- }
-
- ibqp = &qp->ibqp;
- /* set the ibpq attributes which will be used by the mlx4 module */
- ibqp->qp_num = qp->mqp.qpn;
- ibqp->device = init_attr->xrc_domain->device;
- ibqp->pd = xrcd->pd;
- ibqp->send_cq = ibqp->recv_cq = xrcd->cq;
- ibqp->event_handler = init_attr->event_handler;
- ibqp->qp_context = init_attr->qp_context;
- ibqp->qp_type = init_attr->qp_type;
- ibqp->xrcd = init_attr->xrc_domain;
-
- mutex_lock(&qp->mutex);
- ctx_entry->context = init_attr->qp_context;
- spin_lock_irqsave(&qp->xrc_reg_list_lock, flags);
- list_add_tail(&ctx_entry->list, &qp->xrc_reg_list);
- spin_unlock_irqrestore(&qp->xrc_reg_list_lock, flags);
- mutex_unlock(&qp->mutex);
- mutex_unlock(&dev->xrc_reg_mutex);
- *qp_num = qp->mqp.qpn;
- return 0;
-}
-
-int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
- struct ib_qp_attr *attr, int attr_mask)
-{
- struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
- struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
- struct mlx4_qp *mqp;
- struct mlx4_ib_qp *mibqp;
- int err = -EINVAL;
-
- if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return -ENOSYS;
-
- mutex_lock(&dev->xrc_reg_mutex);
- mqp = mlx4_qp_lookup_lock(dev->dev, qp_num);
- if (unlikely(!mqp)) {
- printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
- "unknown QPN %06x\n", qp_num);
- goto err_out;
- }
-
- mibqp = to_mibqp(mqp);
-
- if (!(mibqp->flags & MLX4_IB_XRC_RCV) || !mibqp->ibqp.xrcd ||
- xrcd->xrcdn != to_mxrcd(mibqp->ibqp.xrcd)->xrcdn)
- goto err_out;
-
- err = mlx4_ib_modify_qp(&mibqp->ibqp, attr, attr_mask, NULL);
- mutex_unlock(&dev->xrc_reg_mutex);
- return err;
-
-err_out:
- mutex_unlock(&dev->xrc_reg_mutex);
- return err;
-}
-
-int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
- struct ib_qp_attr *qp_attr, int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
-{
- struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
- struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
- struct mlx4_ib_qp *qp;
- struct mlx4_qp *mqp;
- struct mlx4_qp_context context;
- int mlx4_state;
- int err = -EINVAL;
-
- if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return -ENOSYS;
-
- mutex_lock(&dev->xrc_reg_mutex);
- mqp = mlx4_qp_lookup_lock(dev->dev, qp_num);
- if (unlikely(!mqp)) {
- printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
- "unknown QPN %06x\n", qp_num);
- goto err_out;
- }
-
- qp = to_mibqp(mqp);
- if (!(qp->flags & MLX4_IB_XRC_RCV) || !(qp->ibqp.xrcd) ||
- xrcd->xrcdn != to_mxrcd(qp->ibqp.xrcd)->xrcdn)
- goto err_out;
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP;
- if (qp->state == IB_QPS_RESET) {
- qp_attr->qp_state = IB_QPS_RESET;
- goto done;
- }
-
- err = mlx4_qp_query(dev->dev, mqp, &context);
- if (err)
- goto err_out;
-
- mlx4_state = be32_to_cpu(context.flags) >> 28;
-
- qp_attr->qp_state = to_ib_qp_state(mlx4_state);
- qp_attr->path_mtu = context.mtu_msgmax >> 5;
- qp_attr->path_mig_state =
- to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
- qp_attr->qkey = be32_to_cpu(context.qkey);
- qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
- qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff;
- qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff;
- qp_attr->qp_access_flags =
- to_ib_qp_access_flags(be32_to_cpu(context.params2));
-
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
- qp->ibqp.qp_type == IB_QPT_XRC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr,
- &context.alt_path);
- qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
- }
+ qp_init_attr->sq_sig_type =
+ qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
+ IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
- qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
- if (qp_attr->qp_state == IB_QPS_INIT)
- qp_attr->port_num = qp->port;
+ qp_init_attr->qpg_type = ibqp->qpg_type;
+ if (ibqp->qpg_type == IB_QPG_PARENT)
+ qp_init_attr->cap.qpg_tss_mask_sz = qp->qpg_data->qpg_tss_mask_sz;
else
- qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
-
- /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
- qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
-
- qp_attr->max_rd_atomic =
- 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
-
- qp_attr->max_dest_rd_atomic =
- 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
- qp_attr->min_rnr_timer =
- (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
- qp_attr->timeout = context.pri_path.ackto >> 3;
- qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
- qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
- qp_attr->alt_timeout = context.alt_path.ackto >> 3;
-
-done:
- qp_attr->cur_qp_state = qp_attr->qp_state;
- qp_attr->cap.max_recv_wr = 0;
- qp_attr->cap.max_recv_sge = 0;
- qp_attr->cap.max_send_wr = 0;
- qp_attr->cap.max_send_sge = 0;
- qp_attr->cap.max_inline_data = 0;
- qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->cap.qpg_tss_mask_sz = 0;
- mutex_unlock(&dev->xrc_reg_mutex);
- return 0;
-
-err_out:
- mutex_unlock(&dev->xrc_reg_mutex);
- return err;
-}
-
-int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
-{
-
- struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
-
- struct mlx4_qp *mqp;
- struct mlx4_ib_qp *mibqp;
- struct mlx4_ib_xrc_reg_entry *ctx_entry, *tmp;
- unsigned long flags;
- int err = -EINVAL;
-
- mutex_lock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- mqp = mlx4_qp_lookup_lock(to_mdev(xrcd->device)->dev, qp_num);
- if (unlikely(!mqp)) {
- printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: "
- "unknown QPN %06x\n", qp_num);
- goto err_out;
- }
-
- mibqp = to_mibqp(mqp);
-
- if (!(mibqp->flags & MLX4_IB_XRC_RCV) || !(mibqp->ibqp.xrcd) ||
- mxrcd->xrcdn != to_mxrcd(mibqp->ibqp.xrcd)->xrcdn)
- goto err_out;
-
- ctx_entry = kmalloc(sizeof *ctx_entry, GFP_KERNEL);
- if (!ctx_entry) {
- err = -ENOMEM;
- goto err_out;
- }
-
- mutex_lock(&mibqp->mutex);
- list_for_each_entry(tmp, &mibqp->xrc_reg_list, list)
- if (tmp->context == context) {
- mutex_unlock(&mibqp->mutex);
- kfree(ctx_entry);
- mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- return 0;
- }
-
- ctx_entry->context = context;
- spin_lock_irqsave(&mibqp->xrc_reg_list_lock, flags);
- list_add_tail(&ctx_entry->list, &mibqp->xrc_reg_list);
- spin_unlock_irqrestore(&mibqp->xrc_reg_list_lock, flags);
- mutex_unlock(&mibqp->mutex);
- mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- return 0;
-
-err_out:
- mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- return err;
-}
-
-int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
-{
-
- struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
-
- struct mlx4_qp *mqp;
- struct mlx4_ib_qp *mibqp;
- struct mlx4_ib_xrc_reg_entry *ctx_entry, *tmp;
- unsigned long flags;
- int found = 0;
- int err = -EINVAL;
-
- mutex_lock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- mqp = mlx4_qp_lookup_lock(to_mdev(xrcd->device)->dev, qp_num);
- if (unlikely(!mqp)) {
- printk(KERN_WARNING "mlx4_ib_unreg_xrc_rcv_qp: "
- "unknown QPN %06x\n", qp_num);
- goto err_out;
- }
-
- mibqp = to_mibqp(mqp);
-
- if (!(mibqp->flags & MLX4_IB_XRC_RCV) ||
- mxrcd->xrcdn != (mibqp->xrcdn & 0xffff))
- goto err_out;
-
- mutex_lock(&mibqp->mutex);
- spin_lock_irqsave(&mibqp->xrc_reg_list_lock, flags);
- list_for_each_entry_safe(ctx_entry, tmp, &mibqp->xrc_reg_list, list)
- if (ctx_entry->context == context) {
- found = 1;
- list_del(&ctx_entry->list);
- spin_unlock_irqrestore(&mibqp->xrc_reg_list_lock, flags);
- kfree(ctx_entry);
- break;
- }
-
- if (!found)
- spin_unlock_irqrestore(&mibqp->xrc_reg_list_lock, flags);
- mutex_unlock(&mibqp->mutex);
- if (!found)
- goto err_out;
-
- /* destroy the QP if the registration list is empty */
- if (list_empty(&mibqp->xrc_reg_list))
- mlx4_ib_destroy_qp(&mibqp->ibqp);
-
- mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
- return 0;
-
-err_out:
- mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex);
+out:
+ mutex_unlock(&qp->mutex);
return err;
}
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/srq.c b/sys/ofed/drivers/infiniband/hw/mlx4/srq.c
index 90918c7..60c5fb0 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/srq.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/srq.c
@@ -33,6 +33,7 @@
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
+#include <linux/slab.h>
#include "mlx4_ib.h"
#include "user.h"
@@ -58,7 +59,7 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
event.event = IB_EVENT_SRQ_ERR;
break;
default:
- printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ pr_warn("Unexpected event type %d "
"on SRQ %06x\n", type, srq->srqn);
return;
}
@@ -67,17 +68,16 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
}
}
-struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
- struct ib_cq *xrc_cq,
- struct ib_xrcd *xrcd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_srq *srq;
struct mlx4_wqe_srq_next_seg *next;
- u32 cqn;
- u16 xrcdn;
+ struct mlx4_wqe_data_seg *scatter;
+ u32 cqn;
+ u16 xrcdn;
int desc_size;
int buf_size;
int err;
@@ -85,14 +85,10 @@ struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
- init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) {
- mlx4_ib_dbg("a size param is out of range. "
- "max_wr = 0x%x, max_sge = 0x%x",
- init_attr->attr.max_wr, init_attr->attr.max_sge);
+ init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
return ERR_PTR(-EINVAL);
- }
- srq = kzalloc(sizeof *srq, GFP_KERNEL);
+ srq = kmalloc(sizeof *srq, GFP_KERNEL);
if (!srq)
return ERR_PTR(-ENOMEM);
@@ -138,8 +134,6 @@ struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
} else {
- struct mlx4_wqe_data_seg *scatter;
-
err = mlx4_db_alloc(dev->dev, &srq->db, 0);
if (err)
goto err_srq;
@@ -182,24 +176,24 @@ struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
}
}
- cqn = xrc_cq ? (u32) (to_mcq(xrc_cq)->mcq.cqn) : 0;
- xrcdn = xrcd ? (u16) (to_mxrcd(xrcd)->xrcdn) :
+ cqn = (init_attr->srq_type == IB_SRQT_XRC) ?
+ to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
+ xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
+ to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn :
(u16) dev->dev->caps.reserved_xrcds;
-
err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt,
srq->db.dma, &srq->msrq);
if (err)
goto err_wrid;
srq->msrq.event = mlx4_ib_srq_event;
+ srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (pd->uobject) {
+ if (pd->uobject)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
err = -EFAULT;
goto err_wrid;
}
- } else
- srq->ibsrq.xrc_srq_num = srq->msrq.srqn;
init_attr->attr.max_wr = srq->msrq.max - 1;
@@ -238,16 +232,12 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
int ret;
/* We don't support resizing SRQs (yet?) */
- if (attr_mask & IB_SRQ_MAX_WR) {
- mlx4_ib_dbg("resize not yet supported");
+ if (attr_mask & IB_SRQ_MAX_WR)
return -EINVAL;
- }
if (attr_mask & IB_SRQ_LIMIT) {
- if (attr->srq_limit >= srq->msrq.max){
- mlx4_ib_dbg("limit (0x%x) too high", attr->srq_limit);
+ if (attr->srq_limit >= srq->msrq.max)
return -EINVAL;
- }
mutex_lock(&srq->mutex);
ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
@@ -260,13 +250,6 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return 0;
}
-struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
-{
- return mlx4_ib_create_xrc_srq(pd, NULL, NULL, init_attr, udata);
-}
-
int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
@@ -289,18 +272,6 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq)
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
struct mlx4_ib_srq *msrq = to_msrq(srq);
- struct mlx4_ib_cq *cq;
-
- mlx4_srq_invalidate(dev->dev, &msrq->msrq);
-
- if (srq->xrc_cq && !srq->uobject) {
- cq = to_mcq(srq->xrc_cq);
- spin_lock_irq(&cq->lock);
- __mlx4_ib_cq_clean(cq, -1, msrq);
- mlx4_srq_remove(dev->dev, &msrq->msrq);
- spin_unlock_irq(&cq->lock);
- } else
- mlx4_srq_remove(dev->dev, &msrq->msrq);
mlx4_srq_free(dev->dev, &msrq->msrq);
mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
@@ -349,16 +320,12 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
- mlx4_ib_dbg("srq num 0x%x: num s/g entries too large (%d)",
- srq->msrq.srqn, wr->num_sge);
err = -EINVAL;
*bad_wr = wr;
break;
}
if (unlikely(srq->head == srq->tail)) {
- mlx4_ib_dbg("srq num 0x%x: No entries available to post.",
- srq->msrq.srqn);
err = -ENOMEM;
*bad_wr = wr;
break;
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c b/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c
new file mode 100644
index 0000000..f19525e
--- /dev/null
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/sysfs.c
@@ -0,0 +1,800 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*#include "core_priv.h"*/
+#include "mlx4_ib.h"
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <rdma/ib_mad.h>
+/*show_admin_alias_guid returns the administratively assigned value of that GUID.
+ * Values returned in buf parameter string:
+ * 0 - requests opensm to assign a value.
+ * ffffffffffffffff - delete this entry.
+ * other - value assigned by administrator.
+ */
+static ssize_t show_admin_alias_guid(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int record_num;/*0-15*/
+ int guid_index_in_rec; /*0 - 7*/
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+
+ record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
+ guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
+
+ return sprintf(buf, "%llx\n", (long long)
+ be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
+ ports_guid[port->num - 1].
+ all_rec_per_port[record_num].
+ all_recs[8 * guid_index_in_rec]));
+}
+
+/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
+ * Values in buf parameter string:
+ * 0 - requests opensm to assign a value.
+ * 0xffffffffffffffff - delete this entry.
+ * other - guid value assigned by the administrator.
+ */
+static ssize_t store_admin_alias_guid(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int record_num;/*0-15*/
+ int guid_index_in_rec; /*0 - 7*/
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+ u64 sysadmin_ag_val;
+
+ record_num = mlx4_ib_iov_dentry->entry_num / 8;
+ guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
+ if (0 == record_num && 0 == guid_index_in_rec) {
+ pr_err("GUID 0 block 0 is RO\n");
+ return count;
+ }
+ sscanf(buf, "%llx", &sysadmin_ag_val);
+ *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
+ all_rec_per_port[record_num].
+ all_recs[GUID_REC_SIZE * guid_index_in_rec] =
+ cpu_to_be64(sysadmin_ag_val);
+
+ /* Change the state to be pending for update */
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
+ = MLX4_GUID_INFO_STATUS_IDLE ;
+
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
+ = MLX4_GUID_INFO_RECORD_SET;
+
+ switch (sysadmin_ag_val) {
+ case MLX4_GUID_FOR_DELETE_VAL:
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
+ = MLX4_GUID_INFO_RECORD_DELETE;
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
+ = MLX4_GUID_SYSADMIN_ASSIGN;
+ break;
+ /* The sysadmin requests the SM to re-assign */
+ case MLX4_NOT_SET_GUID:
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
+ = MLX4_GUID_DRIVER_ASSIGN;
+ break;
+ /* The sysadmin requests a specific value.*/
+ default:
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
+ = MLX4_GUID_SYSADMIN_ASSIGN;
+ break;
+ }
+
+ /* set the record index */
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
+ = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+
+ mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
+
+ return count;
+}
+
+static ssize_t show_port_gid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+ union ib_gid gid;
+ ssize_t ret;
+
+ ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
+ mlx4_ib_iov_dentry->entry_num, &gid, 1);
+ if (ret)
+ return ret;
+ ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(((__be16 *) gid.raw)[0]),
+ be16_to_cpu(((__be16 *) gid.raw)[1]),
+ be16_to_cpu(((__be16 *) gid.raw)[2]),
+ be16_to_cpu(((__be16 *) gid.raw)[3]),
+ be16_to_cpu(((__be16 *) gid.raw)[4]),
+ be16_to_cpu(((__be16 *) gid.raw)[5]),
+ be16_to_cpu(((__be16 *) gid.raw)[6]),
+ be16_to_cpu(((__be16 *) gid.raw)[7]));
+ return ret;
+}
+
+static ssize_t show_phys_port_pkey(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+ u16 pkey;
+ ssize_t ret;
+
+ ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num,
+ mlx4_ib_iov_dentry->entry_num, &pkey, 1);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%04x\n", pkey);
+}
+
+#define DENTRY_REMOVE(_dentry) \
+do { \
+ sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr); \
+} while (0);
+
+static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry,
+ char *_name, struct kobject *_kobj,
+ ssize_t (*show)(struct device *dev,
+ struct device_attribute *attr,
+ char *buf),
+ ssize_t (*store)(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ )
+{
+ int ret = 0;
+ struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry;
+
+ vdentry->ctx = _ctx;
+ vdentry->dentry.show = show;
+ vdentry->dentry.store = store;
+ sysfs_attr_init(&vdentry->dentry.attr);
+ vdentry->dentry.attr.name = vdentry->name;
+ vdentry->dentry.attr.mode = 0;
+ vdentry->kobj = _kobj;
+ snprintf(vdentry->name, 15, "%s", _name);
+
+ if (vdentry->dentry.store)
+ vdentry->dentry.attr.mode |= S_IWUSR;
+
+ if (vdentry->dentry.show)
+ vdentry->dentry.attr.mode |= S_IRUGO;
+
+ ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr);
+ if (ret) {
+ pr_err("failed to create %s\n", vdentry->dentry.attr.name);
+ vdentry->ctx = NULL;
+ return ret;
+ }
+
+ return ret;
+}
+
+int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr)
+{
+ struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
+ int ret;
+
+ ret = sysfs_create_file(port->mcgs_parent, attr);
+ if (ret)
+ pr_err("failed to create %s\n", attr->name);
+
+ return ret;
+}
+
+void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr)
+{
+ struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
+
+ sysfs_remove_file(port->mcgs_parent, attr);
+}
+
+static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
+{
+ int i;
+ char buff[10];
+ struct mlx4_ib_iov_port *port = NULL;
+ int ret = 0 ;
+ struct ib_port_attr attr;
+
+ /* get the physical gid and pkey table sizes.*/
+ ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
+ if (ret)
+ goto err;
+
+ port = &device->iov_ports[port_num - 1];
+ port->dev = device;
+ port->num = port_num;
+ /* Directory structure:
+ * iov -
+ * port num -
+ * admin_guids
+ * gids (operational)
+ * mcg_table
+ */
+ port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar),
+ GFP_KERNEL);
+ if (!port->dentr_ar) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ sprintf(buff, "%d", port_num);
+ port->cur_port = kobject_create_and_add(buff,
+ kobject_get(device->ports_parent));
+ if (!port->cur_port) {
+ ret = -ENOMEM;
+ goto kobj_create_err;
+ }
+ /* admin GUIDs */
+ port->admin_alias_parent = kobject_create_and_add("admin_guids",
+ kobject_get(port->cur_port));
+ if (!port->admin_alias_parent) {
+ ret = -ENOMEM;
+ goto err_admin_guids;
+ }
+ for (i = 0 ; i < attr.gid_tbl_len; i++) {
+ sprintf(buff, "%d", i);
+ port->dentr_ar->dentries[i].entry_num = i;
+ ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i],
+ buff, port->admin_alias_parent,
+ show_admin_alias_guid, store_admin_alias_guid);
+ if (ret)
+ goto err_admin_alias_parent;
+ }
+
+ /* gids subdirectory (operational gids) */
+ port->gids_parent = kobject_create_and_add("gids",
+ kobject_get(port->cur_port));
+ if (!port->gids_parent) {
+ ret = -ENOMEM;
+ goto err_gids;
+ }
+
+ for (i = 0 ; i < attr.gid_tbl_len; i++) {
+ sprintf(buff, "%d", i);
+ port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i;
+ ret = create_sysfs_entry(port,
+ &port->dentr_ar->dentries[attr.gid_tbl_len + i],
+ buff,
+ port->gids_parent, show_port_gid, NULL);
+ if (ret)
+ goto err_gids_parent;
+ }
+
+ /* physical port pkey table */
+ port->pkeys_parent =
+ kobject_create_and_add("pkeys", kobject_get(port->cur_port));
+ if (!port->pkeys_parent) {
+ ret = -ENOMEM;
+ goto err_pkeys;
+ }
+
+ for (i = 0 ; i < attr.pkey_tbl_len; i++) {
+ sprintf(buff, "%d", i);
+ port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i;
+ ret = create_sysfs_entry(port,
+ &port->dentr_ar->dentries[2 * attr.gid_tbl_len + i],
+ buff, port->pkeys_parent,
+ show_phys_port_pkey, NULL);
+ if (ret)
+ goto err_pkeys_parent;
+ }
+
+ /* MCGs table */
+ port->mcgs_parent =
+ kobject_create_and_add("mcgs", kobject_get(port->cur_port));
+ if (!port->mcgs_parent) {
+ ret = -ENOMEM;
+ goto err_mcgs;
+ }
+ return 0;
+
+err_mcgs:
+ kobject_put(port->cur_port);
+
+err_pkeys_parent:
+ kobject_put(port->pkeys_parent);
+
+err_pkeys:
+ kobject_put(port->cur_port);
+
+err_gids_parent:
+ kobject_put(port->gids_parent);
+
+err_gids:
+ kobject_put(port->cur_port);
+
+err_admin_alias_parent:
+ kobject_put(port->admin_alias_parent);
+
+err_admin_guids:
+ kobject_put(port->cur_port);
+ kobject_put(port->cur_port); /* once more for create_and_add buff */
+
+kobj_create_err:
+ kobject_put(device->ports_parent);
+ kfree(port->dentr_ar);
+
+err:
+ pr_err("add_port_entries FAILED: for port:%d, error: %d\n",
+ port_num, ret);
+ return ret;
+}
+
+static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
+{
+ char base_name[9];
+
+ /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
+ strlcpy(name, pci_name(dev->dev->pdev), max);
+ strncpy(base_name, name, 8); /*till xxxx:yy:*/
+ base_name[8] = '\0';
+ /* with no ARI only 3 last bits are used so when the fn is higher than 8
+ * need to add it to the dev num, so count in the last number will be
+ * modulo 8 */
+ sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
+}
+
+struct mlx4_port {
+ struct kobject kobj;
+ struct mlx4_ib_dev *dev;
+ struct attribute_group pkey_group;
+ struct attribute_group gid_group;
+ u8 port_num;
+ int slave;
+};
+
+
+static void mlx4_port_release(struct kobject *kobj)
+{
+ struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
+ struct attribute *a;
+ int i;
+
+ for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
+ kfree(a);
+ kfree(p->pkey_group.attrs);
+ for (i = 0; (a = p->gid_group.attrs[i]); ++i)
+ kfree(a);
+ kfree(p->gid_group.attrs);
+ kfree(p);
+}
+
+struct port_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf);
+ ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
+ const char *buf, size_t count);
+};
+
+static ssize_t port_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
+
+ if (!port_attr->show)
+ return -EIO;
+ return port_attr->show(p, port_attr, buf);
+}
+
+static ssize_t port_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t size)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
+
+ if (!port_attr->store)
+ return -EIO;
+ return port_attr->store(p, port_attr, buf, size);
+}
+
+static const struct sysfs_ops port_sysfs_ops = {
+ .show = port_attr_show,
+ .store = port_attr_store,
+};
+
+static struct kobj_type port_type = {
+ .release = mlx4_port_release,
+ .sysfs_ops = &port_sysfs_ops,
+};
+
+struct port_table_attribute {
+ struct port_attribute attr;
+ char name[8];
+ int index;
+};
+
+static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ ssize_t ret = -ENODEV;
+
+ if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
+ (p->dev->dev->caps.pkey_table_len[p->port_num]))
+ ret = sprintf(buf, "none\n");
+ else
+ ret = sprintf(buf, "%d\n",
+ p->dev->pkeys.virt2phys_pkey[p->slave]
+ [p->port_num - 1][tab_attr->index]);
+ return ret;
+}
+
+static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int idx;
+ int err;
+
+ /* do not allow remapping Dom0 virtual pkey table */
+ if (p->slave == mlx4_master_func_num(p->dev->dev))
+ return -EINVAL;
+
+ if (!strncasecmp(buf, "no", 2))
+ idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1;
+ else if (sscanf(buf, "%i", &idx) != 1 ||
+ idx >= p->dev->dev->caps.pkey_table_len[p->port_num] ||
+ idx < 0)
+ return -EINVAL;
+
+ p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1]
+ [tab_attr->index] = idx;
+ mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num,
+ tab_attr->index, idx);
+ err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num);
+ if (err) {
+ pr_err("mlx4_gen_pkey_eqe failed for slave %d,"
+ " port %d, index %d\n", p->slave, p->port_num, idx);
+ return err;
+ }
+ return count;
+}
+
+static ssize_t show_port_gid_idx(struct mlx4_port *p,
+ struct port_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", p->slave);
+}
+
+static struct attribute **
+alloc_group_attrs(ssize_t (*show)(struct mlx4_port *,
+ struct port_attribute *, char *buf),
+ ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
+ const char *buf, size_t count),
+ int len)
+{
+ struct attribute **tab_attr;
+ struct port_table_attribute *element;
+ int i;
+
+ tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL);
+ if (!tab_attr)
+ return NULL;
+
+ for (i = 0; i < len; i++) {
+ element = kzalloc(sizeof (struct port_table_attribute),
+ GFP_KERNEL);
+ if (!element)
+ goto err;
+ if (snprintf(element->name, sizeof (element->name),
+ "%d", i) >= sizeof (element->name)) {
+ kfree(element);
+ goto err;
+ }
+ sysfs_attr_init(&element->attr.attr);
+ element->attr.attr.name = element->name;
+ if (store) {
+ element->attr.attr.mode = S_IWUSR | S_IRUGO;
+ element->attr.store = store;
+ } else
+ element->attr.attr.mode = S_IRUGO;
+
+ element->attr.show = show;
+ element->index = i;
+ tab_attr[i] = &element->attr.attr;
+ }
+ return tab_attr;
+
+err:
+ while (--i >= 0)
+ kfree(tab_attr[i]);
+ kfree(tab_attr);
+ return NULL;
+}
+
+static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
+{
+ struct mlx4_port *p;
+ int i;
+ int ret;
+ int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+
+ p = kzalloc(sizeof *p, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ p->dev = dev;
+ p->port_num = port_num;
+ p->slave = slave;
+
+ ret = kobject_init_and_add(&p->kobj, &port_type,
+ kobject_get(dev->dev_ports_parent[slave]),
+ "%d", port_num);
+ if (ret)
+ goto err_alloc;
+
+ p->pkey_group.name = "pkey_idx";
+ if (is_eth)
+ p->pkey_group.attrs =
+ alloc_group_attrs(show_port_pkey, NULL,
+ dev->dev->caps.pkey_table_len[port_num]);
+ else
+ p->pkey_group.attrs =
+ alloc_group_attrs(show_port_pkey, store_port_pkey,
+ dev->dev->caps.pkey_table_len[port_num]);
+ if (!p->pkey_group.attrs)
+ goto err_alloc;
+
+ ret = sysfs_create_group(&p->kobj, &p->pkey_group);
+ if (ret)
+ goto err_free_pkey;
+
+ p->gid_group.name = "gid_idx";
+ p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
+ if (!p->gid_group.attrs)
+ goto err_free_pkey;
+
+ ret = sysfs_create_group(&p->kobj, &p->gid_group);
+ if (ret)
+ goto err_free_gid;
+
+ list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
+ return 0;
+
+err_free_gid:
+ kfree(p->gid_group.attrs[0]);
+ kfree(p->gid_group.attrs);
+
+err_free_pkey:
+ for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i)
+ kfree(p->pkey_group.attrs[i]);
+ kfree(p->pkey_group.attrs);
+
+err_alloc:
+ kobject_put(dev->dev_ports_parent[slave]);
+ kfree(p);
+ return ret;
+}
+
+static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
+{
+ char name[32];
+ int err;
+ int port;
+ struct kobject *p, *t;
+ struct mlx4_port *mport;
+
+ get_name(dev, name, slave, sizeof name);
+
+ dev->pkeys.device_parent[slave] =
+ kobject_create_and_add(name, kobject_get(dev->iov_parent));
+
+ if (!dev->pkeys.device_parent[slave]) {
+ err = -ENOMEM;
+ goto fail_dev;
+ }
+
+ INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]);
+
+ dev->dev_ports_parent[slave] =
+ kobject_create_and_add("ports",
+ kobject_get(dev->pkeys.device_parent[slave]));
+
+ if (!dev->dev_ports_parent[slave]) {
+ err = -ENOMEM;
+ goto err_ports;
+ }
+
+ for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
+ err = add_port(dev, port, slave);
+ if (err)
+ goto err_add;
+ }
+ return 0;
+
+err_add:
+ list_for_each_entry_safe(p, t,
+ &dev->pkeys.pkey_port_list[slave],
+ entry) {
+ list_del(&p->entry);
+ mport = container_of(p, struct mlx4_port, kobj);
+ sysfs_remove_group(p, &mport->pkey_group);
+ sysfs_remove_group(p, &mport->gid_group);
+ kobject_put(p);
+ }
+ kobject_put(dev->dev_ports_parent[slave]);
+
+err_ports:
+ kobject_put(dev->pkeys.device_parent[slave]);
+ /* extra put for the device_parent create_and_add */
+ kobject_put(dev->pkeys.device_parent[slave]);
+
+fail_dev:
+ kobject_put(dev->iov_parent);
+ return err;
+}
+
+static int register_pkey_tree(struct mlx4_ib_dev *device)
+{
+ int i;
+
+ if (!mlx4_is_master(device->dev))
+ return 0;
+
+ for (i = 0; i <= device->dev->num_vfs; ++i)
+ register_one_pkey_tree(device, i);
+
+ return 0;
+}
+
+static void unregister_pkey_tree(struct mlx4_ib_dev *device)
+{
+ int slave;
+ struct kobject *p, *t;
+ struct mlx4_port *port;
+
+ if (!mlx4_is_master(device->dev))
+ return;
+
+ for (slave = device->dev->num_vfs; slave >= 0; --slave) {
+ list_for_each_entry_safe(p, t,
+ &device->pkeys.pkey_port_list[slave],
+ entry) {
+ list_del(&p->entry);
+ port = container_of(p, struct mlx4_port, kobj);
+ sysfs_remove_group(p, &port->pkey_group);
+ sysfs_remove_group(p, &port->gid_group);
+ kobject_put(p);
+ kobject_put(device->dev_ports_parent[slave]);
+ }
+ kobject_put(device->dev_ports_parent[slave]);
+ kobject_put(device->pkeys.device_parent[slave]);
+ kobject_put(device->pkeys.device_parent[slave]);
+ kobject_put(device->iov_parent);
+ }
+}
+
+int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
+{
+ int i;
+ int ret = 0;
+
+ if (!mlx4_is_master(dev->dev))
+ return 0;
+
+ dev->iov_parent =
+ kobject_create_and_add("iov",
+ kobject_get(dev->ib_dev.ports_parent->parent));
+ if (!dev->iov_parent) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dev->ports_parent =
+ kobject_create_and_add("ports",
+ kobject_get(dev->iov_parent));
+ if (!dev->iov_parent) {
+ ret = -ENOMEM;
+ goto err_ports;
+ }
+
+ for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
+ ret = add_port_entries(dev, i);
+ if (ret)
+ goto err_add_entries;
+ }
+
+ ret = register_pkey_tree(dev);
+ if (ret)
+ goto err_add_entries;
+ return 0;
+
+err_add_entries:
+ kobject_put(dev->ports_parent);
+
+err_ports:
+ kobject_put(dev->iov_parent);
+err:
+ kobject_put(dev->ib_dev.ports_parent->parent);
+ pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
+ return ret;
+}
+
+static void unregister_alias_guid_tree(struct mlx4_ib_dev *device)
+{
+ struct mlx4_ib_iov_port *p;
+ int i;
+
+ if (!mlx4_is_master(device->dev))
+ return;
+
+ for (i = 0; i < device->dev->caps.num_ports; i++) {
+ p = &device->iov_ports[i];
+ kobject_put(p->admin_alias_parent);
+ kobject_put(p->gids_parent);
+ kobject_put(p->pkeys_parent);
+ kobject_put(p->mcgs_parent);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->dev->ports_parent);
+ kfree(p->dentr_ar);
+ }
+}
+
+void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
+{
+ unregister_alias_guid_tree(device);
+ unregister_pkey_tree(device);
+ kobject_put(device->ports_parent);
+ kobject_put(device->iov_parent);
+ kobject_put(device->iov_parent);
+ kobject_put(device->ib_dev.ports_parent->parent);
+}
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/user.h b/sys/ofed/drivers/infiniband/hw/mlx4/user.h
index 13beede..07e6769 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/user.h
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/user.h
@@ -40,7 +40,9 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
-#define MLX4_IB_UVERBS_ABI_VERSION 3
+
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3
+#define MLX4_IB_UVERBS_ABI_VERSION 4
/*
* Make sure that all structs defined in this file remain laid out so
@@ -50,10 +52,18 @@
* instead.
*/
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+ __u32 qp_tab_size;
+ __u16 bf_reg_size;
+ __u16 bf_regs_per_page;
+};
+
struct mlx4_ib_alloc_ucontext_resp {
+ __u32 dev_caps;
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
+ __u32 cqe_size;
};
struct mlx4_ib_alloc_pd_resp {
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/wc.c b/sys/ofed/drivers/infiniband/hw/mlx4/wc.c
index 827de14..c73a61c 100644
--- a/sys/ofed/drivers/infiniband/hw/mlx4/wc.c
+++ b/sys/ofed/drivers/infiniband/hw/mlx4/wc.c
@@ -71,4 +71,3 @@ int mlx4_wc_enabled(void)
}
#endif
-
diff --git a/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c b/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c
index 81e2838..f9d1872 100644
--- a/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/sys/ofed/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1808,7 +1808,7 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
case IB_QPT_RAW_IPV6:
op_mod = 2;
break;
- case IB_QPT_RAW_ETY:
+ case IB_QPT_RAW_ETHERTYPE:
op_mod = 3;
break;
default:
diff --git a/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c b/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c
index 5401364..10f7fd3 100644
--- a/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/sys/ofed/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1325,7 +1325,7 @@ static void __init mthca_validate_profile(void)
if (log_mtts_per_seg == 0)
log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) {
- printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %ld\n",
+ printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %d\n",
log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8));
log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
}
diff --git a/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c b/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c
index 783da4b..ab139bf 100644
--- a/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/sys/ofed/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -448,6 +448,7 @@ static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int pag
page * MTHCA_ICM_PAGE_SIZE;
}
+
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>
diff --git a/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c b/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
index e547739..eaec3e6 100644
--- a/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/sys/ofed/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1006,7 +1006,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
}
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
+ u64 virt, int acc, struct ib_udata *udata, int mr_id)
{
struct mthca_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk;
@@ -1402,7 +1402,7 @@ int mthca_register_device(struct mthca_dev *dev)
mutex_init(&dev->cap_mask_mutex);
- ret = ib_register_device(&dev->ib_dev);
+ ret = ib_register_device(&dev->ib_dev, NULL);
if (ret)
return ret;
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
index 5740eb0..768833d 100644
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -109,7 +109,8 @@ enum {
IPOIB_ENCAP_LEN = 4,
IPOIB_HEADER_LEN = IPOIB_ENCAP_LEN + INFINIBAND_ALEN,
IPOIB_UD_MAX_MTU = 4 * 1024,
- IPOIB_UD_RX_SG = (IPOIB_UD_MAX_MTU / MJUMPAGESIZE),
+// IPOIB_UD_RX_SG = (IPOIB_UD_MAX_MTU / MJUMPAGESIZE),
+ IPOIB_UD_RX_SG = 2,
IPOIB_UD_TX_SG = (IPOIB_UD_MAX_MTU / MCLBYTES) + 2,
IPOIB_CM_MAX_MTU = (64 * 1024),
IPOIB_CM_TX_SG = (IPOIB_CM_MAX_MTU / MCLBYTES) + 2,
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9081e13..bae1740 100644
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1539,3 +1539,20 @@ ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
module_init(ipoib_init_module);
module_exit(ipoib_cleanup_module);
+
+#undef MODULE_VERSION
+#include <sys/module.h>
+static int
+ipoib_evhand(module_t mod, int event, void *arg)
+{
+ return (0);
+}
+
+static moduledata_t ipoib_mod = {
+ .name = "ipoib",
+ .evhand = ipoib_evhand,
+};
+
+DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_SMP, SI_ORDER_ANY);
+MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
+
diff --git a/sys/ofed/drivers/net/mlx4/Makefile b/sys/ofed/drivers/net/mlx4/Makefile
index b9d2e7e..bac8eb3 100644
--- a/sys/ofed/drivers/net/mlx4/Makefile
+++ b/sys/ofed/drivers/net/mlx4/Makefile
@@ -1,9 +1,34 @@
-obj-$(CONFIG_MLX4_CORE) += mlx4_core.o
+# $FreeBSD$
+#.PATH: ${.CURDIR}/../../ofed/drivers/net/mlx4:${.CURDIR}/../../ofed/include/linux
+.PATH: ${.CURDIR}/../../../../../include/linux
-mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \
- mr.o pd.o port.o profile.o qp.o reset.o sense.o srq.o xrcd.o
+.include <bsd.own.mk>
-obj-$(CONFIG_MLX4_EN) += mlx4_en.o
-mlx4_en-y := en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \
- en_resources.o en_netdev.o en_frag.o en_selftest.o
+KMOD = mlx4
+SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
+SRCS+= alloc.c catas.c cmd.c cq.c eq.c fw.c icm.c intf.c main.c mcg.c mr.c linux_compat.c linux_radix.c
+SRCS+= pd.c port.c profile.c qp.c reset.c sense.c srq.c resource_tracker.c sys_tune.c
+SRCS+= opt_inet.h opt_inet6.h
+
+
+#CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4
+#CFLAGS+= -I${.CURDIR}/../../ofed/include/
+CFLAGS+= -I${.CURDIR}/../../../../../include
+
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
+.include <bsd.kmod.mk>
+
+CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions
+
diff --git a/sys/ofed/drivers/net/mlx4/alloc.c b/sys/ofed/drivers/net/mlx4/alloc.c
index c22791a..38f3caf 100644
--- a/sys/ofed/drivers/net/mlx4/alloc.c
+++ b/sys/ofed/drivers/net/mlx4/alloc.c
@@ -34,6 +34,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
+//#include <linux/export.h> /* XXX SK probabaly not needed in freeBSD XXX */
#include <linux/bitmap.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
@@ -77,14 +78,15 @@ void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj)
static unsigned long find_aligned_range(unsigned long *bitmap,
u32 start, u32 nbits,
- int len, int align)
+ int len, int align, u32 skip_mask)
{
unsigned long end, i;
again:
start = ALIGN(start, align);
- while ((start < nbits) && test_bit(start, bitmap))
+ while ((start < nbits) && (test_bit(start, bitmap) ||
+ (start & skip_mask)))
start += align;
if (start >= nbits)
@@ -95,7 +97,7 @@ again:
return -1;
for (i = start + 1; i < end; i++) {
- if (test_bit(i, bitmap)) {
+ if (test_bit(i, bitmap) || ((u32)i & skip_mask)) {
start = i + 1;
goto again;
}
@@ -104,27 +106,27 @@ again:
return start;
}
-u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align)
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt,
+ int align, u32 skip_mask)
{
- u32 obj, i;
+ u32 obj;
- if (likely(cnt == 1 && align == 1))
+ if (likely(cnt == 1 && align == 1 && !skip_mask))
return mlx4_bitmap_alloc(bitmap);
spin_lock(&bitmap->lock);
obj = find_aligned_range(bitmap->table, bitmap->last,
- bitmap->max, cnt, align);
+ bitmap->max, cnt, align, skip_mask);
if (obj >= bitmap->max) {
bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
& bitmap->mask;
obj = find_aligned_range(bitmap->table, 0, bitmap->max,
- cnt, align);
+ cnt, align, skip_mask);
}
if (obj < bitmap->max) {
- for (i = 0; i < cnt; i++)
- set_bit(obj + i, bitmap->table);
+ bitmap_set(bitmap->table, obj, cnt);
if (obj == bitmap->last) {
bitmap->last = (obj + cnt);
if (bitmap->last >= bitmap->max)
@@ -149,16 +151,10 @@ u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap)
void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
{
- u32 i;
-
obj &= bitmap->max + bitmap->reserved_top - 1;
spin_lock(&bitmap->lock);
- for (i = 0; i < cnt; i++)
- clear_bit(obj + i, bitmap->table);
- bitmap->last = min(bitmap->last, obj);
- bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
- & bitmap->mask;
+ bitmap_clear(bitmap->table, obj, cnt);
bitmap->avail += cnt;
spin_unlock(&bitmap->lock);
}
@@ -166,12 +162,17 @@ void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
u32 reserved_bot, u32 reserved_top)
{
- int i;
+ /* sanity check */
+ if (num <= (u64)reserved_top + reserved_bot)
+ return -EINVAL;
/* num must be a power of 2 */
if (num != roundup_pow_of_two(num))
return -EINVAL;
+ if (reserved_bot + reserved_top >= num)
+ return -EINVAL;
+
bitmap->last = 0;
bitmap->top = 0;
bitmap->max = num - reserved_top;
@@ -184,8 +185,7 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
if (!bitmap->table)
return -ENOMEM;
- for (i = 0; i < reserved_bot; ++i)
- set_bit(i, bitmap->table);
+ bitmap_set(bitmap->table, 0, reserved_bot);
return 0;
}
@@ -207,7 +207,6 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
{
dma_addr_t t;
- buf->direct.buf = NULL;
if (size <= max_direct) {
buf->nbufs = 1;
buf->npages = 1;
@@ -229,11 +228,10 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
int i;
buf->direct.buf = NULL;
- buf->direct.map = 0;
buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
buf->npages = buf->nbufs;
buf->page_shift = PAGE_SHIFT;
- buf->page_list = kzalloc(buf->nbufs * sizeof *buf->page_list,
+ buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
GFP_KERNEL);
if (!buf->page_list)
return -ENOMEM;
@@ -291,7 +289,6 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
buf->page_list[i].map);
kfree(buf->page_list);
}
- buf->direct.buf = NULL;
}
EXPORT_SYMBOL_GPL(mlx4_buf_free);
diff --git a/sys/ofed/drivers/net/mlx4/catas.c b/sys/ofed/drivers/net/mlx4/catas.c
index 334aad9..185129a 100644
--- a/sys/ofed/drivers/net/mlx4/catas.c
+++ b/sys/ofed/drivers/net/mlx4/catas.c
@@ -32,10 +32,12 @@
*/
#include <linux/workqueue.h>
+#include <linux/module.h>
#include "mlx4.h"
-#define MLX4_CATAS_POLL_INTERVAL (5 * HZ)
+#define MLX4_CATAS_POLL_INTERVAL (5 * HZ)
+
static DEFINE_SPINLOCK(catas_lock);
@@ -45,7 +47,8 @@ static struct work_struct catas_work;
static int internal_err_reset = 1;
module_param(internal_err_reset, int, 0644);
MODULE_PARM_DESC(internal_err_reset,
- "Reset device on internal errors if non-zero (default 1)");
+ "Reset device on internal errors if non-zero"
+ " (default 1, in SRIOV mode default is 0)");
static void dump_err_buf(struct mlx4_dev *dev)
{
@@ -65,16 +68,21 @@ static void poll_catas(unsigned long dev_ptr)
struct mlx4_priv *priv = mlx4_priv(dev);
if (readl(priv->catas_err.map)) {
- dump_err_buf(dev);
-
- mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
+ /* If the device is off-line, we cannot try to recover it */
+ if (pci_channel_offline(dev->pdev))
+ mod_timer(&priv->catas_err.timer,
+ round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
+ else {
+ dump_err_buf(dev);
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
- if (internal_err_reset) {
- spin_lock(&catas_lock);
- list_add(&priv->catas_err.list, &catas_list);
- spin_unlock(&catas_lock);
+ if (internal_err_reset) {
+ spin_lock(&catas_lock);
+ list_add(&priv->catas_err.list, &catas_list);
+ spin_unlock(&catas_lock);
- queue_work(mlx4_wq, &catas_work);
+ queue_work(mlx4_wq, &catas_work);
+ }
}
} else
mod_timer(&priv->catas_err.timer,
@@ -89,9 +97,6 @@ static void catas_reset(struct work_struct *work)
LIST_HEAD(tlist);
int ret;
- if (!mutex_trylock(&drv_mutex))
- return;
-
spin_lock_irq(&catas_lock);
list_splice_init(&catas_list, &tlist);
spin_unlock_irq(&catas_lock);
@@ -99,23 +104,30 @@ static void catas_reset(struct work_struct *work)
list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
struct pci_dev *pdev = priv->dev.pdev;
+ /* If the device is off-line, we cannot reset it */
+ if (pci_channel_offline(pdev))
+ continue;
+
ret = mlx4_restart_one(priv->dev.pdev);
/* 'priv' now is not valid */
if (ret)
- printk(KERN_ERR "mlx4 %s: Reset failed (%d)\n",
- pci_name(pdev), ret);
+ pr_err("mlx4 %s: Reset failed (%d)\n",
+ pci_name(pdev), ret);
else {
dev = pci_get_drvdata(pdev);
mlx4_dbg(dev, "Reset succeeded\n");
}
}
- mutex_unlock(&drv_mutex);
}
void mlx4_start_catas_poll(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- unsigned long addr;
+ phys_addr_t addr;
+
+ /*If we are in SRIOV the default of the module param must be 0*/
+ if (mlx4_is_mfunc(dev))
+ internal_err_reset = 0;
INIT_LIST_HEAD(&priv->catas_err.list);
init_timer(&priv->catas_err.timer);
@@ -126,8 +138,8 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
if (!priv->catas_err.map) {
- mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
- addr);
+ mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
+ (unsigned long long) addr);
return;
}
diff --git a/sys/ofed/drivers/net/mlx4/cmd.c b/sys/ofed/drivers/net/mlx4/cmd.c
index bc4a618..5c78cdc 100644
--- a/sys/ofed/drivers/net/mlx4/cmd.c
+++ b/sys/ofed/drivers/net/mlx4/cmd.c
@@ -33,16 +33,24 @@
*/
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/errno.h>
#include <linux/mlx4/cmd.h>
+#include <linux/semaphore.h>
+#include <rdma/ib_smi.h>
#include <asm/io.h>
#include "mlx4.h"
+#include "fw.h"
#define CMD_POLL_TOKEN 0xffff
+#define INBOX_MASK 0xffffffffffffff00ULL
+
+#define CMD_CHAN_VER 1
+#define CMD_CHAN_IF_REV 1
enum {
/* command completed successfully: */
@@ -111,6 +119,9 @@ struct mlx4_cmd_context {
u8 fw_status;
};
+static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr_cmd *in_vhcr);
+
static int mlx4_status_to_errno(u8 status)
{
static const int trans_table[] = {
@@ -141,9 +152,157 @@ static int mlx4_status_to_errno(u8 status)
return trans_table[status];
}
+static u8 mlx4_errno_to_status(int errno)
+{
+ switch (errno) {
+ case -EPERM:
+ return CMD_STAT_BAD_OP;
+ case -EINVAL:
+ return CMD_STAT_BAD_PARAM;
+ case -ENXIO:
+ return CMD_STAT_BAD_SYS_STATE;
+ case -EBUSY:
+ return CMD_STAT_RESOURCE_BUSY;
+ case -ENOMEM:
+ return CMD_STAT_EXCEED_LIM;
+ case -ENFILE:
+ return CMD_STAT_ICM_ERROR;
+ default:
+ return CMD_STAT_INTERNAL_ERR;
+ }
+}
+
+static int comm_pending(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 status = readl(&priv->mfunc.comm->slave_read);
+
+ return (swab32(status) >> 31) != priv->cmd.comm_toggle;
+}
+
+static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 val;
+
+ priv->cmd.comm_toggle ^= 1;
+ val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31);
+ __raw_writel((__force u32) cpu_to_be32(val),
+ &priv->mfunc.comm->slave_write);
+ mmiowb();
+}
+
+static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param,
+ unsigned long timeout)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ unsigned long end;
+ int err = 0;
+ int ret_from_pending = 0;
+
+ /* First, verify that the master reports correct status */
+ if (comm_pending(dev)) {
+ mlx4_warn(dev, "Communication channel is not idle."
+ "my toggle is %d (cmd:0x%x)\n",
+ priv->cmd.comm_toggle, cmd);
+ return -EAGAIN;
+ }
+
+ /* Write command */
+ down(&priv->cmd.poll_sem);
+ mlx4_comm_cmd_post(dev, cmd, param);
+
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (comm_pending(dev) && time_before(jiffies, end))
+ cond_resched();
+ ret_from_pending = comm_pending(dev);
+ if (ret_from_pending) {
+ /* check if the slave is trying to boot in the middle of
+ * FLR process. The only non-zero result in the RESET command
+ * is MLX4_DELAY_RESET_SLAVE*/
+ if ((MLX4_COMM_CMD_RESET == cmd)) {
+ mlx4_warn(dev, "Got slave FLRed from Communication"
+ " channel (ret:0x%x)\n", ret_from_pending);
+ err = MLX4_DELAY_RESET_SLAVE;
+ } else {
+ mlx4_warn(dev, "Communication channel timed out\n");
+ err = -ETIMEDOUT;
+ }
+ }
+
+ up(&priv->cmd.poll_sem);
+ return err;
+}
+
+static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op,
+ u16 param, unsigned long timeout)
+{
+ struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
+ struct mlx4_cmd_context *context;
+ unsigned long end;
+ int err = 0;
+
+ down(&cmd->event_sem);
+
+ spin_lock(&cmd->context_lock);
+ BUG_ON(cmd->free_head < 0);
+ context = &cmd->context[cmd->free_head];
+ context->token += cmd->token_mask + 1;
+ cmd->free_head = context->next;
+ spin_unlock(&cmd->context_lock);
+
+ init_completion(&context->done);
+
+ mlx4_comm_cmd_post(dev, op, param);
+
+ if (!wait_for_completion_timeout(&context->done,
+ msecs_to_jiffies(timeout))) {
+ mlx4_warn(dev, "communication channel command 0x%x timed out\n", op);
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = context->result;
+ if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) {
+ mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
+ op, context->fw_status);
+ goto out;
+ }
+
+out:
+ /* wait for comm channel ready
+ * this is necessary for prevention the race
+ * when switching between event to polling mode
+ */
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (comm_pending(dev) && time_before(jiffies, end))
+ cond_resched();
+
+ spin_lock(&cmd->context_lock);
+ context->next = cmd->free_head;
+ cmd->free_head = context - cmd->context;
+ spin_unlock(&cmd->context_lock);
+
+ up(&cmd->event_sem);
+ return err;
+}
+
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+ unsigned long timeout)
+{
+ if (mlx4_priv(dev)->cmd.use_events)
+ return mlx4_comm_cmd_wait(dev, cmd, param, timeout);
+ return mlx4_comm_cmd_poll(dev, cmd, param, timeout);
+}
+
static int cmd_pending(struct mlx4_dev *dev)
{
- u32 status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
+ u32 status;
+
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
+ status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
return (status & swab32(1 << HCR_GO_BIT)) ||
(mlx4_priv(dev)->cmd.toggle ==
@@ -161,13 +320,33 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
mutex_lock(&cmd->hcr_mutex);
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ ret = -EIO;
+ goto out;
+ }
+
end = jiffies;
if (event)
end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS);
while (cmd_pending(dev)) {
- if (time_after_eq(jiffies, end))
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ ret = -EIO;
+ goto out;
+ }
+
+ if (time_after_eq(jiffies, end)) {
+ mlx4_err(dev, "%s:cmd_pending failed\n", __func__);
goto out;
+ }
cond_resched();
}
@@ -191,7 +370,7 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
(cmd->toggle << HCR_T_BIT) |
(event ? (1 << HCR_E_BIT) : 0) |
(op_modifier << HCR_OPMOD_SHIFT) |
- op), hcr + 6);
+ op), hcr + 6);
/*
* Make sure that our HCR writes don't get mixed in with
@@ -208,6 +387,65 @@ out:
return ret;
}
+static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+ int out_is_imm, u32 in_modifier, u8 op_modifier,
+ u16 op, unsigned long timeout)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr;
+ int ret;
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+
+ vhcr->in_param = cpu_to_be64(in_param);
+ vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0;
+ vhcr->in_modifier = cpu_to_be32(in_modifier);
+ vhcr->opcode = cpu_to_be16((((u16) op_modifier) << 12) | (op & 0xfff));
+ vhcr->token = cpu_to_be16(CMD_POLL_TOKEN);
+ vhcr->status = 0;
+ vhcr->flags = !!(priv->cmd.use_events) << 6;
+
+ if (mlx4_is_master(dev)) {
+ ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr);
+ if (!ret) {
+ if (out_is_imm) {
+ if (out_param)
+ *out_param =
+ be64_to_cpu(vhcr->out_param);
+ else {
+ mlx4_err(dev, "response expected while"
+ "output mailbox is NULL for "
+ "command 0x%x\n", op);
+ vhcr->status = CMD_STAT_BAD_PARAM;
+ }
+ }
+ ret = mlx4_status_to_errno(vhcr->status);
+ }
+ } else {
+ ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0,
+ MLX4_COMM_TIME + timeout);
+ if (!ret) {
+ if (out_is_imm) {
+ if (out_param)
+ *out_param =
+ be64_to_cpu(vhcr->out_param);
+ else {
+ mlx4_err(dev, "response expected while"
+ "output mailbox is NULL for "
+ "command 0x%x\n", op);
+ vhcr->status = CMD_STAT_BAD_PARAM;
+ }
+ }
+ ret = mlx4_status_to_errno(vhcr->status);
+ } else
+ mlx4_err(dev, "failed execution of VHCR_POST command"
+ "opcode 0x%x\n", op);
+ }
+
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return ret;
+}
+
static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
u16 op, unsigned long timeout)
@@ -220,16 +458,36 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
down(&priv->cmd.poll_sem);
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ err = -EIO;
+ goto out;
+ }
+
err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0);
if (err)
goto out;
end = msecs_to_jiffies(timeout) + jiffies;
- while (cmd_pending(dev) && time_before(jiffies, end))
+ while (cmd_pending(dev) && time_before(jiffies, end)) {
+ if (pci_channel_offline(dev->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ err = -EIO;
+ goto out;
+ }
+
cond_resched();
+ }
if (cmd_pending(dev)) {
+ mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op);
err = -ETIMEDOUT;
goto out;
}
@@ -240,13 +498,12 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
__raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
(u64) be32_to_cpu((__force __be32)
__raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4));
- stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24;
+ stat = be32_to_cpu((__force __be32)
+ __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24;
err = mlx4_status_to_errno(stat);
- if (err) {
- if (op != MLX4_CMD_SET_NODE || stat != CMD_STAT_BAD_OP)
- mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
- op, stat);
- }
+ if (err)
+ mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
+ op, stat);
out:
up(&priv->cmd.poll_sem);
@@ -270,6 +527,19 @@ void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param)
complete(&context->done);
}
+static int get_status(struct mlx4_dev *dev, u32 *status, int *go_bit,
+ int *t_bit)
+{
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
+ *status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
+ *t_bit = !!(*status & swab32(1 << HCR_T_BIT));
+ *go_bit = !!(*status & swab32(1 << HCR_GO_BIT));
+
+ return 0;
+}
+
static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
u16 op, unsigned long timeout)
@@ -277,6 +547,8 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
struct mlx4_cmd_context *context;
int err = 0;
+ int go_bit = 0, t_bit = 0, stat_err;
+ u32 status = 0;
down(&cmd->event_sem);
@@ -289,19 +561,29 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
init_completion(&context->done);
- mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
- in_modifier, op_modifier, op, context->token, 1);
+ err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
+ in_modifier, op_modifier, op, context->token, 1);
+ if (err) {
+ mlx4_warn(dev, "command 0x%x could not be posted (%d)\n",
+ op, err);
+ goto out;
+ }
- if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) {
+ if (!wait_for_completion_timeout(&context->done,
+ msecs_to_jiffies(timeout))) {
+ stat_err = get_status(dev, &status, &go_bit, &t_bit);
+ mlx4_warn(dev, "command 0x%x timed out: "
+ "get_status err=%d, status=0x%x, go_bit=%d, "
+ "t_bit=%d, toggle=0x%x\n", op, stat_err, status,
+ go_bit, t_bit, mlx4_priv(dev)->cmd.toggle);
err = -EBUSY;
goto out;
}
err = context->result;
if (err) {
- if (op != MLX4_CMD_SET_NODE || context->fw_status != CMD_STAT_BAD_OP)
- mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
- op, context->fw_status);
+ mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
+ op, context->fw_status);
goto out;
}
@@ -320,42 +602,1397 @@ out:
int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
- u16 op, unsigned long timeout)
+ u16 op, unsigned long timeout, int native)
{
- if (mlx4_priv(dev)->cmd.use_events && !cold)
- return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm,
- in_modifier, op_modifier, op, timeout);
- else
- return mlx4_cmd_poll(dev, in_param, out_param, out_is_imm,
- in_modifier, op_modifier, op, timeout);
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
+ if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) {
+ if (mlx4_priv(dev)->cmd.use_events)
+ return mlx4_cmd_wait(dev, in_param, out_param,
+ out_is_imm, in_modifier,
+ op_modifier, op, timeout);
+ else
+ return mlx4_cmd_poll(dev, in_param, out_param,
+ out_is_imm, in_modifier,
+ op_modifier, op, timeout);
+ }
+ return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm,
+ in_modifier, op_modifier, op, timeout);
}
EXPORT_SYMBOL_GPL(__mlx4_cmd);
+
+static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
+{
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+}
+
+static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr,
+ int slave, u64 slave_addr,
+ int size, int is_read)
+{
+ u64 in_param;
+ u64 out_param;
+
+ if ((slave_addr & 0xfff) | (master_addr & 0xfff) |
+ (slave & ~0x7f) | (size & 0xff)) {
+ mlx4_err(dev, "Bad access mem params - slave_addr:0x%llx "
+ "master_addr:0x%llx slave_id:%d size:%d\n",
+ slave_addr, master_addr, slave, size);
+ return -EINVAL;
+ }
+
+ if (is_read) {
+ in_param = (u64) slave | slave_addr;
+ out_param = (u64) dev->caps.function | master_addr;
+ } else {
+ in_param = (u64) dev->caps.function | master_addr;
+ out_param = (u64) slave | slave_addr;
+ }
+
+ return mlx4_cmd_imm(dev, in_param, &out_param, size, 0,
+ MLX4_CMD_ACCESS_MEM,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+
+static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox)
+{
+ struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf);
+ struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf);
+ int err;
+ int i;
+
+ if (index & 0x1f)
+ return -EINVAL;
+
+ in_mad->attr_mod = cpu_to_be32(index / 32);
+
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ for (i = 0; i < 32; ++i)
+ pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]);
+
+ return err;
+}
+
+static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) {
+ err = query_pkey_block(dev, port, i, table + i, inbox, outbox);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+#define PORT_CAPABILITY_LOCATION_IN_SMP 20
+#define PORT_STATE_OFFSET 32
+
+static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf)
+{
+ if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP)
+ return IB_PORT_ACTIVE;
+ else
+ return IB_PORT_DOWN;
+}
+
+static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct ib_smp *smp = inbox->buf;
+ u32 index;
+ u8 port;
+ u16 *table;
+ int err;
+ int vidx, pidx;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct ib_smp *outsmp = outbox->buf;
+ __be16 *outtab = (__be16 *)(outsmp->data);
+ __be32 slave_cap_mask;
+ __be64 slave_node_guid;
+ port = vhcr->in_modifier;
+
+ if (smp->base_version == 1 &&
+ smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ smp->class_version == 1) {
+ if (smp->method == IB_MGMT_METHOD_GET) {
+ if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+ index = be32_to_cpu(smp->attr_mod);
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+ table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+ /* need to get the full pkey table because the paravirtualized
+ * pkeys may be scattered among several pkey blocks.
+ */
+ err = get_full_pkey_table(dev, port, table, inbox, outbox);
+ if (!err) {
+ for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) {
+ pidx = priv->virt2phys_pkey[slave][port - 1][vidx];
+ outtab[vidx % 32] = cpu_to_be16(table[pidx]);
+ }
+ }
+ kfree(table);
+ return err;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) {
+ /*get the slave specific caps:*/
+ /*do the command */
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ /* modify the response for slaves */
+ if (!err && slave != mlx4_master_func_num(dev)) {
+ u8 *state = outsmp->data + PORT_STATE_OFFSET;
+
+ *state = (*state & 0xf0) | vf_port_state(dev, port, slave);
+ slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
+ memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4);
+ }
+ return err;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
+ /* compute slave's gid block */
+ smp->attr_mod = cpu_to_be32(slave / 8);
+ /* execute cmd */
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ if (!err) {
+ /* if needed, move slave gid to index 0 */
+ if (slave % 8)
+ memcpy(outsmp->data,
+ outsmp->data + (slave % 8) * 8, 8);
+ /* delete all other gids */
+ memset(outsmp->data + 8, 0, 56);
+ }
+ return err;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ if (!err) {
+ slave_node_guid = mlx4_get_slave_node_guid(dev, slave);
+ memcpy(outsmp->data + 12, &slave_node_guid, 8);
+ }
+ return err;
+ }
+ }
+ }
+ if (slave != mlx4_master_func_num(dev) &&
+ ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ||
+ (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ smp->method == IB_MGMT_METHOD_SET))) {
+ mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, "
+ "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n",
+ slave, smp->method, smp->mgmt_class,
+ be16_to_cpu(smp->attr_id));
+ return -EPERM;
+ }
+ /*default:*/
+ return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, vhcr->op_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+}
+
+int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u64 in_param;
+ u64 out_param;
+ int err;
+
+ in_param = cmd->has_inbox ? (u64) inbox->dma : vhcr->in_param;
+ out_param = cmd->has_outbox ? (u64) outbox->dma : vhcr->out_param;
+ if (cmd->encode_slave_id) {
+ in_param &= 0xffffffffffffff00ll;
+ in_param |= slave;
+ }
+
+ err = __mlx4_cmd(dev, in_param, &out_param, cmd->out_is_imm,
+ vhcr->in_modifier, vhcr->op_modifier, vhcr->op,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ if (cmd->out_is_imm)
+ vhcr->out_param = out_param;
+
+ return err;
+}
+
+static struct mlx4_cmd_info cmd_info[] = {
+ {
+ .opcode = MLX4_CMD_QUERY_FW,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_FW_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_HCA,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_DEV_CAP,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_DEV_CAP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_FUNC_CAP,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_FUNC_CAP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_ADAPTER,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_INIT_PORT,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_INIT_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_CLOSE_PORT,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CLOSE_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_PORT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SET_PORT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SET_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_MAP_EQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_MAP_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_EQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW_HEALTH_CHECK,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_NOP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_ALLOC_RES,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = true,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_ALLOC_RES_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_FREE_RES,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_FREE_RES_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_MPT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_MPT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_MPT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_MPT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_MPT,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_MPT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_READ_MTT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_WRITE_MTT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_WRITE_MTT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SYNC_TPT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_EQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_EQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_CQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_CQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_CQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_MODIFY_CQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = true,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_MODIFY_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_SRQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_SRQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_SRQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_ARM_SRQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_ARM_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RST2INIT_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_RST2INIT_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_INIT2INIT_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_INIT2INIT_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_INIT2RTR_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_INIT2RTR_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RTR2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_RTR2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RTS2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_RTS2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SQERR2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SQERR2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_2ERR_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RTS2SQD_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SQD2SQD_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SQD2SQD_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SQD2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SQD2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_2RST_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_2RST_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_QP,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SUSPEND_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_UNSUSPEND_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_CONF_SPECIAL_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL, /* XXX verify: only demux can do this */
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_MAD_IFC,
+ .has_inbox = true,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_MAD_IFC_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_IF_STAT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_IF_STAT_wrapper
+ },
+ /* Native multicast commands are not available for guests */
+ {
+ .opcode = MLX4_CMD_QP_ATTACH,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_ATTACH_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_PROMISC,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_PROMISC_wrapper
+ },
+ /* Ethernet specific commands */
+ {
+ .opcode = MLX4_CMD_SET_VLAN_FLTR,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SET_VLAN_FLTR_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SET_MCAST_FLTR,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SET_MCAST_FLTR_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_DUMP_ETH_STATS,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_DUMP_ETH_STATS_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_INFORM_FLR_DONE,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ /* flow steering commands */
+ {
+ .opcode = MLX4_QP_FLOW_STEERING_ATTACH,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = true,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_FLOW_STEERING_ATTACH_wrapper
+ },
+ {
+ .opcode = MLX4_QP_FLOW_STEERING_DETACH,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper
+ },
+};
+
+static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr_cmd *in_vhcr)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_info *cmd = NULL;
+ struct mlx4_vhcr_cmd *vhcr_cmd = in_vhcr ? in_vhcr : priv->mfunc.vhcr;
+ struct mlx4_vhcr *vhcr;
+ struct mlx4_cmd_mailbox *inbox = NULL;
+ struct mlx4_cmd_mailbox *outbox = NULL;
+ u64 in_param;
+ u64 out_param;
+ int ret = 0;
+ int i;
+ int err = 0;
+
+ /* Create sw representation of Virtual HCR */
+ vhcr = kzalloc(sizeof(struct mlx4_vhcr), GFP_KERNEL);
+ if (!vhcr)
+ return -ENOMEM;
+
+ /* DMA in the vHCR */
+ if (!in_vhcr) {
+ ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave,
+ priv->mfunc.master.slave_state[slave].vhcr_dma,
+ ALIGN(sizeof(struct mlx4_vhcr_cmd),
+ MLX4_ACCESS_MEM_ALIGN), 1);
+ if (ret) {
+ mlx4_err(dev, "%s:Failed reading vhcr"
+ "ret: 0x%x\n", __func__, ret);
+ kfree(vhcr);
+ return ret;
+ }
+ }
+
+ /* Fill SW VHCR fields */
+ vhcr->in_param = be64_to_cpu(vhcr_cmd->in_param);
+ vhcr->out_param = be64_to_cpu(vhcr_cmd->out_param);
+ vhcr->in_modifier = be32_to_cpu(vhcr_cmd->in_modifier);
+ vhcr->token = be16_to_cpu(vhcr_cmd->token);
+ vhcr->op = be16_to_cpu(vhcr_cmd->opcode) & 0xfff;
+ vhcr->op_modifier = (u8) (be16_to_cpu(vhcr_cmd->opcode) >> 12);
+ vhcr->e_bit = vhcr_cmd->flags & (1 << 6);
+
+ /* Lookup command */
+ for (i = 0; i < ARRAY_SIZE(cmd_info); ++i) {
+ if (vhcr->op == cmd_info[i].opcode) {
+ cmd = &cmd_info[i];
+ break;
+ }
+ }
+ if (!cmd) {
+ mlx4_err(dev, "Unknown command:0x%x accepted from slave:%d\n",
+ vhcr->op, slave);
+ vhcr_cmd->status = CMD_STAT_BAD_PARAM;
+ goto out_status;
+ }
+
+ /* Read inbox */
+ if (cmd->has_inbox) {
+ vhcr->in_param &= INBOX_MASK;
+ inbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(inbox)) {
+ vhcr_cmd->status = CMD_STAT_BAD_SIZE;
+ inbox = NULL;
+ goto out_status;
+ }
+
+ if (mlx4_ACCESS_MEM(dev, inbox->dma, slave,
+ vhcr->in_param,
+ MLX4_MAILBOX_SIZE, 1)) {
+ mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n",
+ __func__, cmd->opcode);
+ vhcr_cmd->status = CMD_STAT_INTERNAL_ERR;
+ goto out_status;
+ }
+ }
+
+ /* Apply permission and bound checks if applicable */
+ if (cmd->verify && cmd->verify(dev, slave, vhcr, inbox)) {
+ mlx4_warn(dev, "Command:0x%x from slave: %d failed protection "
+ "checks for resource_id:%d\n", vhcr->op, slave,
+ vhcr->in_modifier);
+ vhcr_cmd->status = CMD_STAT_BAD_OP;
+ goto out_status;
+ }
+
+ /* Allocate outbox */
+ if (cmd->has_outbox) {
+ outbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(outbox)) {
+ vhcr_cmd->status = CMD_STAT_BAD_SIZE;
+ outbox = NULL;
+ goto out_status;
+ }
+ }
+
+ /* Execute the command! */
+ if (cmd->wrapper) {
+ err = cmd->wrapper(dev, slave, vhcr, inbox, outbox,
+ cmd);
+ if (cmd->out_is_imm)
+ vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param);
+ } else {
+ in_param = cmd->has_inbox ? (u64) inbox->dma :
+ vhcr->in_param;
+ out_param = cmd->has_outbox ? (u64) outbox->dma :
+ vhcr->out_param;
+ err = __mlx4_cmd(dev, in_param, &out_param,
+ cmd->out_is_imm, vhcr->in_modifier,
+ vhcr->op_modifier, vhcr->op,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ if (cmd->out_is_imm) {
+ vhcr->out_param = out_param;
+ vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param);
+ }
+ }
+
+ if (err) {
+ mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with"
+ " error:%d, status %d\n",
+ vhcr->op, slave, vhcr->errno, err);
+ vhcr_cmd->status = mlx4_errno_to_status(err);
+ goto out_status;
+ }
+
+
+ /* Write outbox if command completed successfully */
+ if (cmd->has_outbox && !vhcr_cmd->status) {
+ ret = mlx4_ACCESS_MEM(dev, outbox->dma, slave,
+ vhcr->out_param,
+ MLX4_MAILBOX_SIZE, MLX4_CMD_WRAPPED);
+ if (ret) {
+ /* If we failed to write back the outbox after the
+ *command was successfully executed, we must fail this
+ * slave, as it is now in undefined state */
+ mlx4_err(dev, "%s:Failed writing outbox\n", __func__);
+ goto out;
+ }
+ }
+
+out_status:
+ /* DMA back vhcr result */
+ if (!in_vhcr) {
+ ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave,
+ priv->mfunc.master.slave_state[slave].vhcr_dma,
+ ALIGN(sizeof(struct mlx4_vhcr),
+ MLX4_ACCESS_MEM_ALIGN),
+ MLX4_CMD_WRAPPED);
+ if (ret)
+ mlx4_err(dev, "%s:Failed writing vhcr result\n",
+ __func__);
+ else if (vhcr->e_bit &&
+ mlx4_GEN_EQE(dev, slave, &priv->mfunc.master.cmd_eqe))
+ mlx4_warn(dev, "Failed to generate command completion "
+ "eqe for slave %d\n", slave);
+ }
+
+out:
+ kfree(vhcr);
+ mlx4_free_cmd_mailbox(dev, inbox);
+ mlx4_free_cmd_mailbox(dev, outbox);
+ return ret;
+}
+
+static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave)
+{
+ int port, err;
+ struct mlx4_vport_state *vp_admin;
+ struct mlx4_vport_oper_state *vp_oper;
+
+ for (port = 1; port <= MLX4_MAX_PORTS; port++) {
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+ vp_oper->state = *vp_admin;
+ if (MLX4_VGT != vp_admin->default_vlan) {
+ err = mlx4_register_vlan(&priv->dev, port,
+ vp_admin->default_vlan, &(vp_oper->vlan_idx));
+ if (err) {
+ vp_oper->vlan_idx = NO_INDX;
+ mlx4_warn((&priv->dev),
+ "No vlan resorces slave %d, port %d\n",
+ slave, port);
+ return err;
+ }
+ mlx4_dbg((&(priv->dev)), "alloc vlan %d idx %d slave %d port %d\n",
+ (int)(vp_oper->state.default_vlan),
+ vp_oper->vlan_idx, slave, port);
+ }
+ if (vp_admin->spoofchk) {
+ vp_oper->mac_idx = __mlx4_register_mac(&priv->dev,
+ port,
+ vp_admin->mac);
+ if (0 > vp_oper->mac_idx) {
+ err = vp_oper->mac_idx;
+ vp_oper->mac_idx = NO_INDX;
+ mlx4_warn((&priv->dev),
+ "No mac resorces slave %d, port %d\n",
+ slave, port);
+ return err;
+ }
+ mlx4_dbg((&(priv->dev)), "alloc mac %llx idx %d slave %d port %d\n",
+ vp_oper->state.mac, vp_oper->mac_idx, slave, port);
+ }
+ }
+ return 0;
+}
+
+static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave)
+{
+ int port;
+ struct mlx4_vport_oper_state *vp_oper;
+
+ for (port = 1; port <= MLX4_MAX_PORTS; port++) {
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ if (NO_INDX != vp_oper->vlan_idx) {
+ __mlx4_unregister_vlan(&priv->dev,
+ port, vp_oper->state.default_vlan);
+ vp_oper->vlan_idx = NO_INDX;
+ }
+ if (NO_INDX != vp_oper->mac_idx) {
+ __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac);
+ vp_oper->mac_idx = NO_INDX;
+ }
+ }
+ return;
+}
+
+static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
+ u16 param, u8 toggle)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+ u32 reply;
+ u8 is_going_down = 0;
+ int i;
+ unsigned long flags;
+
+ slave_state[slave].comm_toggle ^= 1;
+ reply = (u32) slave_state[slave].comm_toggle << 31;
+ if (toggle != slave_state[slave].comm_toggle) {
+ mlx4_warn(dev, "Incorrect toggle %d from slave %d. *** MASTER"
+ "STATE COMPROMISIED ***\n", toggle, slave);
+ goto reset_slave;
+ }
+ if (cmd == MLX4_COMM_CMD_RESET) {
+ mlx4_warn(dev, "Received reset from slave:%d\n", slave);
+ slave_state[slave].active = false;
+ mlx4_master_deactivate_admin_state(priv, slave);
+ for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) {
+ slave_state[slave].event_eq[i].eqn = -1;
+ slave_state[slave].event_eq[i].token = 0;
+ }
+ /*check if we are in the middle of FLR process,
+ if so return "retry" status to the slave*/
+ if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd)
+ goto inform_slave_state;
+
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave);
+
+ /* write the version in the event field */
+ reply |= mlx4_comm_get_version();
+
+ goto reset_slave;
+ }
+ /*command from slave in the middle of FLR*/
+ if (cmd != MLX4_COMM_CMD_RESET &&
+ MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) {
+ mlx4_warn(dev, "slave:%d is Trying to run cmd(0x%x) "
+ "in the middle of FLR\n", slave, cmd);
+ return;
+ }
+
+ switch (cmd) {
+ case MLX4_COMM_CMD_VHCR0:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_RESET)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma = ((u64) param) << 48;
+ priv->mfunc.master.slave_state[slave].cookie = 0;
+ mutex_init(&priv->mfunc.master.gen_eqe_mutex[slave]);
+ break;
+ case MLX4_COMM_CMD_VHCR1:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR0)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma |= ((u64) param) << 32;
+ break;
+ case MLX4_COMM_CMD_VHCR2:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR1)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma |= ((u64) param) << 16;
+ break;
+ case MLX4_COMM_CMD_VHCR_EN:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR2)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma |= param;
+ if (mlx4_master_activate_admin_state(priv, slave))
+ goto reset_slave;
+ slave_state[slave].active = true;
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave);
+ break;
+ case MLX4_COMM_CMD_VHCR_POST:
+ if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
+ (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST))
+ goto reset_slave;
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ if (mlx4_master_process_vhcr(dev, slave, NULL)) {
+ mlx4_err(dev, "Failed processing vhcr for slave:%d,"
+ " resetting slave.\n", slave);
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ goto reset_slave;
+ }
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ break;
+ default:
+ mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave);
+ goto reset_slave;
+ }
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ if (!slave_state[slave].is_slave_going_down)
+ slave_state[slave].last_cmd = cmd;
+ else
+ is_going_down = 1;
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+ if (is_going_down) {
+ mlx4_warn(dev, "Slave is going down aborting command(%d)"
+ " executing from slave:%d\n",
+ cmd, slave);
+ return;
+ }
+ __raw_writel((__force u32) cpu_to_be32(reply),
+ &priv->mfunc.comm[slave].slave_read);
+ mmiowb();
+
+ return;
+
+reset_slave:
+ /* cleanup any slave resources */
+ mlx4_delete_all_resources_for_slave(dev, slave);
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ if (!slave_state[slave].is_slave_going_down)
+ slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+ /*with slave in the middle of flr, no need to clean resources again.*/
+inform_slave_state:
+ memset(&slave_state[slave].event_eq, 0,
+ sizeof(struct mlx4_slave_event_eq_info));
+ __raw_writel((__force u32) cpu_to_be32(reply),
+ &priv->mfunc.comm[slave].slave_read);
+ wmb();
+}
+
+/* master command processing */
+void mlx4_master_comm_channel(struct work_struct *work)
+{
+ struct mlx4_mfunc_master_ctx *master =
+ container_of(work,
+ struct mlx4_mfunc_master_ctx,
+ comm_work);
+ struct mlx4_mfunc *mfunc =
+ container_of(master, struct mlx4_mfunc, master);
+ struct mlx4_priv *priv =
+ container_of(mfunc, struct mlx4_priv, mfunc);
+ struct mlx4_dev *dev = &priv->dev;
+ __be32 *bit_vec;
+ u32 comm_cmd;
+ u32 vec;
+ int i, j, slave;
+ int toggle;
+ int served = 0;
+ int reported = 0;
+ u32 slt;
+
+ bit_vec = master->comm_arm_bit_vector;
+ for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) {
+ vec = be32_to_cpu(bit_vec[i]);
+ for (j = 0; j < 32; j++) {
+ if (!(vec & (1 << j)))
+ continue;
+ ++reported;
+ slave = (i * 32) + j;
+ comm_cmd = swab32(readl(
+ &mfunc->comm[slave].slave_write));
+ slt = swab32(readl(&mfunc->comm[slave].slave_read))
+ >> 31;
+ toggle = comm_cmd >> 31;
+ if (toggle != slt) {
+ if (master->slave_state[slave].comm_toggle
+ != slt) {
+ mlx4_info(dev, "slave %d out of sync."
+ " read toggle %d, state toggle %d. "
+ "Resynching.\n", slave, slt,
+ master->slave_state[slave].comm_toggle);
+ master->slave_state[slave].comm_toggle =
+ slt;
+ }
+ mlx4_master_do_cmd(dev, slave,
+ comm_cmd >> 16 & 0xff,
+ comm_cmd & 0xffff, toggle);
+ ++served;
+ }
+ }
+ }
+
+ if (reported && reported != served)
+ mlx4_warn(dev, "Got command event with bitmask from %d slaves"
+ " but %d were served\n",
+ reported, served);
+
+ if (mlx4_ARM_COMM_CHANNEL(dev))
+ mlx4_warn(dev, "Failed to arm comm channel events\n");
+}
+
+static int sync_toggles(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int wr_toggle;
+ int rd_toggle;
+ unsigned long end;
+
+ wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31;
+ end = jiffies + msecs_to_jiffies(5000);
+
+ while (time_before(jiffies, end)) {
+ rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31;
+ if (rd_toggle == wr_toggle) {
+ priv->cmd.comm_toggle = rd_toggle;
+ return 0;
+ }
+
+ cond_resched();
+ }
+
+ /*
+ * we could reach here if for example the previous VM using this
+ * function misbehaved and left the channel with unsynced state. We
+ * should fix this here and give this VM a chance to use a properly
+ * synced channel
+ */
+ mlx4_warn(dev, "recovering from previously mis-behaved VM\n");
+ __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_read);
+ __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_write);
+ priv->cmd.comm_toggle = 0;
+
+ return 0;
+}
+
+int mlx4_multi_func_init(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state;
+ int i, j, err, port;
+
+ if (mlx4_is_master(dev))
+ priv->mfunc.comm =
+ ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) +
+ priv->fw.comm_base, MLX4_COMM_PAGESIZE);
+ else
+ priv->mfunc.comm =
+ ioremap(pci_resource_start(dev->pdev, 2) +
+ MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE);
+ if (!priv->mfunc.comm) {
+ mlx4_err(dev, "Couldn't map communication vector.\n");
+ goto err_vhcr;
+ }
+
+ if (mlx4_is_master(dev)) {
+ priv->mfunc.master.slave_state =
+ kzalloc(dev->num_slaves *
+ sizeof(struct mlx4_slave_state), GFP_KERNEL);
+ if (!priv->mfunc.master.slave_state)
+ goto err_comm;
+
+ priv->mfunc.master.vf_admin =
+ kzalloc(dev->num_slaves *
+ sizeof(struct mlx4_vf_admin_state), GFP_KERNEL);
+ if (!priv->mfunc.master.vf_admin)
+ goto err_comm_admin;
+
+ priv->mfunc.master.vf_oper =
+ kzalloc(dev->num_slaves *
+ sizeof(struct mlx4_vf_oper_state), GFP_KERNEL);
+ if (!priv->mfunc.master.vf_oper)
+ goto err_comm_oper;
+
+ for (i = 0; i < dev->num_slaves; ++i) {
+ s_state = &priv->mfunc.master.slave_state[i];
+ s_state->last_cmd = MLX4_COMM_CMD_RESET;
+ for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j)
+ s_state->event_eq[j].eqn = -1;
+ __raw_writel((__force u32) 0,
+ &priv->mfunc.comm[i].slave_write);
+ __raw_writel((__force u32) 0,
+ &priv->mfunc.comm[i].slave_read);
+ mmiowb();
+ for (port = 1; port <= MLX4_MAX_PORTS; port++) {
+ s_state->vlan_filter[port] =
+ kzalloc(sizeof(struct mlx4_vlan_fltr),
+ GFP_KERNEL);
+ if (!s_state->vlan_filter[port]) {
+ if (--port)
+ kfree(s_state->vlan_filter[port]);
+ goto err_slaves;
+ }
+ INIT_LIST_HEAD(&s_state->mcast_filters[port]);
+ priv->mfunc.master.vf_admin[i].vport[port].default_vlan = MLX4_VGT;
+ priv->mfunc.master.vf_oper[i].vport[port].state.default_vlan = MLX4_VGT;
+ priv->mfunc.master.vf_oper[i].vport[port].vlan_idx = NO_INDX;
+ priv->mfunc.master.vf_oper[i].vport[port].mac_idx = NO_INDX;
+ }
+ spin_lock_init(&s_state->lock);
+ }
+
+ memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
+ priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
+ INIT_WORK(&priv->mfunc.master.comm_work,
+ mlx4_master_comm_channel);
+ INIT_WORK(&priv->mfunc.master.slave_event_work,
+ mlx4_gen_slave_eqe);
+ INIT_WORK(&priv->mfunc.master.slave_flr_event_work,
+ mlx4_master_handle_slave_flr);
+ spin_lock_init(&priv->mfunc.master.slave_state_lock);
+ spin_lock_init(&priv->mfunc.master.slave_eq.event_lock);
+ priv->mfunc.master.comm_wq =
+ create_singlethread_workqueue("mlx4_comm");
+ if (!priv->mfunc.master.comm_wq)
+ goto err_slaves;
+
+ if (mlx4_init_resource_tracker(dev))
+ goto err_thread;
+
+ err = mlx4_ARM_COMM_CHANNEL(dev);
+ if (err) {
+ mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
+ err);
+ goto err_resource;
+ }
+
+ } else {
+ err = sync_toggles(dev);
+ if (err) {
+ mlx4_err(dev, "Couldn't sync toggles\n");
+ goto err_comm;
+ }
+ }
+ return 0;
+
+err_resource:
+ mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL);
+err_thread:
+ flush_workqueue(priv->mfunc.master.comm_wq);
+ destroy_workqueue(priv->mfunc.master.comm_wq);
+err_slaves:
+ while (--i) {
+ for (port = 1; port <= MLX4_MAX_PORTS; port++)
+ kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+ }
+ kfree(priv->mfunc.master.vf_oper);
+err_comm_oper:
+ kfree(priv->mfunc.master.vf_admin);
+err_comm_admin:
+ kfree(priv->mfunc.master.slave_state);
+err_comm:
+ iounmap(priv->mfunc.comm);
+err_vhcr:
+ dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ priv->mfunc.vhcr,
+ priv->mfunc.vhcr_dma);
+ priv->mfunc.vhcr = NULL;
+ return -ENOMEM;
+}
+
int mlx4_cmd_init(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
mutex_init(&priv->cmd.hcr_mutex);
+ mutex_init(&priv->cmd.slave_cmd_mutex);
sema_init(&priv->cmd.poll_sem, 1);
priv->cmd.use_events = 0;
priv->cmd.toggle = 1;
- priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_HCR_BASE,
- MLX4_HCR_SIZE);
- if (!priv->cmd.hcr) {
- mlx4_err(dev, "Couldn't map command register.");
- return -ENOMEM;
+ priv->cmd.hcr = NULL;
+ priv->mfunc.vhcr = NULL;
+
+ if (!mlx4_is_slave(dev)) {
+ priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) +
+ MLX4_HCR_BASE, MLX4_HCR_SIZE);
+ if (!priv->cmd.hcr) {
+ mlx4_err(dev, "Couldn't map command register.\n");
+ return -ENOMEM;
+ }
+ }
+
+ if (mlx4_is_mfunc(dev)) {
+ priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ &priv->mfunc.vhcr_dma,
+ GFP_KERNEL);
+ if (!priv->mfunc.vhcr) {
+ mlx4_err(dev, "Couldn't allocate VHCR.\n");
+ goto err_hcr;
+ }
}
priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev,
MLX4_MAILBOX_SIZE,
MLX4_MAILBOX_SIZE, 0);
- if (!priv->cmd.pool) {
+ if (!priv->cmd.pool)
+ goto err_vhcr;
+
+ return 0;
+
+err_vhcr:
+ if (mlx4_is_mfunc(dev))
+ dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
+ priv->mfunc.vhcr = NULL;
+
+err_hcr:
+ if (!mlx4_is_slave(dev))
iounmap(priv->cmd.hcr);
- return -ENOMEM;
+ return -ENOMEM;
+}
+
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, port;
+
+ if (mlx4_is_master(dev)) {
+ flush_workqueue(priv->mfunc.master.comm_wq);
+ destroy_workqueue(priv->mfunc.master.comm_wq);
+ for (i = 0; i < dev->num_slaves; i++) {
+ for (port = 1; port <= MLX4_MAX_PORTS; port++)
+ kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+ }
+ kfree(priv->mfunc.master.slave_state);
+ kfree(priv->mfunc.master.vf_admin);
+ kfree(priv->mfunc.master.vf_oper);
}
- return 0;
+ iounmap(priv->mfunc.comm);
}
void mlx4_cmd_cleanup(struct mlx4_dev *dev)
@@ -363,7 +2000,13 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
pci_pool_destroy(priv->cmd.pool);
- iounmap(priv->cmd.hcr);
+
+ if (!mlx4_is_slave(dev))
+ iounmap(priv->cmd.hcr);
+ if (mlx4_is_mfunc(dev))
+ dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
+ priv->mfunc.vhcr = NULL;
}
/*
@@ -374,6 +2017,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int i;
+ int err = 0;
priv->cmd.context = kmalloc(priv->cmd.max_cmds *
sizeof (struct mlx4_cmd_context),
@@ -398,11 +2042,10 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev)
; /* nothing */
--priv->cmd.token_mask;
- priv->cmd.use_events = 1;
-
down(&priv->cmd.poll_sem);
+ priv->cmd.use_events = 1;
- return 0;
+ return err;
}
/*
@@ -442,7 +2085,8 @@ struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
}
EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox);
-void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox)
+void mlx4_free_cmd_mailbox(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox)
{
if (!mailbox)
return;
@@ -451,3 +2095,70 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo
kfree(mailbox);
}
EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox);
+
+u32 mlx4_comm_get_version(void)
+{
+ return ((u32) CMD_CHAN_IF_REV << 8) | (u32) CMD_CHAN_VER;
+}
+
+int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+
+ if (!mlx4_is_master(dev))
+ return -EPROTONOSUPPORT;
+
+ if ((vf <= 0) || (vf > dev->num_vfs)) {
+ mlx4_err(dev, "Bad vf number:%d (max vf activated: %d)\n", vf, dev->num_vfs);
+ return -EINVAL;
+ }
+
+ s_info = &priv->mfunc.master.vf_admin[vf].vport[port];
+ s_info->mac = mlx4_mac_to_u64(mac);
+ mlx4_info(dev, "default mac on vf %d port %d to %llX will take afect only after vf restart\n",
+ vf, port, s_info->mac);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_mac);
+
+int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+
+ if ((!mlx4_is_master(dev)) ||
+ !(dev->caps.flags & MLX4_DEV_CAP_FLAG_ESWITCH_SUPPORT))
+ return -EPROTONOSUPPORT;
+
+ if ((vf <= 0) || (vf > dev->num_vfs) || (vlan > 4095) || (qos > 7))
+ return -EINVAL;
+
+ s_info = &priv->mfunc.master.vf_admin[vf].vport[port];
+ if ((0 == vlan) && (0 == qos))
+ s_info->default_vlan = MLX4_VGT;
+ else
+ s_info->default_vlan = vlan;
+ s_info->default_qos = qos;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan);
+
+int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+
+ if ((!mlx4_is_master(dev)) ||
+ !(dev->caps.flags & MLX4_DEV_CAP_FLAG_ESWITCH_SUPPORT))
+ return -EPROTONOSUPPORT;
+
+ if ((vf <= 0) || (vf > dev->num_vfs))
+ return -EINVAL;
+
+ s_info = &priv->mfunc.master.vf_admin[vf].vport[port];
+ s_info->spoofchk = setting;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_spoofchk);
diff --git a/sys/ofed/drivers/net/mlx4/cq.c b/sys/ofed/drivers/net/mlx4/cq.c
index 076c602..c5a36e0 100644
--- a/sys/ofed/drivers/net/mlx4/cq.c
+++ b/sys/ofed/drivers/net/mlx4/cq.c
@@ -43,27 +43,6 @@
#include "mlx4.h"
#include "icm.h"
-struct mlx4_cq_context {
- __be32 flags;
- u16 reserved1[3];
- __be16 page_offset;
- __be32 logsize_usrpage;
- __be16 cq_period;
- __be16 cq_max_count;
- u8 reserved2[3];
- u8 comp_eqn;
- u8 log_page_size;
- u8 reserved3[2];
- u8 mtt_base_addr_h;
- __be32 mtt_base_addr_l;
- __be32 last_notified_index;
- __be32 solicit_producer_index;
- __be32 consumer_index;
- __be32 producer_index;
- u32 reserved4[2];
- __be64 db_rec_addr;
-};
-
#define MLX4_CQ_STATUS_OK ( 0 << 28)
#define MLX4_CQ_STATUS_OVERFLOW ( 9 << 28)
#define MLX4_CQ_STATUS_WRITE_FAIL (10 << 28)
@@ -75,10 +54,16 @@ struct mlx4_cq_context {
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
{
+ struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
struct mlx4_cq *cq;
+ spin_lock(&cq_table->lock);
cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
cqn & (dev->caps.num_cqs - 1));
+ if (cq)
+ atomic_inc(&cq->refcount);
+ spin_unlock(&cq_table->lock);
+
if (!cq) {
mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
return;
@@ -87,6 +72,9 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
++cq->arm_sn;
cq->comp(cq);
+
+ if (atomic_dec_and_test(&cq->refcount))
+ complete(&cq->free);
}
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
@@ -116,23 +104,24 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int cq_num)
{
- return mlx4_cmd(dev, mailbox->dma, cq_num, 0, MLX4_CMD_SW2HW_CQ,
- MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd(dev, mailbox->dma, cq_num, 0,
+ MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int cq_num, u32 opmod)
{
return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_MODIFY_CQ,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int cq_num)
{
- return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
- mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
- MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+ cq_num, mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
@@ -187,25 +176,121 @@ int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
}
EXPORT_SYMBOL_GPL(mlx4_cq_resize);
-static int mlx4_find_least_loaded_vector(struct mlx4_priv *priv)
+int mlx4_cq_ignore_overrun(struct mlx4_dev *dev, struct mlx4_cq *cq)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_cq_context *cq_context;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ cq_context = mailbox->buf;
+ memset(cq_context, 0, sizeof *cq_context);
+
+ cq_context->flags |= cpu_to_be32(MLX4_CQ_FLAG_OI);
+
+ err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 3);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_ignore_overrun);
+
+int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
{
- int i;
- int index = 0;
- int min = priv->eq_table.eq[0].load;
-
- for (i = 1; i < priv->dev.caps.num_comp_vectors; i++) {
- if (priv->eq_table.eq[i].load < min) {
- index = i;
- min = priv->eq_table.eq[i].load;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cq_table *cq_table = &priv->cq_table;
+ int err;
+
+ *cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
+ if (*cqn == -1)
+ return -ENOMEM;
+
+ err = mlx4_table_get(dev, &cq_table->table, *cqn);
+ if (err)
+ goto err_out;
+
+ err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn);
+ if (err)
+ goto err_put;
+ return 0;
+
+err_put:
+ mlx4_table_put(dev, &cq_table->table, *cqn);
+
+err_out:
+ mlx4_bitmap_free(&cq_table->bitmap, *cqn);
+ return err;
+}
+
+static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
+{
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param, RES_CQ,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ return err;
+ else {
+ *cqn = get_param_l(&out_param);
+ return 0;
}
}
+ return __mlx4_cq_alloc_icm(dev, cqn);
+}
- return index;
+void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cq_table *cq_table = &priv->cq_table;
+
+ mlx4_table_put(dev, &cq_table->cmpt_table, cqn);
+ mlx4_table_put(dev, &cq_table->table, cqn);
+ mlx4_bitmap_free(&cq_table->bitmap, cqn);
+}
+
+static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, cqn);
+ err = mlx4_cmd(dev, in_param, RES_CQ, RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed freeing cq:%d\n", cqn);
+ } else
+ __mlx4_cq_free_icm(dev, cqn);
}
-int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
- struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
- unsigned vector, int collapsed)
+static int mlx4_find_least_loaded_vector(struct mlx4_priv *priv)
+{
+ int i;
+ int index = 0;
+ int min = priv->eq_table.eq[0].load;
+
+ for (i = 1; i < priv->dev.caps.num_comp_vectors; i++) {
+ if (priv->eq_table.eq[i].load < min) {
+ index = i;
+ min = priv->eq_table.eq[i].load;
+ }
+ }
+
+ return index;
+}
+
+
+int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
+ struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec,
+ struct mlx4_cq *cq, unsigned vector, int collapsed,
+ int timestamp_en)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cq_table *cq_table = &priv->cq_table;
@@ -214,29 +299,24 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
u64 mtt_addr;
int err;
- cq->vector = (vector == MLX4_LEAST_ATTACHED_VECTOR) ?
- mlx4_find_least_loaded_vector(priv) : vector;
+ cq->vector = (vector == MLX4_LEAST_ATTACHED_VECTOR) ?
+ mlx4_find_least_loaded_vector(priv) : vector;
- if (cq->vector >= dev->caps.num_comp_vectors)
+ if (cq->vector > dev->caps.num_comp_vectors + dev->caps.comp_pool) {
return -EINVAL;
+ }
- cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
- if (cq->cqn == -1)
- return -ENOMEM;
-
- err = mlx4_table_get(dev, &cq_table->table, cq->cqn);
- if (err)
- goto err_out;
-
- err = mlx4_table_get(dev, &cq_table->cmpt_table, cq->cqn);
- if (err)
- goto err_put;
+ err = mlx4_cq_alloc_icm(dev, &cq->cqn);
+ if (err) {
+ return err;
+ }
spin_lock_irq(&cq_table->lock);
err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
spin_unlock_irq(&cq_table->lock);
- if (err)
- goto err_cmpt_put;
+ if (err){
+ goto err_icm;
+ }
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
@@ -248,6 +328,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
memset(cq_context, 0, sizeof *cq_context);
cq_context->flags = cpu_to_be32(!!collapsed << 18);
+ if (timestamp_en)
+ cq_context->flags |= cpu_to_be32(1 << 19);
+
cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
cq_context->comp_eqn = priv->eq_table.eq[cq->vector].eqn;
cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
@@ -262,13 +345,16 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
if (err)
goto err_radix;
- priv->eq_table.eq[cq->vector].load++;
+ priv->eq_table.eq[cq->vector].load++;
cq->cons_index = 0;
cq->arm_sn = 1;
cq->uar = uar;
atomic_set(&cq->refcount, 1);
init_completion(&cq->free);
+ cq->eqn = priv->eq_table.eq[cq->vector].eqn;
+ cq->irq = priv->eq_table.eq[cq->vector].irq;
+
return 0;
err_radix:
@@ -276,14 +362,8 @@ err_radix:
radix_tree_delete(&cq_table->tree, cq->cqn);
spin_unlock_irq(&cq_table->lock);
-err_cmpt_put:
- mlx4_table_put(dev, &cq_table->cmpt_table, cq->cqn);
-
-err_put:
- mlx4_table_put(dev, &cq_table->table, cq->cqn);
-
-err_out:
- mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
+err_icm:
+ mlx4_cq_free_icm(dev, cq->cqn);
return err;
}
@@ -299,8 +379,9 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
if (err)
mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
+
+ priv->eq_table.eq[cq->vector].load--;
synchronize_irq(priv->eq_table.eq[cq->vector].irq);
- priv->eq_table.eq[cq->vector].load--;
spin_lock_irq(&cq_table->lock);
radix_tree_delete(&cq_table->tree, cq->cqn);
@@ -310,8 +391,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
complete(&cq->free);
wait_for_completion(&cq->free);
- mlx4_table_put(dev, &cq_table->table, cq->cqn);
- mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
+ mlx4_cq_free_icm(dev, cq->cqn);
}
EXPORT_SYMBOL_GPL(mlx4_cq_free);
@@ -322,6 +402,8 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
spin_lock_init(&cq_table->lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+ if (mlx4_is_slave(dev))
+ return 0;
err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs,
dev->caps.num_cqs - 1, dev->caps.reserved_cqs, 0);
@@ -333,6 +415,8 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
void mlx4_cleanup_cq_table(struct mlx4_dev *dev)
{
+ if (mlx4_is_slave(dev))
+ return;
/* Nothing to do to clean up radix_tree */
mlx4_bitmap_cleanup(&mlx4_priv(dev)->cq_table.bitmap);
}
diff --git a/sys/ofed/drivers/net/mlx4/en_cq.c b/sys/ofed/drivers/net/mlx4/en_cq.c
index 57f00d4..9783e23 100644
--- a/sys/ofed/drivers/net/mlx4/en_cq.c
+++ b/sys/ofed/drivers/net/mlx4/en_cq.c
@@ -101,10 +101,12 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
if (!cq->is_tx)
cq->size = priv->rx_ring[cq->ring].actual_size;
+
err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
- cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
- if (err)
+ cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx, 0);
+ if (err) {
return err;
+ }
cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq;
cq->mcq.event = mlx4_en_cq_event;
diff --git a/sys/ofed/drivers/net/mlx4/en_main.c b/sys/ofed/drivers/net/mlx4/en_main.c
index 107eee3..b56766b 100644
--- a/sys/ofed/drivers/net/mlx4/en_main.c
+++ b/sys/ofed/drivers/net/mlx4/en_main.c
@@ -88,7 +88,8 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
params->tcp_rss = tcp_rss;
params->udp_rss = udp_rss;
- if (params->udp_rss && !mdev->dev->caps.udp_rss) {
+ if (params->udp_rss && !(mdev->dev->caps.flags
+ & MLX4_DEV_CAP_FLAG_UDP_RSS)) {
mlx4_warn(mdev, "UDP RSS is not supported on this device.\n");
params->udp_rss = 0;
}
@@ -116,18 +117,17 @@ static void *get_netdev(struct mlx4_dev *dev, void *ctx, u8 port)
}
static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
- enum mlx4_dev_event event, int port)
+ enum mlx4_dev_event event, unsigned long port)
{
struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr;
struct mlx4_en_priv *priv;
- if (!mdev->pndev[port])
- return;
-
- priv = netdev_priv(mdev->pndev[port]);
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
case MLX4_DEV_EVENT_PORT_DOWN:
+ if (!mdev->pndev[port])
+ return;
+ priv = netdev_priv(mdev->pndev[port]);
/* To prevent races, we poll the link state in a separate
task rather than changing it here */
priv->link_state = event;
@@ -139,7 +139,11 @@ static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
break;
default:
- mlx4_warn(mdev, "Unhandled event: %d\n", event);
+ if (port < 1 || port > dev->caps.num_ports ||
+ !mdev->pndev[port])
+ return;
+ mlx4_warn(mdev, "Unhandled event %d for port %d\n", event,
+ (int) port);
}
}
@@ -351,8 +355,8 @@ static struct mlx4_interface mlx4_en_interface = {
.remove = mlx4_en_remove,
.event = mlx4_en_event,
.query = mlx4_en_query,
- .get_prot_dev = get_netdev,
- .protocol = MLX4_PROT_EN,
+ .get_dev = get_netdev,
+ .protocol = MLX4_PROT_ETH,
};
static int __init mlx4_en_init(void)
diff --git a/sys/ofed/drivers/net/mlx4/en_netdev.c b/sys/ofed/drivers/net/mlx4/en_netdev.c
index 0a59ab4..f7167d9 100644
--- a/sys/ofed/drivers/net/mlx4/en_netdev.c
+++ b/sys/ofed/drivers/net/mlx4/en_netdev.c
@@ -632,8 +632,7 @@ int mlx4_en_start_port(struct net_device *dev)
/* Set port mac number */
en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port);
err = mlx4_register_mac(mdev->dev, priv->port,
- mlx4_en_mac_to_u64(IF_LLADDR(dev)),
- &priv->mac_index);
+ mlx4_en_mac_to_u64(IF_LLADDR(dev)));
if (err) {
en_err(priv, "Failed setting port mac\n");
goto tx_err;
@@ -697,7 +696,7 @@ wol_err:
mlx4_CLOSE_PORT(mdev->dev, priv->port);
mac_err:
- mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
+ mlx4_unregister_mac(mdev->dev, priv->port, priv->mac);
tx_err:
while (tx_index--) {
mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]);
@@ -730,7 +729,7 @@ void mlx4_en_stop_port(struct net_device *dev)
priv->port_up = false;
/* Unregister Mac address for the port */
- mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
+ mlx4_unregister_mac(mdev->dev, priv->port, priv->mac);
mdev->mac_removed[priv->port] = 1;
/* Free TX Rings */
@@ -946,6 +945,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
mutex_unlock(&mdev->state_lock);
mlx4_en_free_resources(priv);
+
mtx_destroy(&priv->stats_lock.m);
mtx_destroy(&priv->vlan_lock.m);
kfree(priv);
@@ -1587,6 +1587,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
/*
* Setup wake-on-lan.
*/
+#if 0
if (priv->mdev->dev->caps.wol) {
u64 config;
if (mlx4_wol_read(priv->mdev->dev, &config, priv->port) == 0) {
@@ -1596,6 +1597,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
dev->if_capenable |= IFCAP_WOL_MAGIC;
}
}
+#endif
/* Register for VLAN events */
priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
diff --git a/sys/ofed/drivers/net/mlx4/en_port.c b/sys/ofed/drivers/net/mlx4/en_port.c
index d8a2d7f..303bb2b 100644
--- a/sys/ofed/drivers/net/mlx4/en_port.c
+++ b/sys/ofed/drivers/net/mlx4/en_port.c
@@ -39,13 +39,14 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/cmd.h>
-
+#if 0 // moved to port.c
int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
u64 mac, u64 clear, u8 mode)
{
return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
- MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
+#endif
int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans)
{
@@ -65,12 +66,13 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans)
i++, j--)
filter->entry[j] = cpu_to_be32(vlans[i]);
err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_VLAN_FLTR,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
+#if 0 //moved to port.c - shahark
int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
{
@@ -94,15 +96,19 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
-
int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
u8 promisc)
{
+
+ printf("%s %s:%d\n", __func__, __FILE__, __LINE__);
+
+
+
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_set_port_rqp_calc_context *context;
int err;
@@ -116,8 +122,10 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
context->base_qpn = cpu_to_be32(base_qpn);
context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_EN_SHIFT | base_qpn);
+/*
context->mcast = cpu_to_be32((dev->caps.mc_promisc_mode <<
SET_PORT_PROMISC_MODE_SHIFT) | base_qpn);
+*/
context->intra_no_vlan = 0;
context->no_vlan = MLX4_NO_VLAN_IDX;
context->intra_vlan_miss = 0;
@@ -125,11 +133,12 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
+#endif
int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
{
@@ -144,7 +153,7 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
return PTR_ERR(mailbox);
memset(mailbox->buf, 0, sizeof(*qport_context));
err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
- MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
if (err)
goto out;
qport_context = mailbox->buf;
@@ -176,6 +185,7 @@ out:
return err;
}
+#if 0
static int read_iboe_counters(struct mlx4_dev *dev, int index, u64 counters[])
{
struct mlx4_cmd_mailbox *mailbox;
@@ -189,7 +199,7 @@ static int read_iboe_counters(struct mlx4_dev *dev, int index, u64 counters[])
return -ENOMEM;
err = mlx4_cmd_box(dev, 0, mailbox->dma, index, 0,
- MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C);
+ MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_WRAPPED);
if (err)
goto out;
@@ -217,6 +227,7 @@ out:
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
+#endif
int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
{
@@ -229,22 +240,24 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
unsigned long ierror;
int err;
int i;
- int counter;
+ //int counter;
u64 counters[4];
dev = mdev->pndev[port];
priv = netdev_priv(dev);
memset(counters, 0, sizeof counters);
+ /*
counter = mlx4_get_iboe_counter(priv->mdev->dev, port);
if (counter >= 0)
err = read_iboe_counters(priv->mdev->dev, counter, counters);
+ */
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
memset(mailbox->buf, 0, sizeof(*mlx4_en_stats));
err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
- MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
if (err)
goto out;
diff --git a/sys/ofed/drivers/net/mlx4/en_port.h b/sys/ofed/drivers/net/mlx4/en_port.h
index a9e2e24..5319814 100644
--- a/sys/ofed/drivers/net/mlx4/en_port.h
+++ b/sys/ofed/drivers/net/mlx4/en_port.h
@@ -39,11 +39,7 @@
#define SET_PORT_PROMISC_EN_SHIFT 31
#define SET_PORT_PROMISC_MODE_SHIFT 30
-enum {
- MLX4_CMD_SET_VLAN_FLTR = 0x47,
- MLX4_CMD_SET_MCAST_FLTR = 0x48,
- MLX4_CMD_DUMP_ETH_STATS = 0x49,
-};
+#if 0 //moved to port.c - shahark
struct mlx4_set_port_general_context {
u8 reserved[3];
@@ -72,6 +68,7 @@ struct mlx4_set_port_rqp_calc_context {
__be32 promisc;
__be32 mcast;
};
+#endif
#define VLAN_FLTR_SIZE 128
struct mlx4_set_vlan_fltr_mbox {
diff --git a/sys/ofed/drivers/net/mlx4/en_rx.c b/sys/ofed/drivers/net/mlx4/en_rx.c
index d6843d8..81affce 100644
--- a/sys/ofed/drivers/net/mlx4/en_rx.c
+++ b/sys/ofed/drivers/net/mlx4/en_rx.c
@@ -267,7 +267,6 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
int err;
int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
DS_SIZE * priv->num_frags);
-
for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
ring = &priv->rx_ring[ring_ind];
@@ -673,7 +672,6 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
en_err(priv, "Failed to allocate qp context\n");
return -ENOMEM;
}
-
err = mlx4_qp_alloc(mdev->dev, qpn, qp);
if (err) {
en_err(priv, "Failed to allocate qp #%x\n", qpn);
@@ -717,7 +715,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
en_dbg(DRV, priv, "Configuring rss steering\n");
err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
roundup_pow_of_two(priv->rx_ring_num),
- &rss_map->base_qpn);
+ &rss_map->base_qpn, 0);
if (err) {
en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
return err;
@@ -736,7 +734,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
}
/* Configure RSS indirection qp */
- err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn);
+ err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn, 0);
if (err) {
en_err(priv, "Failed to reserve range for RSS "
"indirection qp\n");
diff --git a/sys/ofed/drivers/net/mlx4/en_tx.c b/sys/ofed/drivers/net/mlx4/en_tx.c
index 9ad3c59..4661024 100644
--- a/sys/ofed/drivers/net/mlx4/en_tx.c
+++ b/sys/ofed/drivers/net/mlx4/en_tx.c
@@ -122,7 +122,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
"buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
- err = mlx4_qp_reserve_range(mdev->dev, 1, 256, &ring->qpn);
+ err = mlx4_qp_reserve_range(mdev->dev, 1, 256, &ring->qpn, MLX4_RESERVE_BF_QP);
if (err) {
en_err(priv, "Failed reserving qp for tx ring.\n");
goto err_map;
@@ -135,7 +135,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
}
ring->qp.event = mlx4_en_sqp_event;
- err = mlx4_bf_alloc(mdev->dev, &ring->bf);
+ err = mlx4_bf_alloc(mdev->dev, &ring->bf, 0);
if (err) {
ring->bf.uar = &mdev->priv_uar;
ring->bf.uar->map = mdev->uar_map;
diff --git a/sys/ofed/drivers/net/mlx4/eq.c b/sys/ofed/drivers/net/mlx4/eq.c
index 3dd96e6..f9d6ab9 100644
--- a/sys/ofed/drivers/net/mlx4/eq.c
+++ b/sys/ofed/drivers/net/mlx4/eq.c
@@ -33,6 +33,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
+#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
@@ -42,35 +43,15 @@
#include "fw.h"
enum {
+ MLX4_IRQNAME_SIZE = 32
+};
+
+enum {
MLX4_NUM_ASYNC_EQE = 0x100,
MLX4_NUM_SPARE_EQE = 0x80,
MLX4_EQ_ENTRY_SIZE = 0x20
};
-/*
- * Must be packed because start is 64 bits but only aligned to 32 bits.
- */
-struct mlx4_eq_context {
- __be32 flags;
- u16 reserved1[3];
- __be16 page_offset;
- u8 log_eq_size;
- u8 reserved2[4];
- u8 eq_period;
- u8 reserved3;
- u8 eq_max_count;
- u8 reserved4[3];
- u8 intr;
- u8 log_page_size;
- u8 reserved5[2];
- u8 mtt_base_addr_h;
- __be32 mtt_base_addr_l;
- u32 reserved6[2];
- __be32 consumer_index;
- __be32 producer_index;
- u32 reserved7[4];
-};
-
#define MLX4_EQ_STATUS_OK ( 0 << 28)
#define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28)
#define MLX4_EQ_OWNER_SW ( 0 << 24)
@@ -95,46 +76,20 @@ struct mlx4_eq_context {
(1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \
(1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
(1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \
- (1ull << MLX4_EVENT_TYPE_CMD))
-
-struct mlx4_eqe {
- u8 reserved1;
- u8 type;
- u8 reserved2;
- u8 subtype;
- union {
- u32 raw[6];
- struct {
- __be32 cqn;
- } __attribute__((packed)) comp;
- struct {
- u16 reserved1;
- __be16 token;
- u32 reserved2;
- u8 reserved3[3];
- u8 status;
- __be64 out_param;
- } __attribute__((packed)) cmd;
- struct {
- __be32 qpn;
- } __attribute__((packed)) qp;
- struct {
- __be32 srqn;
- } __attribute__((packed)) srq;
- struct {
- __be32 cqn;
- u32 reserved1;
- u8 reserved2[3];
- u8 syndrome;
- } __attribute__((packed)) cq_err;
- struct {
- u32 reserved1[2];
- __be32 port;
- } __attribute__((packed)) port_change;
- } event;
- u8 reserved3[3];
- u8 owner;
-} __attribute__((packed));
+ (1ull << MLX4_EVENT_TYPE_CMD) | \
+ (1ull << MLX4_EVENT_TYPE_OP_REQUIRED) | \
+ (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL) | \
+ (1ull << MLX4_EVENT_TYPE_FLR_EVENT) | \
+ (1ull << MLX4_EVENT_TYPE_FATAL_WARNING))
+
+static u64 get_async_ev_mask(struct mlx4_dev *dev)
+{
+ u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK;
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
+ async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT);
+
+ return async_ev_mask;
+}
static void eq_set_ci(struct mlx4_eq *eq, int req_not)
{
@@ -145,27 +100,355 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
mb();
}
-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
{
- unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
- return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
+ /* (entry & (eq->nent - 1)) gives us a cyclic array */
+ unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
+ /* CX3 is capable of extending the EQE from 32 to 64 bytes.
+ * When this feature is enabled, the first (in the lower addresses)
+ * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
+ * contain the legacy EQE information.
+ */
+ return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
}
-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
{
- struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
+ struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
}
+static struct mlx4_eqe *next_slave_event_eqe(struct mlx4_slave_event_eq *slave_eq)
+{
+ struct mlx4_eqe *eqe =
+ &slave_eq->event_eqe[slave_eq->cons & (SLAVE_EVENT_EQ_SIZE - 1)];
+ return (!!(eqe->owner & 0x80) ^
+ !!(slave_eq->cons & SLAVE_EVENT_EQ_SIZE)) ?
+ eqe : NULL;
+}
+
+void mlx4_gen_slave_eqe(struct work_struct *work)
+{
+ struct mlx4_mfunc_master_ctx *master =
+ container_of(work, struct mlx4_mfunc_master_ctx,
+ slave_event_work);
+ struct mlx4_mfunc *mfunc =
+ container_of(master, struct mlx4_mfunc, master);
+ struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc);
+ struct mlx4_dev *dev = &priv->dev;
+ struct mlx4_slave_event_eq *slave_eq = &mfunc->master.slave_eq;
+ struct mlx4_eqe *eqe;
+ u8 slave;
+ int i;
+
+ for (eqe = next_slave_event_eqe(slave_eq); eqe;
+ eqe = next_slave_event_eqe(slave_eq)) {
+ slave = eqe->slave_id;
+
+ /* All active slaves need to receive the event */
+ if (slave == ALL_SLAVES) {
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (i != dev->caps.function &&
+ master->slave_state[i].active)
+ if (mlx4_GEN_EQE(dev, i, eqe))
+ mlx4_warn(dev, "Failed to "
+ " generate event "
+ "for slave %d\n", i);
+ }
+ } else {
+ if (mlx4_GEN_EQE(dev, slave, eqe))
+ mlx4_warn(dev, "Failed to generate event "
+ "for slave %d\n", slave);
+ }
+ ++slave_eq->cons;
+ }
+}
+
+
+static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq;
+ struct mlx4_eqe *s_eqe;
+ unsigned long flags;
+
+ spin_lock_irqsave(&slave_eq->event_lock, flags);
+ s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)];
+ if ((!!(s_eqe->owner & 0x80)) ^
+ (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) {
+ mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. "
+ "No free EQE on slave events queue\n", slave);
+ spin_unlock_irqrestore(&slave_eq->event_lock, flags);
+ return;
+ }
+
+ memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
+ s_eqe->slave_id = slave;
+ /* ensure all information is written before setting the ownersip bit */
+ wmb();
+ s_eqe->owner = !!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE) ? 0x0 : 0x80;
+ ++slave_eq->prod;
+
+ queue_work(priv->mfunc.master.comm_wq,
+ &priv->mfunc.master.slave_event_work);
+ spin_unlock_irqrestore(&slave_eq->event_lock, flags);
+}
+
+static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
+ struct mlx4_eqe *eqe)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_slave =
+ &priv->mfunc.master.slave_state[slave];
+
+ if (!s_slave->active) {
+ /*mlx4_warn(dev, "Trying to pass event to inactive slave\n");*/
+ return;
+ }
+
+ slave_event(dev, slave, eqe);
+}
+
+int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port)
+{
+ struct mlx4_eqe eqe;
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave];
+
+ if (!s_slave->active)
+ return 0;
+
+ memset(&eqe, 0, sizeof eqe);
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
+ eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE;
+ eqe.event.port_mgmt_change.port = port;
+
+ return mlx4_GEN_EQE(dev, slave, &eqe);
+}
+EXPORT_SYMBOL(mlx4_gen_pkey_eqe);
+
+int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port)
+{
+ struct mlx4_eqe eqe;
+
+ /*don't send if we don't have the that slave */
+ if (dev->num_vfs < slave)
+ return 0;
+ memset(&eqe, 0, sizeof eqe);
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
+ eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO;
+ eqe.event.port_mgmt_change.port = port;
+
+ return mlx4_GEN_EQE(dev, slave, &eqe);
+}
+EXPORT_SYMBOL(mlx4_gen_guid_change_eqe);
+
+int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port,
+ u8 port_subtype_change)
+{
+ struct mlx4_eqe eqe;
+
+ /*don't send if we don't have the that slave */
+ if (dev->num_vfs < slave)
+ return 0;
+ memset(&eqe, 0, sizeof eqe);
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE;
+ eqe.subtype = port_subtype_change;
+ eqe.event.port_change.port = cpu_to_be32(port << 28);
+
+ mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__,
+ port_subtype_change, slave, port);
+ return mlx4_GEN_EQE(dev, slave, &eqe);
+}
+EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe);
+
+enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
+ if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) {
+ pr_err("%s: Error: asking for slave:%d, port:%d\n",
+ __func__, slave, port);
+ return SLAVE_PORT_DOWN;
+ }
+ return s_state[slave].port_state[port];
+}
+EXPORT_SYMBOL(mlx4_get_slave_port_state);
+
+static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port,
+ enum slave_port_state state)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
+
+ if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
+ pr_err("%s: Error: asking for slave:%d, port:%d\n",
+ __func__, slave, port);
+ return -1;
+ }
+ s_state[slave].port_state[port] = state;
+
+ return 0;
+}
+
+static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event)
+{
+ int i;
+ enum slave_port_gen_event gen_event;
+
+ for (i = 0; i < dev->num_slaves; i++)
+ set_and_calc_slave_port_state(dev, i, port, event, &gen_event);
+}
+/**************************************************************************
+ The function get as input the new event to that port,
+ and according to the prev state change the slave's port state.
+ The events are:
+ MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
+ MLX4_PORT_STATE_DEV_EVENT_PORT_UP
+ MLX4_PORT_STATE_IB_EVENT_GID_VALID
+ MLX4_PORT_STATE_IB_EVENT_GID_INVALID
+***************************************************************************/
+int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave,
+ u8 port, int event,
+ enum slave_port_gen_event *gen_event)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *ctx = NULL;
+ unsigned long flags;
+ int ret = -1;
+ enum slave_port_state cur_state =
+ mlx4_get_slave_port_state(dev, slave, port);
+
+ *gen_event = SLAVE_PORT_GEN_EVENT_NONE;
+
+ if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
+ pr_err("%s: Error: asking for slave:%d, port:%d\n",
+ __func__, slave, port);
+ return ret;
+ }
+
+ ctx = &priv->mfunc.master.slave_state[slave];
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ switch (cur_state) {
+ case SLAVE_PORT_DOWN:
+ if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event)
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PENDING_UP);
+ break;
+ case SLAVE_PENDING_UP:
+ if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event)
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PORT_DOWN);
+ else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) {
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PORT_UP);
+ *gen_event = SLAVE_PORT_GEN_EVENT_UP;
+ }
+ break;
+ case SLAVE_PORT_UP:
+ if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) {
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PORT_DOWN);
+ *gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
+ } else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID ==
+ event) {
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PENDING_UP);
+ *gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
+ }
+ break;
+ default:
+ pr_err("%s: BUG!!! UNKNOWN state: "
+ "slave:%d, port:%d\n", __func__, slave, port);
+ goto out;
+ }
+ ret = mlx4_get_slave_port_state(dev, slave, port);
+
+out:
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ return ret;
+}
+
+EXPORT_SYMBOL(set_and_calc_slave_port_state);
+
+int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr)
+{
+ struct mlx4_eqe eqe;
+
+ memset(&eqe, 0, sizeof eqe);
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
+ eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO;
+ eqe.event.port_mgmt_change.port = port;
+ eqe.event.port_mgmt_change.params.port_info.changed_attr =
+ cpu_to_be32((u32) attr);
+
+ slave_event(dev, ALL_SLAVES, &eqe);
+ return 0;
+}
+EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev);
+
+void mlx4_master_handle_slave_flr(struct work_struct *work)
+{
+ struct mlx4_mfunc_master_ctx *master =
+ container_of(work, struct mlx4_mfunc_master_ctx,
+ slave_flr_event_work);
+ struct mlx4_mfunc *mfunc =
+ container_of(master, struct mlx4_mfunc, master);
+ struct mlx4_priv *priv =
+ container_of(mfunc, struct mlx4_priv, mfunc);
+ struct mlx4_dev *dev = &priv->dev;
+ struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+ int i;
+ int err;
+ unsigned long flags;
+
+ mlx4_dbg(dev, "mlx4_handle_slave_flr\n");
+
+ for (i = 0 ; i < dev->num_slaves; i++) {
+
+ if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) {
+ mlx4_dbg(dev, "mlx4_handle_slave_flr: "
+ "clean slave: %d\n", i);
+
+ mlx4_delete_all_resources_for_slave(dev, i);
+ /*return the slave to running mode*/
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
+ slave_state[i].is_slave_going_down = 0;
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+ /*notify the FW:*/
+ err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed to notify FW on "
+ "FLR done (slave:%d)\n", i);
+ }
+ }
+}
+
static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
{
+ struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_eqe *eqe;
int cqn;
int eqes_found = 0;
int set_ci = 0;
int port;
+ int slave = 0;
+ int ret;
+ u32 flr_slave;
+ u8 update_slave_state;
+ int i;
+ enum slave_port_gen_event gen_event;
+ unsigned long flags;
- while ((eqe = next_eqe_sw(eq))) {
+ while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
@@ -186,14 +469,68 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
- mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
- eqe->type);
+ mlx4_dbg(dev, "event %d arrived\n", eqe->type);
+ if (mlx4_is_master(dev)) {
+ /* forward only to slave owning the QP */
+ ret = mlx4_get_slave_from_resource_id(dev,
+ RES_QP,
+ be32_to_cpu(eqe->event.qp.qpn)
+ & 0xffffff, &slave);
+ if (ret && ret != -ENOENT) {
+ mlx4_dbg(dev, "QP event %02x(%02x) on "
+ "EQ %d at index %u: could "
+ "not get slave id (%d)\n",
+ eqe->type, eqe->subtype,
+ eq->eqn, eq->cons_index, ret);
+ break;
+ }
+
+ if (!ret && slave != dev->caps.function) {
+ mlx4_slave_event(dev, slave, eqe);
+ break;
+ }
+
+ }
+ mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) &
+ 0xffffff, eqe->type);
break;
case MLX4_EVENT_TYPE_SRQ_LIMIT:
+ mlx4_warn(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n",
+ __func__);
case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
- mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
- eqe->type);
+ if (mlx4_is_master(dev)) {
+ /* forward only to slave owning the SRQ */
+ ret = mlx4_get_slave_from_resource_id(dev,
+ RES_SRQ,
+ be32_to_cpu(eqe->event.srq.srqn)
+ & 0xffffff,
+ &slave);
+ if (ret && ret != -ENOENT) {
+ mlx4_warn(dev, "SRQ event %02x(%02x) "
+ "on EQ %d at index %u: could"
+ " not get slave id (%d)\n",
+ eqe->type, eqe->subtype,
+ eq->eqn, eq->cons_index, ret);
+ break;
+ }
+ mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x,"
+ " event: %02x(%02x)\n", __func__,
+ slave,
+ be32_to_cpu(eqe->event.srq.srqn),
+ eqe->type, eqe->subtype);
+
+ if (!ret && slave != dev->caps.function) {
+ mlx4_warn(dev, "%s: sending event "
+ "%02x(%02x) to slave:%d\n",
+ __func__, eqe->type,
+ eqe->subtype, slave);
+ mlx4_slave_event(dev, slave, eqe);
+ break;
+ }
+ }
+ mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) &
+ 0xffffff, eqe->type);
break;
case MLX4_EVENT_TYPE_CMD:
@@ -209,10 +546,46 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN,
port);
mlx4_priv(dev)->sense.do_sense_port[port] = 1;
+ if (!mlx4_is_master(dev))
+ break;
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN"
+ " to slave: %d, port:%d\n",
+ __func__, i, port);
+ mlx4_slave_event(dev, i, eqe);
+ } else { /* IB port */
+ set_and_calc_slave_port_state(dev, i, port,
+ MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
+ &gen_event);
+ /*we can be in pending state, then do not send port_down event*/
+ if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ mlx4_slave_event(dev, i, eqe);
+ }
+ }
+ }
} else {
- mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP,
- port);
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port);
+
mlx4_priv(dev)->sense.do_sense_port[port] = 0;
+
+ if (!mlx4_is_master(dev))
+ break;
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ mlx4_slave_event(dev, i, eqe);
+ }
+ else /* IB port */
+ /* port-up event will be sent to a slave when the
+ * slave's alias-guid is set. This is done in alias_GUID.c
+ */
+ set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP);
}
break;
@@ -221,7 +594,28 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
eqe->event.cq_err.syndrome == 1 ?
"overrun" : "access violation",
be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
- mlx4_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn),
+ if (mlx4_is_master(dev)) {
+ ret = mlx4_get_slave_from_resource_id(dev,
+ RES_CQ,
+ be32_to_cpu(eqe->event.cq_err.cqn)
+ & 0xffffff, &slave);
+ if (ret && ret != -ENOENT) {
+ mlx4_dbg(dev, "CQ event %02x(%02x) on "
+ "EQ %d at index %u: could "
+ "not get slave id (%d)\n",
+ eqe->type, eqe->subtype,
+ eq->eqn, eq->cons_index, ret);
+ break;
+ }
+
+ if (!ret && slave != dev->caps.function) {
+ mlx4_slave_event(dev, slave, eqe);
+ break;
+ }
+ }
+ mlx4_cq_event(dev,
+ be32_to_cpu(eqe->event.cq_err.cqn)
+ & 0xffffff,
eqe->type);
break;
@@ -229,11 +623,99 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
break;
+ case MLX4_EVENT_TYPE_OP_REQUIRED:
+ atomic_inc(&priv->opreq_count);
+ /* FW commands can't be executed from interrupt context
+ working in deferred task */
+ queue_work(mlx4_wq, &priv->opreq_task);
+ break;
+
+ case MLX4_EVENT_TYPE_COMM_CHANNEL:
+ if (!mlx4_is_master(dev)) {
+ mlx4_warn(dev, "Received comm channel event "
+ "for non master device\n");
+ break;
+ }
+ memcpy(&priv->mfunc.master.comm_arm_bit_vector,
+ eqe->event.comm_channel_arm.bit_vec,
+ sizeof eqe->event.comm_channel_arm.bit_vec);
+ queue_work(priv->mfunc.master.comm_wq,
+ &priv->mfunc.master.comm_work);
+ break;
+
+ case MLX4_EVENT_TYPE_FLR_EVENT:
+ flr_slave = be32_to_cpu(eqe->event.flr_event.slave_id);
+ if (!mlx4_is_master(dev)) {
+ mlx4_warn(dev, "Non-master function received"
+ "FLR event\n");
+ break;
+ }
+
+ mlx4_dbg(dev, "FLR event for slave: %d\n", flr_slave);
+
+ if (flr_slave >= dev->num_slaves) {
+ mlx4_warn(dev,
+ "Got FLR for unknown function: %d\n",
+ flr_slave);
+ update_slave_state = 0;
+ } else
+ update_slave_state = 1;
+
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ if (update_slave_state) {
+ priv->mfunc.master.slave_state[flr_slave].active = false;
+ priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR;
+ priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
+ }
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+ queue_work(priv->mfunc.master.comm_wq,
+ &priv->mfunc.master.slave_flr_event_work);
+ break;
+
+ case MLX4_EVENT_TYPE_FATAL_WARNING:
+ if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) {
+ if (mlx4_is_master(dev))
+ for (i = 0; i < dev->num_slaves; i++) {
+ mlx4_dbg(dev, "%s: Sending "
+ "MLX4_FATAL_WARNING_SUBTYPE_WARMING"
+ " to slave: %d\n", __func__, i);
+ if (i == dev->caps.function)
+ continue;
+ mlx4_slave_event(dev, i, eqe);
+ }
+ mlx4_err(dev, "Temperature Threshold was reached! "
+ "Threshold: %d celsius degrees; "
+ "Current Temperature: %d\n",
+ be16_to_cpu(eqe->event.warming.warning_threshold),
+ be16_to_cpu(eqe->event.warming.current_temperature));
+ } else
+ mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), "
+ "subtype %02x on EQ %d at index %u. owner=%x, "
+ "nent=0x%x, slave=%x, ownership=%s\n",
+ eqe->type, eqe->subtype, eq->eqn,
+ eq->cons_index, eqe->owner, eq->nent,
+ eqe->slave_id,
+ !!(eqe->owner & 0x80) ^
+ !!(eq->cons_index & eq->nent) ? "HW" : "SW");
+
+ break;
+
+ case MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT:
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
+ (unsigned long) eqe);
+ break;
+
case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
case MLX4_EVENT_TYPE_ECC_DETECT:
default:
- mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at index %u\n",
- eqe->type, eqe->subtype, eq->eqn, eq->cons_index);
+ mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at "
+ "index %u. owner=%x, nent=0x%x, slave=%x, "
+ "ownership=%s\n",
+ eqe->type, eqe->subtype, eq->eqn,
+ eq->cons_index, eqe->owner, eq->nent,
+ eqe->slave_id,
+ !!(eqe->owner & 0x80) ^
+ !!(eq->cons_index & eq->nent) ? "HW" : "SW");
break;
};
@@ -266,6 +748,7 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
int work = 0;
int i;
+
writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
@@ -285,25 +768,55 @@ static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr)
return IRQ_HANDLED;
}
+int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_event_eq_info *event_eq =
+ priv->mfunc.master.slave_state[slave].event_eq;
+ u32 in_modifier = vhcr->in_modifier;
+ u32 eqn = in_modifier & 0x1FF;
+ u64 in_param = vhcr->in_param;
+ int err = 0;
+ int i;
+
+ if (slave == dev->caps.function)
+ err = mlx4_cmd(dev, in_param, (in_modifier & 0x80000000) | eqn,
+ 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+ if (!err)
+ for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i)
+ if (in_param & (1LL << i))
+ event_eq[i].eqn = in_modifier >> 31 ? -1 : eqn;
+
+ return err;
+}
+
static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
int eq_num)
{
return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num,
- 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B);
+ 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int eq_num)
{
- return mlx4_cmd(dev, mailbox->dma, eq_num, 0, MLX4_CMD_SW2HW_EQ,
- MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd(dev, mailbox->dma, eq_num, 0,
+ MLX4_CMD_SW2HW_EQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int eq_num)
{
- return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num, 0, MLX4_CMD_HW2SW_EQ,
- MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num,
+ 0, MLX4_CMD_HW2SW_EQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_num_eq_uar(struct mlx4_dev *dev)
@@ -313,8 +826,8 @@ static int mlx4_num_eq_uar(struct mlx4_dev *dev)
* we need to map, take the difference of highest index and
* the lowest index we'll use and add 1.
*/
- return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 -
- dev->caps.reserved_eqs / 4 + 1;
+ return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs +
+ dev->caps.comp_pool)/4 - dev->caps.reserved_eqs/4 + 1;
}
static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
@@ -339,6 +852,18 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4);
}
+static void mlx4_unmap_uar(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i;
+
+ for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
+ if (priv->eq_table.uar_map[i]) {
+ iounmap(priv->eq_table.uar_map[i]);
+ priv->eq_table.uar_map[i] = NULL;
+ }
+}
+
static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
u8 intr, struct mlx4_eq *eq)
{
@@ -354,7 +879,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
eq->dev = dev;
eq->nent = roundup_pow_of_two(max(nent, 2));
- npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
+ /* CX3 is capable of extending the CQE\EQE from 32 to 64 bytes */
+ npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
eq->page_list = kmalloc(npages * sizeof *eq->page_list,
GFP_KERNEL);
@@ -456,8 +982,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
int err;
- int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
int i;
+ /* CX3 is capable of extending the CQE\EQE from 32 to 64 bytes */
+ int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -471,16 +998,16 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
mlx4_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
for (i = 0; i < sizeof (struct mlx4_eq_context) / 4; ++i) {
if (i % 4 == 0)
- printk("[%02x] ", i * 4);
- printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
+ pr_cont("[%02x] ", i * 4);
+ pr_cont(" %08x", be32_to_cpup(mailbox->buf + i * 4));
if ((i + 1) % 4 == 0)
- printk("\n");
+ pr_cont("\n");
}
}
mlx4_mtt_cleanup(dev, &eq->mtt);
for (i = 0; i < npages; ++i)
- pci_free_consistent(dev->pdev, PAGE_SIZE,
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
eq->page_list[i].buf,
eq->page_list[i].map);
@@ -492,16 +1019,32 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
static void mlx4_free_irqs(struct mlx4_dev *dev)
{
struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table;
- int i;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, vec;
if (eq_table->have_irq)
free_irq(dev->pdev->irq, dev);
+
for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
if (eq_table->eq[i].have_irq) {
free_irq(eq_table->eq[i].irq, eq_table->eq + i);
eq_table->eq[i].have_irq = 0;
}
+ for (i = 0; i < dev->caps.comp_pool; i++) {
+ /*
+ * Freeing the assigned irq's
+ * all bits should be 0, but we need to validate
+ */
+ if (priv->msix_ctl.pool_bm & 1ULL << i) {
+ /* NO need protecting*/
+ vec = dev->caps.num_comp_vectors + 1 + i;
+ free_irq(priv->eq_table.eq[vec].irq,
+ &priv->eq_table.eq[vec]);
+ }
+ }
+
+
kfree(eq_table->irq_names);
}
@@ -549,8 +1092,9 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
int err;
int i;
- priv->eq_table.uar_map = kcalloc(sizeof *priv->eq_table.uar_map,
- mlx4_num_eq_uar(dev), GFP_KERNEL);
+ priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev),
+ sizeof *priv->eq_table.uar_map,
+ GFP_KERNEL);
if (!priv->eq_table.uar_map) {
err = -ENOMEM;
goto err_out_free;
@@ -564,23 +1108,30 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
priv->eq_table.uar_map[i] = NULL;
- err = mlx4_map_clr_int(dev);
- if (err)
- goto err_out_bitmap;
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_map_clr_int(dev);
+ if (err)
+ goto err_out_bitmap;
- priv->eq_table.clr_mask =
- swab32(1 << (priv->eq_table.inta_pin & 31));
- priv->eq_table.clr_int = priv->clr_base +
- (priv->eq_table.inta_pin < 32 ? 4 : 0);
+ priv->eq_table.clr_mask =
+ swab32(1 << (priv->eq_table.inta_pin & 31));
+ priv->eq_table.clr_int = priv->clr_base +
+ (priv->eq_table.inta_pin < 32 ? 4 : 0);
+ }
- priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL);
+ priv->eq_table.irq_names =
+ kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1 +
+ dev->caps.comp_pool),
+ GFP_KERNEL);
if (!priv->eq_table.irq_names) {
err = -ENOMEM;
- goto err_out_bitmap;
+ goto err_out_clr_int;
}
for (i = 0; i < dev->caps.num_comp_vectors; ++i) {
- err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
+ err = mlx4_create_eq(dev, dev->caps.num_cqs -
+ dev->caps.reserved_cqs +
+ MLX4_NUM_SPARE_EQE,
(dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
&priv->eq_table.eq[i]);
if (err) {
@@ -595,18 +1146,42 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
if (err)
goto err_out_comp;
+ /*if additional completion vectors poolsize is 0 this loop will not run*/
+ for (i = dev->caps.num_comp_vectors + 1;
+ i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) {
+
+ err = mlx4_create_eq(dev, dev->caps.num_cqs -
+ dev->caps.reserved_cqs +
+ MLX4_NUM_SPARE_EQE,
+ (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
+ &priv->eq_table.eq[i]);
+ if (err) {
+ --i;
+ goto err_out_unmap;
+ }
+ }
+
+
if (dev->flags & MLX4_FLAG_MSI_X) {
- static const char async_eq_name[] = DRV_NAME "(async)";
const char *eq_name;
for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
if (i < dev->caps.num_comp_vectors) {
- snprintf(priv->eq_table.irq_names + i * 16, 16,
- "eth-mlx4-%d", i);
- eq_name = priv->eq_table.irq_names + i * 16;
- } else
- eq_name = async_eq_name;
+ snprintf(priv->eq_table.irq_names +
+ i * MLX4_IRQNAME_SIZE,
+ MLX4_IRQNAME_SIZE,
+ "mlx4-comp-%d@pci:%s", i,
+ pci_name(dev->pdev));
+ } else {
+ snprintf(priv->eq_table.irq_names +
+ i * MLX4_IRQNAME_SIZE,
+ MLX4_IRQNAME_SIZE,
+ "mlx4-async@pci:%s",
+ pci_name(dev->pdev));
+ }
+ eq_name = priv->eq_table.irq_names +
+ i * MLX4_IRQNAME_SIZE;
err = request_irq(priv->eq_table.eq[i].irq,
mlx4_msi_x_interrupt, 0, eq_name,
priv->eq_table.eq + i);
@@ -616,15 +1191,19 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
priv->eq_table.eq[i].have_irq = 1;
}
} else {
+ snprintf(priv->eq_table.irq_names,
+ MLX4_IRQNAME_SIZE,
+ DRV_NAME "@pci:%s",
+ pci_name(dev->pdev));
err = request_irq(dev->pdev->irq, mlx4_interrupt,
- IRQF_SHARED, DRV_NAME, dev);
+ IRQF_SHARED, priv->eq_table.irq_names, dev);
if (err)
goto err_out_async;
priv->eq_table.have_irq = 1;
}
- err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
+ err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
if (err)
mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
@@ -646,10 +1225,14 @@ err_out_unmap:
mlx4_free_eq(dev, &priv->eq_table.eq[i]);
--i;
}
- mlx4_unmap_clr_int(dev);
mlx4_free_irqs(dev);
+err_out_clr_int:
+ if (!mlx4_is_slave(dev))
+ mlx4_unmap_clr_int(dev);
+
err_out_bitmap:
+ mlx4_unmap_uar(dev);
mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
err_out_free:
@@ -663,20 +1246,18 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
int i;
- mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
+ mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1,
priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
mlx4_free_irqs(dev);
- for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
+ for (i = 0; i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i)
mlx4_free_eq(dev, &priv->eq_table.eq[i]);
- mlx4_unmap_clr_int(dev);
-
- for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
- if (priv->eq_table.uar_map[i])
- iounmap(priv->eq_table.uar_map[i]);
+ if (!mlx4_is_slave(dev))
+ mlx4_unmap_clr_int(dev);
+ mlx4_unmap_uar(dev);
mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
kfree(priv->eq_table.uar_map);
@@ -694,7 +1275,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev)
err = mlx4_NOP(dev);
/* When not in MSI_X, there is only one irq to check */
- if (!(dev->flags & MLX4_FLAG_MSI_X))
+ if (!(dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(dev))
return err;
/* A loop over all completion vectors, for each vector we will check
@@ -705,8 +1286,8 @@ int mlx4_test_interrupts(struct mlx4_dev *dev)
/* Temporary use polling for command completions */
mlx4_cmd_use_polling(dev);
- /* Map the new eq to handle all asynchronous events */
- err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
+ /* Map the new eq to handle all asyncronous events */
+ err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
priv->eq_table.eq[i].eqn);
if (err) {
mlx4_warn(dev, "Failed mapping eq for interrupt test\n");
@@ -720,8 +1301,70 @@ int mlx4_test_interrupts(struct mlx4_dev *dev)
}
/* Return to default */
- mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
+ mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
return err;
}
EXPORT_SYMBOL(mlx4_test_interrupts);
+
+int mlx4_assign_eq(struct mlx4_dev *dev, char *name, int *vector)
+{
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int vec = 0, err = 0, i;
+
+ mutex_lock(&priv->msix_ctl.pool_lock);
+ for (i = 0; !vec && i < dev->caps.comp_pool; i++) {
+ if (~priv->msix_ctl.pool_bm & 1ULL << i) {
+ priv->msix_ctl.pool_bm |= 1ULL << i;
+ vec = dev->caps.num_comp_vectors + 1 + i;
+ snprintf(priv->eq_table.irq_names +
+ vec * MLX4_IRQNAME_SIZE,
+ MLX4_IRQNAME_SIZE, "%s", name);
+ err = request_irq(priv->eq_table.eq[vec].irq,
+ mlx4_msi_x_interrupt, 0,
+ &priv->eq_table.irq_names[vec<<5],
+ priv->eq_table.eq + vec);
+ if (err) {
+ /*zero out bit by fliping it*/
+ priv->msix_ctl.pool_bm ^= 1 << i;
+ vec = 0;
+ continue;
+ /*we dont want to break here*/
+ }
+ eq_set_ci(&priv->eq_table.eq[vec], 1);
+ }
+ }
+ mutex_unlock(&priv->msix_ctl.pool_lock);
+
+ if (vec) {
+ *vector = vec;
+ } else {
+ *vector = 0;
+ err = (i == dev->caps.comp_pool) ? -ENOSPC : err;
+ }
+ return err;
+}
+EXPORT_SYMBOL(mlx4_assign_eq);
+
+void mlx4_release_eq(struct mlx4_dev *dev, int vec)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ /*bm index*/
+ int i = vec - dev->caps.num_comp_vectors - 1;
+
+ if (likely(i >= 0)) {
+ /*sanity check , making sure were not trying to free irq's
+ Belonging to a legacy EQ*/
+ mutex_lock(&priv->msix_ctl.pool_lock);
+ if (priv->msix_ctl.pool_bm & 1ULL << i) {
+ free_irq(priv->eq_table.eq[vec].irq,
+ &priv->eq_table.eq[vec]);
+ priv->msix_ctl.pool_bm &= ~(1ULL << i);
+ }
+ mutex_unlock(&priv->msix_ctl.pool_lock);
+ }
+
+}
+EXPORT_SYMBOL(mlx4_release_eq);
+
diff --git a/sys/ofed/drivers/net/mlx4/fw.c b/sys/ofed/drivers/net/mlx4/fw.c
index d27db38..bafae00 100644
--- a/sys/ofed/drivers/net/mlx4/fw.c
+++ b/sys/ofed/drivers/net/mlx4/fw.c
@@ -33,6 +33,7 @@
*/
#include <linux/mlx4/cmd.h>
+#include <linux/module.h>
#include "fw.h"
#include "icm.h"
@@ -46,14 +47,10 @@ enum {
extern void __buggy_use_of_MLX4_GET(void);
extern void __buggy_use_of_MLX4_PUT(void);
-static int enable_qos;
+static bool enable_qos;
module_param(enable_qos, bool, 0444);
MODULE_PARM_DESC(enable_qos, "Enable Quality of Service support in the HCA (default: off)");
-static int mlx4_pre_t11_mode = 0;
-module_param_named(enable_pre_t11_mode, mlx4_pre_t11_mode, int, 0644);
-MODULE_PARM_DESC(enable_pre_t11_mode, "For FCoXX, enable pre-t11 mode if non-zero (default: 0)");
-
#define MLX4_GET(dest, source, offset) \
do { \
void *__p = (char *) (source) + (offset); \
@@ -93,6 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
[ 9] = "Q_Key violation counter",
[10] = "VMM",
[12] = "DPDP",
+ [15] = "Big LSO headers",
[16] = "MW support",
[17] = "APM support",
[18] = "Atomic ops support",
@@ -102,8 +100,17 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
[24] = "Demand paging support",
[25] = "Router support",
[30] = "IBoE support",
- [48] = "Basic counters support",
- [49] = "Extended counters support",
+ [32] = "Unicast loopback support",
+ [34] = "FCS header control",
+ [38] = "Wake On LAN support",
+ [40] = "UDP RSS support",
+ [41] = "Unicast VEP steering support",
+ [42] = "Multicast VEP steering support",
+ [48] = "Counters support",
+ [59] = "Port management change event support",
+ [60] = "eSwitch support",
+ [61] = "64 byte EQE support",
+ [62] = "64 byte CQE support",
};
int i;
@@ -113,6 +120,21 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
mlx4_dbg(dev, " %s\n", fname[i]);
}
+static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
+{
+ static const char * const fname[] = {
+ [0] = "RSS support",
+ [1] = "RSS Toeplitz Hash Function support",
+ [2] = "RSS XOR Hash Function support",
+ [3] = "Device manage flow steering support"
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(fname); ++i)
+ if (fname[i] && (flags & (1LL << i)))
+ mlx4_dbg(dev, " %s\n", fname[i]);
+}
+
int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg)
{
struct mlx4_cmd_mailbox *mailbox;
@@ -135,9 +157,244 @@ int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg)
MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET);
err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u8 field;
+ u32 size;
+ int err = 0;
+
+#define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0
+#define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1
+#define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4
+#define QUERY_FUNC_CAP_FMR_OFFSET 0x8
+#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x10
+#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x14
+#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x18
+#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x20
+#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x24
+#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28
+#define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c
+#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30
+
+#define QUERY_FUNC_CAP_FMR_FLAG 0x80
+#define QUERY_FUNC_CAP_FLAG_RDMA 0x40
+#define QUERY_FUNC_CAP_FLAG_ETH 0x80
+
+/* when opcode modifier = 1 */
+#define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3
+#define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8
+#define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc
+
+#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10
+#define QUERY_FUNC_CAP_QP0_PROXY 0x14
+#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18
+#define QUERY_FUNC_CAP_QP1_PROXY 0x1c
+
+#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40
+#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80
+
+#define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80
+
+ if (vhcr->op_modifier == 1) {
+ field = 0;
+ /* ensure force vlan and force mac bits are not set */
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
+ /* ensure that phy_wqe_gid bit is not set */
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
+
+ field = vhcr->in_modifier; /* phys-port = logical-port */
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
+
+ /* size is now the QP number */
+ size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL);
+
+ size += 2;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL);
+
+ size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY);
+
+ size += 2;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
+
+ } else if (vhcr->op_modifier == 0) {
+ /* enable rdma and ethernet interfaces */
+ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA);
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET);
+
+ field = dev->caps.num_ports;
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
+
+ size = dev->caps.function_caps; /* set PF behaviours */
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
+
+ field = 0; /* protected FMR support not available as yet */
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
+
+ size = dev->caps.num_eqs;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+
+ size = dev->caps.reserved_eqs;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
+
+ size = dev->caps.num_mgms + dev->caps.num_amgms;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
+
+ } else
+ err = -EINVAL;
+
+ return err;
+}
+
+int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
+ struct mlx4_func_cap *func_cap)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ u8 field, op_modifier;
+ u32 size;
+ int err = 0;
+
+ op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier,
+ MLX4_CMD_QUERY_FUNC_CAP,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+
+ outbox = mailbox->buf;
+
+ if (!op_modifier) {
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET);
+ if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) {
+ mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+ func_cap->flags = field;
+
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
+ func_cap->num_ports = field;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
+ func_cap->pf_context_behaviour = size;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
+ func_cap->qp_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
+ func_cap->srq_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
+ func_cap->cq_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+ func_cap->max_eq = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+ func_cap->reserved_eq = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
+ func_cap->mpt_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
+ func_cap->mtt_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
+ func_cap->mcg_quota = size & 0xFFFFFF;
+ goto out;
+ }
+ /* logical port query */
+ if (gen_or_port > dev->caps.num_ports) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) {
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
+ if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) {
+ mlx4_err(dev, "VLAN is enforced on this port\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+
+ if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) {
+ mlx4_err(dev, "Force mac is enabled on this port\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+ } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) {
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
+ if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) {
+ mlx4_err(dev, "phy_wqe_gid is "
+ "enforced on this ib port\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+ }
+
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
+ func_cap->physical_port = field;
+ if (func_cap->physical_port != gen_or_port) {
+ err = -ENOSYS;
+ goto out;
+ }
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
+ func_cap->qp0_tunnel_qpn = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY);
+ func_cap->qp0_proxy_qpn = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL);
+ func_cap->qp1_tunnel_qpn = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY);
+ func_cap->qp1_proxy_qpn = size & 0xFFFFFF;
+
+ /* All other resources are allocated by the master, but we still report
+ * 'num' and 'reserved' capabilities as follows:
+ * - num remains the maximum resource index
+ * - 'num - reserved' is the total available objects of a resource, but
+ * resource indices may be less than 'reserved'
+ * TODO: set per-resource quotas */
+
+out:
mlx4_free_cmd_mailbox(dev, mailbox);
+
return err;
}
@@ -146,14 +403,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
struct mlx4_cmd_mailbox *mailbox;
u32 *outbox;
u8 field;
- u32 field32;
+ u32 field32, flags, ext_flags;
u16 size;
u16 stat_rate;
int err;
int i;
- u32 in_modifier;
- u64 out_param;
- u32 tmp1, tmp2;
#define QUERY_DEV_CAP_OUT_SIZE 0x100
#define QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET 0x10
@@ -178,8 +432,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29
#define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b
#define QUERY_DEV_CAP_MAX_GSO_OFFSET 0x2d
+#define QUERY_DEV_CAP_RSS_OFFSET 0x2e
#define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f
-#define QUERY_DEV_CAP_STAT_CFG_INL_OFFSET 0x31
#define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33
#define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35
#define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36
@@ -187,10 +441,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET 0x38
#define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b
#define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c
+#define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e
#define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f
#define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40
-#define QUERY_DEV_CAP_UDP_RSS_OFFSET 0x42
-#define QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET 0x43
+#define QUERY_DEV_CAP_SYNC_QP_OFFSET 0x42
#define QUERY_DEV_CAP_FLAGS_OFFSET 0x44
#define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48
#define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49
@@ -210,6 +464,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65
#define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66
#define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67
+#define QUERY_DEV_CAP_MAX_BASIC_COUNTERS_OFFSET 0x68
+#define QUERY_DEV_CAP_MAX_EXTENDED_COUNTERS_OFFSET 0x6c
+#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76
+#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77
#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80
#define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82
#define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84
@@ -223,16 +481,15 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94
#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
-#define QUERY_DEV_CAP_MAX_BASIC_CNT_OFFSET 0x68
-#define QUERY_DEV_CAP_MAX_EXT_CNT_OFFSET 0x6c
+ dev_cap->flags2 = 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
outbox = mailbox->buf;
err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (err)
goto out;
@@ -253,7 +510,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
dev_cap->max_mpts = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
- dev_cap->reserved_eqs = 1 << (field & 0xf);
+ dev_cap->reserved_eqs = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
dev_cap->max_eqs = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
@@ -275,6 +532,17 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
else
dev_cap->max_gso_sz = 1 << field;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSS_OFFSET);
+ if (field & 0x20)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_XOR;
+ if (field & 0x10)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_TOP;
+ field &= 0xf;
+ if (field) {
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS;
+ dev_cap->max_rss_tbl_sz = 1 << field;
+ } else
+ dev_cap->max_rss_tbl_sz = 0;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET);
dev_cap->max_rdma_global = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
@@ -283,16 +551,21 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->num_ports = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET);
dev_cap->max_msg_sz = 1 << (field & 0x1f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
+ if (field & 0x80)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN;
+ dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET);
+ dev_cap->fs_max_num_qp_per_entry = field;
MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
dev_cap->stat_rate_support = stat_rate;
- MLX4_GET(field, outbox, QUERY_DEV_CAP_UDP_RSS_OFFSET);
- dev_cap->udp_rss = field & 0x1;
- MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET);
- dev_cap->loopback_support = field & 0x1;
- dev_cap->wol = field & 0x40;
- MLX4_GET(tmp1, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
- MLX4_GET(tmp2, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
- dev_cap->flags = tmp2 | (u64)tmp1 << 32;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET);
+ dev_cap->timestamp_support = field & 0x80;
+ MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
+ dev_cap->flags = flags | (u64)ext_flags << 32;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_SYNC_QP_OFFSET);
+ dev_cap->sync_qp = field & 0x10;
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET);
dev_cap->reserved_uars = field >> 4;
MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET);
@@ -305,10 +578,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET);
dev_cap->bf_reg_size = 1 << (field & 0x1f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET);
- if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size)) {
- mlx4_dbg(dev, "log blue flame is invalid (%d), forcing 3\n", field & 0x1f);
+ if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size))
field = 3;
- }
dev_cap->bf_regs_per_page = 1 << (field & 0x3f);
mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
@@ -332,7 +603,6 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->reserved_pds = field >> 4;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET);
dev_cap->max_pds = 1 << (field & 0x3f);
-
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET);
dev_cap->reserved_xrcds = field >> 4;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET);
@@ -363,8 +633,6 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->max_srq_sz = 1 << field;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_SZ_OFFSET);
dev_cap->max_qp_sz = 1 << field;
- MLX4_GET(field, outbox, QUERY_DEV_CAP_STAT_CFG_INL_OFFSET);
- dev_cap->inline_cfg = field & 1;
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSZ_SRQ_OFFSET);
dev_cap->resize_srq = field & 1;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_RQ_OFFSET);
@@ -378,10 +646,14 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
MLX4_GET(dev_cap->max_icm_sz, outbox,
QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
- MLX4_GET(dev_cap->max_basic_counters, outbox,
- QUERY_DEV_CAP_MAX_BASIC_CNT_OFFSET);
- MLX4_GET(dev_cap->max_ext_counters, outbox,
- QUERY_DEV_CAP_MAX_EXT_CNT_OFFSET);
+ if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
+ MLX4_GET(dev_cap->max_basic_counters, outbox,
+ QUERY_DEV_CAP_MAX_BASIC_COUNTERS_OFFSET);
+ /* FW reports 256 however real value is 255 */
+ dev_cap->max_basic_counters = min_t(u32, dev_cap->max_basic_counters, 255);
+ if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT)
+ MLX4_GET(dev_cap->max_extended_counters, outbox,
+ QUERY_DEV_CAP_MAX_EXTENDED_COUNTERS_OFFSET);
if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
for (i = 1; i <= dev_cap->num_ports; ++i) {
@@ -408,19 +680,16 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_PORT_WAVELENGTH_OFFSET 0x1c
#define QUERY_PORT_TRANS_CODE_OFFSET 0x20
-#define STAT_CFG_PORT_MODE (1 << 28)
-#define STAT_CFG_PORT_OFFSET 0x8
-#define STAT_CFG_PORT_MASK (1 << 20)
-#define STAT_CFG_MOD_INLINE 0x3
-
for (i = 1; i <= dev_cap->num_ports; ++i) {
err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
if (err)
goto out;
MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET);
dev_cap->supported_port_types[i] = field & 3;
+ dev_cap->suggested_type[i] = (field >> 3) & 1;
+ dev_cap->default_sense[i] = (field >> 4) & 1;
MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET);
dev_cap->ib_mtu[i] = field & 0xf;
MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET);
@@ -440,20 +709,6 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->vendor_oui[i] = field32 & 0xffffff;
MLX4_GET(dev_cap->wavelength[i], outbox, QUERY_PORT_WAVELENGTH_OFFSET);
MLX4_GET(dev_cap->trans_code[i], outbox, QUERY_PORT_TRANS_CODE_OFFSET);
-
- /* Query stat cfg for port enablement */
- if (dev_cap->inline_cfg) {
- in_modifier = STAT_CFG_PORT_MODE | i << 8 |
- STAT_CFG_PORT_OFFSET;
- err = mlx4_cmd_imm(dev, 0, &out_param,
- in_modifier,
- STAT_CFG_MOD_INLINE,
- MLX4_CMD_MOD_STAT_CFG,
- MLX4_CMD_TIME_CLASS_B);
- if (!err)
- if (!(out_param & STAT_CFG_PORT_MASK))
- dev_cap->supported_port_types[i] = 0;
- }
}
}
@@ -494,14 +749,134 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n",
dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg);
mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz);
+ mlx4_dbg(dev, "Max basic counters: %d\n", dev_cap->max_basic_counters);
+ mlx4_dbg(dev, "Max extended counters: %d\n", dev_cap->max_extended_counters);
+ mlx4_dbg(dev, "Max RSS Table size: %d\n", dev_cap->max_rss_tbl_sz);
dump_dev_cap_flags(dev, dev_cap->flags);
+ dump_dev_cap_flags2(dev, dev_cap->flags2);
out:
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
+int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u64 flags;
+ int err = 0;
+ u8 field;
+
+ err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ /* add port mng change event capability unconditionally to slaves */
+ MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV;
+ MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+
+ /* For guests, report Blueflame disabled */
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET);
+ field &= 0x7f;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET);
+
+ return 0;
+}
+
+int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u64 def_mac;
+ u8 port_type;
+ u16 short_field;
+ int err;
+
+#define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0
+#define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c
+#define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e
+
+ err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0,
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ if (!err && dev->caps.function != slave) {
+ /* set slave default_mac address */
+ MLX4_GET(def_mac, outbox->buf, QUERY_PORT_MAC_OFFSET);
+ def_mac += slave << 8;
+ /* if config MAC in DB use it */
+ if (priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac)
+ def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac;
+ MLX4_PUT(outbox->buf, def_mac, QUERY_PORT_MAC_OFFSET);
+
+ /* get port type - currently only eth is enabled */
+ MLX4_GET(port_type, outbox->buf,
+ QUERY_PORT_SUPPORTED_TYPE_OFFSET);
+
+ /* No link sensing allowed */
+ port_type &= MLX4_VF_PORT_NO_LINK_SENSE_MASK;
+ /* set port type to currently operating port type */
+ port_type |= (dev->caps.port_type[vhcr->in_modifier] & 0x3);
+
+ MLX4_PUT(outbox->buf, port_type,
+ QUERY_PORT_SUPPORTED_TYPE_OFFSET);
+
+ if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH)
+ short_field = mlx4_get_slave_num_gids(dev, slave);
+ else
+ short_field = 1; /* slave max gids */
+ MLX4_PUT(outbox->buf, short_field,
+ QUERY_PORT_CUR_MAX_GID_OFFSET);
+
+ short_field = dev->caps.pkey_table_len[vhcr->in_modifier];
+ MLX4_PUT(outbox->buf, short_field,
+ QUERY_PORT_CUR_MAX_PKEY_OFFSET);
+ }
+
+ return err;
+}
+
+int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port,
+ int *gid_tbl_len, int *pkey_tbl_len)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ u16 field;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0,
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+
+ outbox = mailbox->buf;
+
+ MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_GID_OFFSET);
+ *gid_tbl_len = field;
+
+ MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_PKEY_OFFSET);
+ *pkey_tbl_len = field;
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_get_slave_pkey_gid_tbl_len);
+
int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
{
struct mlx4_cmd_mailbox *mailbox;
@@ -551,7 +926,8 @@ int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
if (++nent == MLX4_MAILBOX_SIZE / 16) {
err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
if (err)
goto out;
nent = 0;
@@ -560,7 +936,8 @@ int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
}
if (nent)
- err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B);
+ err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
if (err)
goto out;
@@ -589,13 +966,15 @@ int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm)
int mlx4_UNMAP_FA(struct mlx4_dev *dev)
{
- return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, MLX4_CMD_TIME_CLASS_B);
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
int mlx4_RUN_FW(struct mlx4_dev *dev)
{
- return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
int mlx4_QUERY_FW(struct mlx4_dev *dev)
@@ -611,7 +990,7 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev)
#define QUERY_FW_OUT_SIZE 0x100
#define QUERY_FW_VER_OFFSET 0x00
-#define MC_PROMISC_VER 0x2000702bcull
+#define QUERY_FW_PPF_ID 0x09
#define QUERY_FW_CMD_IF_REV_OFFSET 0x0a
#define QUERY_FW_MAX_CMD_OFFSET 0x0f
#define QUERY_FW_ERR_START_OFFSET 0x30
@@ -622,13 +1001,19 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev)
#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20
#define QUERY_FW_CLR_INT_BAR_OFFSET 0x28
+#define QUERY_FW_COMM_BASE_OFFSET 0x40
+#define QUERY_FW_COMM_BAR_OFFSET 0x48
+
+#define QUERY_FW_CLOCK_OFFSET 0x50
+#define QUERY_FW_CLOCK_BAR 0x58
+
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
outbox = mailbox->buf;
err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (err)
goto out;
@@ -640,10 +1025,13 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev)
dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) |
((fw_ver & 0xffff0000ull) >> 16) |
((fw_ver & 0x0000ffffull) << 16);
- if (dev->caps.fw_ver < MC_PROMISC_VER)
- dev->caps.mc_promisc_mode = 2;
- else
- dev->caps.mc_promisc_mode = 1;
+
+ MLX4_GET(lg, outbox, QUERY_FW_PPF_ID);
+ dev->caps.function = lg;
+
+ if (mlx4_is_slave(dev))
+ goto out;
+
MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET);
if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV ||
@@ -686,8 +1074,19 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev)
MLX4_GET(fw->clr_int_bar, outbox, QUERY_FW_CLR_INT_BAR_OFFSET);
fw->clr_int_bar = (fw->clr_int_bar >> 6) * 2;
+ MLX4_GET(fw->comm_base, outbox, QUERY_FW_COMM_BASE_OFFSET);
+ MLX4_GET(fw->comm_bar, outbox, QUERY_FW_COMM_BAR_OFFSET);
+ fw->comm_bar = (fw->comm_bar >> 6) * 2;
+ mlx4_dbg(dev, "Communication vector bar:%d offset:0x%llx\n",
+ fw->comm_bar, fw->comm_base);
mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2);
+ MLX4_GET(fw->clock_offset, outbox, QUERY_FW_CLOCK_OFFSET);
+ MLX4_GET(fw->clock_bar, outbox, QUERY_FW_CLOCK_BAR);
+ fw->clock_bar = (fw->clock_bar >> 6) * 2;
+ mlx4_dbg(dev, "Internal clock bar:%d offset:0x%llx\n",
+ fw->comm_bar, fw->comm_base);
+
/*
* Round up number of system pages needed in case
* MLX4_ICM_PAGE_SIZE < PAGE_SIZE.
@@ -704,6 +1103,30 @@ out:
return err;
}
+int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u8 *outbuf;
+ int err;
+
+ outbuf = outbox->buf;
+ err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_FW,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ /* for slaves, set pci PPF ID to invalid and zero out everything
+ * else except FW version */
+ outbuf[0] = outbuf[1] = 0;
+ memset(&outbuf[8], 0, QUERY_FW_OUT_SIZE - 8);
+ outbuf[QUERY_FW_PPF_ID] = MLX4_INVALID_SLAVE_ID;
+
+ return 0;
+}
+
static void get_board_id(void *vsd, char *board_id)
{
int i;
@@ -748,7 +1171,7 @@ int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter)
outbox = mailbox->buf;
err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (err)
goto out;
@@ -772,7 +1195,6 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
#define INIT_HCA_VERSION_OFFSET 0x000
#define INIT_HCA_VERSION 2
#define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e
-#define INIT_HCA_X86_64_BYTE_CACHELINE_SZ 0x40
#define INIT_HCA_FLAGS_OFFSET 0x014
#define INIT_HCA_QPC_OFFSET 0x020
#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
@@ -781,6 +1203,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f)
#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
+#define INIT_HCA_EQE_CQE_OFFSETS (INIT_HCA_QPC_OFFSET + 0x38)
#define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
#define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
@@ -791,7 +1214,17 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00)
#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16)
+#define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18)
#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
+#define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6
+#define INIT_HCA_FS_PARAM_OFFSET 0x1d0
+#define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00)
+#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12)
+#define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b)
+#define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21)
+#define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22)
+#define INIT_HCA_FS_IB_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x25)
+#define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26)
#define INIT_HCA_TPT_OFFSET 0x0f0
#define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00)
#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b)
@@ -809,9 +1242,9 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
memset(inbox, 0, INIT_HCA_IN_SIZE);
*((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION;
-#if defined(__x86_64__) || defined(__PPC64__)
- *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = INIT_HCA_X86_64_BYTE_CACHELINE_SZ;
-#endif
+
+ *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) =
+ ((ilog2(CACHE_LINE_SIZE) - 4) << 5) | (1 << 4);
#if defined(__LITTLE_ENDIAN)
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
@@ -831,9 +1264,31 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
if (enable_qos)
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2);
- /* counters mode */
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |=
- cpu_to_be32(dev->caps.counters_mode << 4);
+ /* Enable fast drop performance optimization */
+ if (dev->caps.fast_drop)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 7);
+
+ /* enable counters */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
+
+ /* CX3 is capable of extending CQEs\EQEs from 32 to 64 bytes */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
+ *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
+ dev->caps.eqe_size = 64;
+ dev->caps.eqe_factor = 1;
+ } else {
+ dev->caps.eqe_size = 32;
+ dev->caps.eqe_factor = 0;
+ }
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
+ *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
+ dev->caps.cqe_size = 64;
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+ } else {
+ dev->caps.cqe_size = 32;
+ }
/* QPC/EEC/CQC/EQC/RDMARC attributes */
@@ -850,12 +1305,45 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET);
MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET);
- /* multicast attributes */
-
- MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
- MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
- MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
- MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ /* steering attributes */
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |=
+ cpu_to_be32(1 <<
+ INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN);
+
+ MLX4_PUT(inbox, param->mc_base, INIT_HCA_FS_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_entry_sz,
+ INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_table_sz,
+ INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+ /* Enable Ethernet flow steering
+ * with udp unicast and tcp unicast
+ */
+ MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN),
+ INIT_HCA_FS_ETH_BITS_OFFSET);
+ MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR,
+ INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET);
+ /* Enable IPoIB flow steering
+ * with udp unicast and tcp unicast
+ */
+ MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN),
+ INIT_HCA_FS_IB_BITS_OFFSET);
+ MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR,
+ INIT_HCA_FS_IB_NUM_ADDRS_OFFSET);
+ } else {
+ MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_entry_sz,
+ INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_hash_sz,
+ INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_table_sz,
+ INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0) {
+ MLX4_PUT(inbox, (u8) (1 << 3),
+ INIT_HCA_UC_STEERING_OFFSET);
+ }
+ }
/* TPT attributes */
@@ -866,13 +1354,11 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
/* UAR attributes */
- MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
+ MLX4_PUT(inbox, param->uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET);
MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
- if (!mlx4_pre_t11_mode && dev->caps.flags & (u32) MLX4_DEV_CAP_FLAG_FC_T11)
- *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 10);
-
- err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000);
+ err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000,
+ MLX4_CMD_NATIVE);
if (err)
mlx4_err(dev, "INIT_HCA returns %d\n", err);
@@ -881,6 +1367,154 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
return err;
}
+int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+ struct mlx4_init_hca_param *param)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ __be32 *outbox;
+ u32 dword_field;
+ int err;
+ u8 byte_field;
+
+#define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04
+#define QUERY_HCA_CORE_CLOCK_OFFSET 0x0c
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0,
+ MLX4_CMD_QUERY_HCA,
+ MLX4_CMD_TIME_CLASS_B,
+ !mlx4_is_slave(dev));
+ if (err)
+ goto out;
+
+ MLX4_GET(param->global_caps, outbox, QUERY_HCA_GLOBAL_CAPS_OFFSET);
+ MLX4_GET(param->hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET);
+
+ /* QPC/EEC/CQC/EQC/RDMARC attributes */
+
+ MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET);
+ MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET);
+ MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET);
+ MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET);
+ MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET);
+ MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET);
+ MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET);
+ MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET);
+ MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET);
+ MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET);
+ MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET);
+ MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET);
+
+ MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET);
+ if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) {
+ param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
+ } else {
+ MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET);
+ if (byte_field & 0x8) {
+ param->steering_mode = MLX4_STEERING_MODE_B0;
+ }
+ else {
+ param->steering_mode = MLX4_STEERING_MODE_A0;
+ }
+ }
+ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET);
+ MLX4_GET(param->log_mc_entry_sz, outbox,
+ INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+ MLX4_GET(param->log_mc_table_sz, outbox,
+ INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+ } else {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET);
+ MLX4_GET(param->log_mc_entry_sz, outbox,
+ INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+ MLX4_GET(param->log_mc_hash_sz, outbox,
+ INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+ MLX4_GET(param->log_mc_table_sz, outbox,
+ INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ }
+
+ /* CX3 is capable of extending CQEs\EQEs from 32 to 64 bytes */
+ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS);
+ if (byte_field & 0x20) /* 64-bytes eqe enabled */
+ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+ if (byte_field & 0x40) /* 64-bytes cqe enabled */
+ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+
+ /* TPT attributes */
+
+ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET);
+ MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
+ MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET);
+ MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET);
+
+ /* UAR attributes */
+
+ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET);
+ MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return err;
+}
+
+/* for IB-type ports only in SRIOV mode. Checks that both proxy QP0
+ * and real QP0 are active, so that the paravirtualized QP0 is ready
+ * to operate */
+static int check_qp0_state(struct mlx4_dev *dev, int function, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ /* irrelevant if not infiniband */
+ if (priv->mfunc.master.qp0_state[port].proxy_qp0_active &&
+ priv->mfunc.master.qp0_state[port].qp0_active)
+ return 1;
+ return 0;
+}
+
+int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port = vhcr->in_modifier;
+ int err;
+
+ if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port))
+ return 0;
+
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) {
+ /* Enable port only if it was previously disabled */
+ if (!priv->mfunc.master.init_port_ref[port]) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ }
+ priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
+ } else {
+ if (slave == mlx4_master_func_num(dev)) {
+ if (check_qp0_state(dev, slave, port) &&
+ !priv->mfunc.master.qp0_state[port].port_active) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ priv->mfunc.master.qp0_state[port].port_active = 1;
+ priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
+ }
+ } else
+ priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
+ }
+ ++priv->mfunc.master.init_port_ref[port];
+ return 0;
+}
+
int mlx4_INIT_PORT(struct mlx4_dev *dev, int port)
{
struct mlx4_cmd_mailbox *mailbox;
@@ -924,33 +1558,76 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port)
MLX4_PUT(inbox, field, INIT_PORT_MAX_PKEY_OFFSET);
err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
} else
err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_INIT_PORT);
+int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port = vhcr->in_modifier;
+ int err;
+
+ if (!(priv->mfunc.master.slave_state[slave].init_port_mask &
+ (1 << port)))
+ return 0;
+
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) {
+ if (priv->mfunc.master.init_port_ref[port] == 1) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
+ 1000, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ }
+ priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
+ } else {
+ /* infiniband port */
+ if (slave == mlx4_master_func_num(dev)) {
+ if (!priv->mfunc.master.qp0_state[port].qp0_active &&
+ priv->mfunc.master.qp0_state[port].port_active) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
+ 1000, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
+ priv->mfunc.master.qp0_state[port].port_active = 0;
+ }
+ } else
+ priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
+ }
+ --priv->mfunc.master.init_port_ref[port];
+ return 0;
+}
+
int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port)
{
- return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000);
+ return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000,
+ MLX4_CMD_WRAPPED);
}
EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT);
int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic)
{
- return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000);
+ return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000,
+ MLX4_CMD_NATIVE);
}
int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
{
int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0,
MLX4_CMD_SET_ICM_SIZE,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (ret)
return ret;
@@ -967,30 +1644,12 @@ int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
int mlx4_NOP(struct mlx4_dev *dev)
{
/* Input modifier of 0x1f means "finish as soon as possible." */
- return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100);
+ return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
-#define MLX4_WOL_SETUP_MODE (5 << 28)
-int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port)
-{
- u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
-
- return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3,
- MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A);
-}
-EXPORT_SYMBOL_GPL(mlx4_wol_read);
-
-int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port)
-{
- u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
-
- return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG,
- MLX4_CMD_TIME_CLASS_A);
-}
-EXPORT_SYMBOL_GPL(mlx4_wol_write);
-
int mlx4_query_diag_counters(struct mlx4_dev *dev, int array_length,
- u8 op_modifier, u32 in_offset[], u32 counter_out[])
+ u8 op_modifier, u32 in_offset[],
+ u32 counter_out[])
{
struct mlx4_cmd_mailbox *mailbox;
u32 *outbox;
@@ -1003,11 +1662,12 @@ int mlx4_query_diag_counters(struct mlx4_dev *dev, int array_length,
outbox = mailbox->buf;
ret = mlx4_cmd_box(dev, 0, mailbox->dma, 0, op_modifier,
- MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
if (ret)
goto out;
- for (i=0; i < array_length; i++) {
+ for (i = 0; i < array_length; i++) {
if (in_offset[i] > MLX4_MAILBOX_SIZE) {
ret = -EINVAL;
goto out;
@@ -1022,9 +1682,115 @@ out:
}
EXPORT_SYMBOL_GPL(mlx4_query_diag_counters);
-void mlx4_get_fc_t11_settings(struct mlx4_dev *dev, int *enable_pre_t11, int *t11_supported)
+#define MLX4_WOL_SETUP_MODE (5 << 28)
+int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port)
{
- *enable_pre_t11 = !!mlx4_pre_t11_mode;
- *t11_supported = !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FC_T11);
+ u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
+
+ return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3,
+ MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+}
+EXPORT_SYMBOL_GPL(mlx4_wol_read);
+
+int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port)
+{
+ u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
+
+ return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+EXPORT_SYMBOL_GPL(mlx4_wol_write);
+
+enum {
+ ADD_TO_MCG = 0x26,
+};
+
+
+void mlx4_opreq_action(struct work_struct *work)
+{
+ struct mlx4_priv *priv = container_of(work, struct mlx4_priv, opreq_task);
+ struct mlx4_dev *dev = &priv->dev;
+ int num_tasks = atomic_read(&priv->opreq_count);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ u32 *outbox;
+ u32 modifier;
+ u16 token;
+ u16 type_m;
+ u16 type;
+ int err;
+ u32 num_qps;
+ struct mlx4_qp qp;
+ int i;
+ u8 rem_mcg;
+ u8 prot;
+
+#define GET_OP_REQ_MODIFIER_OFFSET 0x08
+#define GET_OP_REQ_TOKEN_OFFSET 0x14
+#define GET_OP_REQ_TYPE_OFFSET 0x1a
+#define GET_OP_REQ_DATA_OFFSET 0x20
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ mlx4_err(dev, "Failed to allocate mailbox for GET_OP_REQ\n");
+ return;
+ }
+ outbox = mailbox->buf;
+
+ while (num_tasks) {
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0,
+ MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_err(dev, "Failed to retreive required operation: %d\n", err);
+ return;
+ }
+ MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET);
+ MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET);
+ MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET);
+ type_m = type >> 12;
+ type &= 0xfff;
+
+ switch (type) {
+ case ADD_TO_MCG:
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_warn(dev, "ADD MCG operation is not supported in "
+ "DEVICE_MANAGED steerign mode\n");
+ err = EPERM;
+ break;
+ }
+ mgm = (struct mlx4_mgm *) ((u8 *) (outbox) + GET_OP_REQ_DATA_OFFSET);
+ num_qps = be32_to_cpu(mgm->members_count) & MGM_QPN_MASK;
+ rem_mcg = ((u8 *) (&mgm->members_count))[0] & 1;
+ prot = ((u8 *) (&mgm->members_count))[0] >> 6;
+
+ for (i = 0; i < num_qps; i++) {
+ qp.qpn = be32_to_cpu(mgm->qp[i]);
+ if (rem_mcg)
+ err = mlx4_multicast_detach(dev, &qp, mgm->gid, prot, 0);
+ else
+ err = mlx4_multicast_attach(dev, &qp, mgm->gid, mgm->gid[5] ,0, prot, NULL);
+ if (err)
+ break;
+ }
+ break;
+ default:
+ mlx4_warn(dev, "Bad type for required operation\n");
+ err = EINVAL;
+ break;
+ }
+ err = mlx4_cmd(dev, 0, ((u32) err | cpu_to_be32(token) << 16), 1,
+ MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_err(dev, "Failed to acknowledge required request: %d\n", err);
+ goto out;
+ }
+ memset(outbox, 0, 0xffc);
+ num_tasks = atomic_dec_return(&priv->opreq_count);
+ }
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
}
-EXPORT_SYMBOL_GPL(mlx4_get_fc_t11_settings);
diff --git a/sys/ofed/drivers/net/mlx4/fw.h b/sys/ofed/drivers/net/mlx4/fw.h
index fbcab21..5fe7782 100644
--- a/sys/ofed/drivers/net/mlx4/fw.h
+++ b/sys/ofed/drivers/net/mlx4/fw.h
@@ -78,10 +78,10 @@ struct mlx4_dev_cap {
u16 wavelength[MLX4_MAX_PORTS + 1];
u64 trans_code[MLX4_MAX_PORTS + 1];
u16 stat_rate_support;
- int udp_rss;
- int loopback_support;
- int wol;
+ int fs_log_max_ucast_qp_range_size;
+ int fs_max_num_qp_per_entry;
u64 flags;
+ u64 flags2;
int reserved_uars;
int uar_size;
int min_page_sz;
@@ -108,17 +108,41 @@ struct mlx4_dev_cap {
int dmpt_entry_sz;
int cmpt_entry_sz;
int mtt_entry_sz;
- int inline_cfg;
int resize_srq;
u32 bmme_flags;
u32 reserved_lkey;
u64 max_icm_sz;
int max_gso_sz;
+ int max_rss_tbl_sz;
u8 supported_port_types[MLX4_MAX_PORTS + 1];
+ u8 suggested_type[MLX4_MAX_PORTS + 1];
+ u8 default_sense[MLX4_MAX_PORTS + 1];
u8 log_max_macs[MLX4_MAX_PORTS + 1];
u8 log_max_vlans[MLX4_MAX_PORTS + 1];
u32 max_basic_counters;
- u32 max_ext_counters;
+ u32 sync_qp;
+ u8 timestamp_support;
+ u32 max_extended_counters;
+};
+
+struct mlx4_func_cap {
+ u8 num_ports;
+ u8 flags;
+ u32 pf_context_behaviour;
+ int qp_quota;
+ int cq_quota;
+ int srq_quota;
+ int mpt_quota;
+ int mtt_quota;
+ int max_eq;
+ int reserved_eq;
+ int mcg_quota;
+ u32 qp0_tunnel_qpn;
+ u32 qp0_proxy_qpn;
+ u32 qp1_tunnel_qpn;
+ u32 qp1_proxy_qpn;
+ u8 physical_port;
+ u8 port_flags;
};
struct mlx4_adapter {
@@ -138,8 +162,10 @@ struct mlx4_init_hca_param {
u64 dmpt_base;
u64 cmpt_base;
u64 mtt_base;
+ u64 global_caps;
u16 log_mc_entry_sz;
u16 log_mc_hash_sz;
+ u16 hca_core_clock;
u8 log_num_qps;
u8 log_num_srqs;
u8 log_num_cqs;
@@ -148,6 +174,9 @@ struct mlx4_init_hca_param {
u8 log_mc_table_sz;
u8 log_mpt_sz;
u8 log_uar_sz;
+ u8 uar_page_sz; /* log pg sz in 4k chunks */
+ u8 steering_mode; /* for QUERY_HCA */
+ u64 dev_cap_enabled;
};
struct mlx4_init_ib_param {
@@ -172,16 +201,27 @@ struct mlx4_set_ib_param {
};
int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
+int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
+ struct mlx4_func_cap *func_cap);
+int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm);
int mlx4_UNMAP_FA(struct mlx4_dev *dev);
int mlx4_RUN_FW(struct mlx4_dev *dev);
int mlx4_QUERY_FW(struct mlx4_dev *dev);
int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter);
int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
+int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic);
int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt);
int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages);
+int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
+int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
int mlx4_NOP(struct mlx4_dev *dev);
int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg);
+void mlx4_opreq_action(struct work_struct *work);
#endif /* MLX4_FW_H */
diff --git a/sys/ofed/drivers/net/mlx4/icm.c b/sys/ofed/drivers/net/mlx4/icm.c
index 3a14d6b..d18fde1 100644
--- a/sys/ofed/drivers/net/mlx4/icm.c
+++ b/sys/ofed/drivers/net/mlx4/icm.c
@@ -31,10 +31,10 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
+#include <linux/slab.h>
#include <linux/mlx4/cmd.h>
@@ -93,13 +93,17 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
kfree(icm);
}
-static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
+static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order,
+ gfp_t gfp_mask, int node)
{
struct page *page;
- page = alloc_pages(gfp_mask, order);
- if (!page)
- return -ENOMEM;
+ page = alloc_pages_node(node, gfp_mask, order);
+ if (!page) {
+ page = alloc_pages(gfp_mask, order);
+ if (!page)
+ return -ENOMEM;
+ }
sg_set_page(mem, page, PAGE_SIZE << order, 0);
return 0;
@@ -130,9 +134,13 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
/* We use sg_set_buf for coherent allocs, which assumes low memory */
BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
- icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
- if (!icm)
- return NULL;
+ icm = kmalloc_node(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN),
+ dev->numa_node);
+ if (!icm) {
+ icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!icm)
+ return NULL;
+ }
icm->refcount = 0;
INIT_LIST_HEAD(&icm->chunk_list);
@@ -141,10 +149,15 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
while (npages > 0) {
if (!chunk) {
- chunk = kmalloc(sizeof *chunk,
- gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
- if (!chunk)
- goto fail;
+ chunk = kmalloc_node(sizeof *chunk,
+ gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN),
+ dev->numa_node);
+ if (!chunk) {
+ chunk = kmalloc(sizeof *chunk,
+ gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!chunk)
+ goto fail;
+ }
sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);
chunk->npages = 0;
@@ -161,31 +174,33 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
cur_order, gfp_mask);
else
ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages],
- cur_order, gfp_mask);
-
- if (!ret) {
- ++chunk->npages;
+ cur_order, gfp_mask,
+ dev->numa_node);
- if (coherent)
- ++chunk->nsg;
- else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
- chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
- chunk->npages,
- PCI_DMA_BIDIRECTIONAL);
+ if (ret) {
+ if (--cur_order < 0)
+ goto fail;
+ else
+ continue;
+ }
- if (chunk->nsg <= 0)
- goto fail;
- }
+ ++chunk->npages;
- if (chunk->npages == MLX4_ICM_CHUNK_LEN)
- chunk = NULL;
+ if (coherent)
+ ++chunk->nsg;
+ else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
+ chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
+ chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
- npages -= 1 << cur_order;
- } else {
- --cur_order;
- if (cur_order < 0)
+ if (chunk->nsg <= 0)
goto fail;
}
+
+ if (chunk->npages == MLX4_ICM_CHUNK_LEN)
+ chunk = NULL;
+
+ npages -= 1 << cur_order;
}
if (!coherent && chunk) {
@@ -209,36 +224,10 @@ static int mlx4_MAP_ICM(struct mlx4_dev *dev, struct mlx4_icm *icm, u64 virt)
return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM, icm, virt);
}
-int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
+static int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
{
return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM,
- MLX4_CMD_TIME_CLASS_B);
-}
-
-int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt)
-{
- struct mlx4_cmd_mailbox *mailbox;
- __be64 *inbox;
- int err;
-
- mailbox = mlx4_alloc_cmd_mailbox(dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
- inbox = mailbox->buf;
-
- inbox[0] = cpu_to_be64(virt);
- inbox[1] = cpu_to_be64(dma_addr);
-
- err = mlx4_cmd(dev, mailbox->dma, 1, 0, MLX4_CMD_MAP_ICM,
- MLX4_CMD_TIME_CLASS_B);
-
- mlx4_free_cmd_mailbox(dev, mailbox);
-
- if (!err)
- mlx4_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
- (unsigned long long) dma_addr, (unsigned long long) virt);
-
- return err;
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
@@ -248,12 +237,14 @@ int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
{
- return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX, MLX4_CMD_TIME_CLASS_B);
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
{
- int i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
+ u32 i = (obj & (table->num_obj - 1)) /
+ (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
int ret = 0;
mutex_lock(&table->mutex);
@@ -286,16 +277,18 @@ out:
return ret;
}
-void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
+void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
{
- int i;
+ u32 i;
+ u64 offset;
i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
mutex_lock(&table->mutex);
if (--table->icm[i]->refcount == 0) {
- mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
+ offset = (u64) i * MLX4_TABLE_CHUNK_SIZE;
+ mlx4_UNMAP_ICM(dev, table->virt + offset,
MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
mlx4_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
@@ -304,9 +297,11 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
mutex_unlock(&table->mutex);
}
-void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle)
+void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj,
+ dma_addr_t *dma_handle)
{
- int idx, offset, dma_offset, i;
+ int offset, dma_offset, i;
+ u64 idx;
struct mlx4_icm_chunk *chunk;
struct mlx4_icm *icm;
struct page *page = NULL;
@@ -316,7 +311,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_han
mutex_lock(&table->mutex);
- idx = (obj & (table->num_obj - 1)) * table->obj_size;
+ idx = (u64) (obj & (table->num_obj - 1)) * table->obj_size;
icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;
@@ -350,10 +345,11 @@ out:
}
int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- int start, int end)
+ u32 start, u32 end)
{
int inc = MLX4_TABLE_CHUNK_SIZE / table->obj_size;
- int i, err;
+ int err;
+ u32 i;
for (i = start; i <= end; i += inc) {
err = mlx4_table_get(dev, table, i);
@@ -373,22 +369,23 @@ fail:
}
void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- int start, int end)
+ u32 start, u32 end)
{
- int i;
+ u32 i;
for (i = start; i <= end; i += MLX4_TABLE_CHUNK_SIZE / table->obj_size)
mlx4_table_put(dev, table, i);
}
int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- u64 virt, int obj_size, int nobj, int reserved,
+ u64 virt, int obj_size, u32 nobj, int reserved,
int use_lowmem, int use_coherent)
{
int obj_per_chunk;
int num_icm;
unsigned chunk_size;
int i;
+ u64 size;
obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
@@ -404,10 +401,12 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
table->coherent = use_coherent;
mutex_init(&table->mutex);
+ size = (u64) nobj * obj_size;
for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
chunk_size = MLX4_TABLE_CHUNK_SIZE;
- if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > nobj * obj_size)
- chunk_size = PAGE_ALIGN(nobj * obj_size - i * MLX4_TABLE_CHUNK_SIZE);
+ if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > size)
+ chunk_size = PAGE_ALIGN(size -
+ i * MLX4_TABLE_CHUNK_SIZE);
table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
@@ -437,6 +436,8 @@ err:
mlx4_free_icm(dev, table->icm[i], use_coherent);
}
+ kfree(table->icm);
+
return -ENOMEM;
}
diff --git a/sys/ofed/drivers/net/mlx4/icm.h b/sys/ofed/drivers/net/mlx4/icm.h
index b87f726..f83ad81 100644
--- a/sys/ofed/drivers/net/mlx4/icm.h
+++ b/sys/ofed/drivers/net/mlx4/icm.h
@@ -71,17 +71,17 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
gfp_t gfp_mask, int coherent);
void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
+ u32 start, u32 end);
+void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
+ u32 start, u32 end);
int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- u64 virt, int obj_size, int nobj, int reserved,
+ u64 virt, int obj_size, u32 nobj, int reserved,
int use_lowmem, int use_coherent);
void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
-void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
-void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle);
-int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- int start, int end);
-void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
- int start, int end);
+void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, dma_addr_t *dma_handle);
static inline void mlx4_icm_first(struct mlx4_icm *icm,
struct mlx4_icm_iter *iter)
@@ -122,9 +122,5 @@ static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter)
return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
}
-int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count);
-int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt);
-int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
-int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
#endif /* MLX4_ICM_H */
diff --git a/sys/ofed/drivers/net/mlx4/intf.c b/sys/ofed/drivers/net/mlx4/intf.c
index bdf7e7d..0f6754b 100644
--- a/sys/ofed/drivers/net/mlx4/intf.c
+++ b/sys/ofed/drivers/net/mlx4/intf.c
@@ -31,6 +31,8 @@
* SOFTWARE.
*/
+#include <linux/slab.h>
+
#include "mlx4.h"
struct mlx4_device_context {
@@ -112,37 +114,8 @@ void mlx4_unregister_interface(struct mlx4_interface *intf)
}
EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
-struct mlx4_dev *mlx4_query_interface(void *int_dev, int *port)
-{
- struct mlx4_priv *priv;
- struct mlx4_device_context *dev_ctx;
- enum mlx4_query_reply r;
- unsigned long flags;
-
- mutex_lock(&intf_mutex);
-
- list_for_each_entry(priv, &dev_list, dev_list) {
- spin_lock_irqsave(&priv->ctx_lock, flags);
- list_for_each_entry(dev_ctx, &priv->ctx_list, list) {
- if (!dev_ctx->intf->query)
- continue;
- r = dev_ctx->intf->query(dev_ctx->context, int_dev);
- if (r != MLX4_QUERY_NOT_MINE) {
- *port = r;
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
- mutex_unlock(&intf_mutex);
- return &priv->dev;
- }
- }
- spin_unlock_irqrestore(&priv->ctx_lock, flags);
- }
-
- mutex_unlock(&intf_mutex);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(mlx4_query_interface);
-
-void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port)
+void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
+ unsigned long param)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_device_context *dev_ctx;
@@ -152,7 +125,7 @@ void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int por
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf->event)
- dev_ctx->intf->event(dev, dev_ctx->context, type, port);
+ dev_ctx->intf->event(dev, dev_ctx->context, type, param);
spin_unlock_irqrestore(&priv->ctx_lock, flags);
}
@@ -169,7 +142,8 @@ int mlx4_register_device(struct mlx4_dev *dev)
mlx4_add_device(intf, priv);
mutex_unlock(&intf_mutex);
- mlx4_start_catas_poll(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_start_catas_poll(dev);
return 0;
}
@@ -179,7 +153,8 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;
- mlx4_stop_catas_poll(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_stop_catas_poll(dev);
mutex_lock(&intf_mutex);
list_for_each_entry(intf, &intf_list, list)
@@ -190,7 +165,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
mutex_unlock(&intf_mutex);
}
-void *mlx4_find_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port)
+void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_device_context *dev_ctx;
@@ -200,13 +175,13 @@ void *mlx4_find_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int por
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
- if (dev_ctx->intf->protocol == proto && dev_ctx->intf->get_prot_dev) {
- result = dev_ctx->intf->get_prot_dev(dev, dev_ctx->context, port);
+ if (dev_ctx->intf->protocol == proto && dev_ctx->intf->get_dev) {
+ result = dev_ctx->intf->get_dev(dev, dev_ctx->context, port);
break;
- }
+ }
spin_unlock_irqrestore(&priv->ctx_lock, flags);
return result;
}
-
+EXPORT_SYMBOL_GPL(mlx4_get_protocol_dev);
diff --git a/sys/ofed/drivers/net/mlx4/main.c b/sys/ofed/drivers/net/mlx4/main.c
index b0897bf..dd10029 100644
--- a/sys/ofed/drivers/net/mlx4/main.c
+++ b/sys/ofed/drivers/net/mlx4/main.c
@@ -38,7 +38,10 @@
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
+#include <linux/slab.h>
#include <linux/io-mapping.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
@@ -62,10 +65,6 @@ MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif /* CONFIG_MLX4_DEBUG */
-int mlx4_blck_lb=1;
-module_param_named(block_loopback, mlx4_blck_lb, int, 0644);
-MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0");
-
#ifdef CONFIG_PCI_MSI
static int msi_x = 1;
@@ -78,124 +77,177 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
#endif /* CONFIG_PCI_MSI */
+static int enable_sys_tune = 0;
+module_param(enable_sys_tune, int, 0444);
+MODULE_PARM_DESC(enable_sys_tune, "Tune the cpu's for better performance (default 0)");
+
+int mlx4_blck_lb = 1;
+module_param_named(block_loopback, mlx4_blck_lb, int, 0644);
+MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 "
+ "(default: 1)");
+
+static int num_vfs;
+module_param(num_vfs, int, 0444);
+MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0");
+
+static int probe_vf;
+module_param(probe_vf, int, 0644);
+MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)");
+
+int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+
+module_param_named(log_num_mgm_entry_size,
+ mlx4_log_num_mgm_entry_size, int, 0444);
+MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
+ " of qp per mcg, for example:"
+ " 10 gives 248.range: 7 <="
+ " log_num_mgm_entry_size <= 12."
+ " To activate device managed"
+ " flow steering when available, set to -1");
+
+static int high_rate_steer;
+module_param(high_rate_steer, int, 0444);
+MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate"
+ " (default off)");
+
+static int fast_drop;
+module_param_named(fast_drop, fast_drop, int, 0444);
+MODULE_PARM_DESC(fast_drop,
+ "Enable fast packet drop when no recieve WQEs are posted");
+
+int mlx4_enable_64b_cqe_eqe;
+module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644);
+MODULE_PARM_DESC(enable_64b_cqe_eqe,
+ "Enable 64 byte CQEs/EQEs when the the FW supports this, if nonzero");
+
+#define HCA_GLOBAL_CAP_MASK 0
+
+#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE
+
static char mlx4_version[] __devinitdata =
DRV_NAME ": Mellanox ConnectX core driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
-struct mutex drv_mutex;
-
-static struct mlx4_profile default_profile = {
- .num_qp = 1 << 18,
- .num_srq = 1 << 16,
- .rdmarc_per_qp = 1 << 4,
- .num_cq = 1 << 16,
- .num_mcg = 1 << 13,
- .num_mpt = 1 << 19,
- .num_mtt = 1 << 20,
-};
-
-static int log_num_mac = 2;
+static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
-static int use_prio;
-module_param_named(use_prio, use_prio, bool, 0444);
-MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
- "(0/1, default 0)");
+static int log_num_vlan;
+module_param_named(log_num_vlan, log_num_vlan, int, 0444);
+MODULE_PARM_DESC(log_num_vlan,
+ "(Obsolete) Log2 max number of VLANs per ETH port (0-7)");
+/* Log2 max number of VLANs per ETH port (0-7) */
+#define MLX4_LOG_NUM_VLANS 7
-static struct mlx4_profile mod_param_profile = { 0 };
+int log_mtts_per_seg = ilog2(1);
+module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
+ "(0-7) (default: 0)");
+
+static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
+#if 0
+static int arr_argc = 2;
+module_param_array(port_type_array, int, &arr_argc, 0444);
+MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
+ "1 for IB, 2 for Ethernet");
+#endif
+
+struct mlx4_port_config {
+ struct list_head list;
+ enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
+ struct pci_dev *pdev;
+};
+
+#define MLX4_LOG_NUM_MTT 20
+/* We limit to 30 as of a bit map issue which uses int and not uint.
+ see mlx4_buddy_init -> bitmap_zero which gets int.
+*/
+#define MLX4_MAX_LOG_NUM_MTT 30
+static struct mlx4_profile mod_param_profile = {
+ .num_qp = 19,
+ .num_srq = 16,
+ .rdmarc_per_qp = 4,
+ .num_cq = 16,
+ .num_mcg = 13,
+ .num_mpt = 19,
+ .num_mtt = 0, /* max(20, 2*MTTs for host memory)) */
+};
module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444);
-MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA");
+MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)");
module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444);
-MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA");
+MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA "
+ "(default: 16)");
-module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 0444);
-MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP");
+module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int,
+ 0444);
+MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP "
+ "(default: 4)");
module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444);
-MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA");
+MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)");
module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444);
-MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA");
+MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA "
+ "(default: 13)");
module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444);
MODULE_PARM_DESC(log_num_mpt,
- "log maximum number of memory protection table entries per HCA");
+ "log maximum number of memory protection table entries per "
+ "HCA (default: 19)");
module_param_named(log_num_mtt, mod_param_profile.num_mtt, int, 0444);
MODULE_PARM_DESC(log_num_mtt,
- "log maximum number of memory translation table segments per HCA");
-
-static int log_mtts_per_seg = 0;
-module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
-MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
+ "log maximum number of memory translation table segments per "
+ "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))");
-static void process_mod_param_profile(void)
-{
- default_profile.num_qp = (mod_param_profile.num_qp ?
- 1 << mod_param_profile.num_qp :
- default_profile.num_qp);
- default_profile.num_srq = (mod_param_profile.num_srq ?
- 1 << mod_param_profile.num_srq :
- default_profile.num_srq);
- default_profile.rdmarc_per_qp = (mod_param_profile.rdmarc_per_qp ?
- 1 << mod_param_profile.rdmarc_per_qp :
- default_profile.rdmarc_per_qp);
- default_profile.num_cq = (mod_param_profile.num_cq ?
- 1 << mod_param_profile.num_cq :
- default_profile.num_cq);
- default_profile.num_mcg = (mod_param_profile.num_mcg ?
- 1 << mod_param_profile.num_mcg :
- default_profile.num_mcg);
- default_profile.num_mpt = (mod_param_profile.num_mpt ?
- 1 << mod_param_profile.num_mpt :
- default_profile.num_mpt);
- default_profile.num_mtt = (mod_param_profile.num_mtt ?
- 1 << mod_param_profile.num_mtt :
- default_profile.num_mtt);
-}
-
-struct mlx4_port_config
-{
- struct list_head list;
- enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
- struct pci_dev *pdev;
+enum {
+ MLX4_IF_STATE_BASIC,
+ MLX4_IF_STATE_EXTENDED
};
-static LIST_HEAD(config_list);
-
-static void mlx4_config_cleanup(void)
-{
- struct mlx4_port_config *config, *tmp;
-
- list_for_each_entry_safe(config, tmp, &config_list, list) {
- list_del(&config->list);
- kfree(config);
- }
-}
-
-void *mlx4_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port)
+static void process_mod_param_profile(struct mlx4_profile *profile)
{
- return mlx4_find_get_prot_dev(dev, proto, port);
-}
-EXPORT_SYMBOL(mlx4_get_prot_dev);
-
-void mlx4_set_iboe_counter(struct mlx4_dev *dev, int index, u8 port)
-{
- struct mlx4_priv *priv = mlx4_priv(dev);
-
- priv->iboe_counter_index[port - 1] = index;
-}
-EXPORT_SYMBOL(mlx4_set_iboe_counter);
-int mlx4_get_iboe_counter(struct mlx4_dev *dev, u8 port)
-{
- struct mlx4_priv *priv = mlx4_priv(dev);
+ vm_size_t hwphyssz;
+ hwphyssz = 0;
+ TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz);
- return priv->iboe_counter_index[port - 1];
+ profile->num_qp = 1 << mod_param_profile.num_qp;
+ profile->num_srq = 1 << mod_param_profile.num_srq;
+ profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp;
+ profile->num_cq = 1 << mod_param_profile.num_cq;
+ profile->num_mcg = 1 << mod_param_profile.num_mcg;
+ profile->num_mpt = 1 << mod_param_profile.num_mpt;
+ /*
+ * We want to scale the number of MTTs with the size of the
+ * system memory, since it makes sense to register a lot of
+ * memory on a system with a lot of memory. As a heuristic,
+ * make sure we have enough MTTs to register twice the system
+ * memory (with PAGE_SIZE entries).
+ *
+ * This number has to be a power of two and fit into 32 bits
+ * due to device limitations. We cap this at 2^30 as of bit map
+ * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero)
+ * That limits us to 4TB of memory registration per HCA with
+ * 4KB pages, which is probably OK for the next few months.
+ */
+ if (mod_param_profile.num_mtt)
+ profile->num_mtt = 1 << mod_param_profile.num_mtt;
+ else {
+ profile->num_mtt =
+ roundup_pow_of_two(max_t(unsigned,
+ 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg),
+ min(1UL <<
+ (MLX4_MAX_LOG_NUM_MTT -
+ log_mtts_per_seg),
+ (hwphyssz << 1)
+ >> log_mtts_per_seg)));
+ /* set the actual value, so it will be reflected to the user
+ using the sysfs */
+ mod_param_profile.num_mtt = ilog2(profile->num_mtt * (1 << log_mtts_per_seg));
+ }
}
-EXPORT_SYMBOL(mlx4_get_iboe_counter);
int mlx4_check_port_params(struct mlx4_dev *dev,
enum mlx4_port_type *port_type)
@@ -230,19 +282,6 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev)
dev->caps.port_mask[i] = dev->caps.port_type[i];
}
-static u8 get_counters_mode(u64 flags)
-{
- switch (flags >> 48 & 3) {
- case 2:
- case 3:
- return MLX4_CUNTERS_EXT;
- case 1:
- return MLX4_CUNTERS_BASIC;
- default:
- return MLX4_CUNTERS_DISABLED;
- }
-}
-
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
int err;
@@ -276,21 +315,29 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
}
dev->caps.num_ports = dev_cap->num_ports;
+ dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM;
for (i = 1; i <= dev->caps.num_ports; ++i) {
dev->caps.vl_cap[i] = dev_cap->max_vl[i];
dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i];
+ dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i];
+ dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i];
+ /* set gid and pkey table operating lengths by default
+ * to non-sriov values */
dev->caps.gid_table_len[i] = dev_cap->max_gids[i];
dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i];
dev->caps.def_mac[i] = dev_cap->def_mac[i];
dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
+ dev->caps.suggested_type[i] = dev_cap->suggested_type[i];
+ dev->caps.default_sense[i] = dev_cap->default_sense[i];
dev->caps.trans_type[i] = dev_cap->trans_type[i];
dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i];
dev->caps.wavelength[i] = dev_cap->wavelength[i];
dev->caps.trans_code[i] = dev_cap->trans_code[i];
}
+ dev->caps.uar_page_size = PAGE_SIZE;
dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE;
dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
dev->caps.bf_reg_size = dev_cap->bf_reg_size;
@@ -304,51 +351,96 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.reserved_srqs = dev_cap->reserved_srqs;
dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz;
dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz;
- dev->caps.num_qp_per_mgm = MLX4_QP_PER_MGM;
/*
* Subtract 1 from the limit because we need to allocate a
- * spare CQE so the HCA HW can tell the difference between an
- * empty CQ and a full CQ.
+ * spare CQE to enable resizing the CQ
*/
dev->caps.max_cqes = dev_cap->max_cq_sz - 1;
dev->caps.reserved_cqs = dev_cap->reserved_cqs;
dev->caps.reserved_eqs = dev_cap->reserved_eqs;
- dev->caps.mtts_per_seg = 1 << log_mtts_per_seg;
- dev->caps.reserved_mtts = DIV_ROUND_UP(dev_cap->reserved_mtts,
- dev->caps.mtts_per_seg);
+ dev->caps.reserved_mtts = dev_cap->reserved_mtts;
dev->caps.reserved_mrws = dev_cap->reserved_mrws;
- dev->caps.reserved_uars = dev_cap->reserved_uars;
+
+ /* The first 128 UARs are used for EQ doorbells */
+ dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars);
dev->caps.reserved_pds = dev_cap->reserved_pds;
- dev->caps.mtt_entry_sz = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
+ dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+ dev_cap->reserved_xrcds : 0;
+ dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+ dev_cap->max_xrcds : 0;
+ dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz;
+
dev->caps.max_msg_sz = dev_cap->max_msg_sz;
dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1);
dev->caps.flags = dev_cap->flags;
+ dev->caps.flags2 = dev_cap->flags2;
dev->caps.bmme_flags = dev_cap->bmme_flags;
dev->caps.reserved_lkey = dev_cap->reserved_lkey;
dev->caps.stat_rate_support = dev_cap->stat_rate_support;
- dev->caps.udp_rss = dev_cap->udp_rss;
- dev->caps.loopback_support = dev_cap->loopback_support;
- dev->caps.wol = dev_cap->wol;
+ dev->caps.cq_timestamp = dev_cap->timestamp_support;
dev->caps.max_gso_sz = dev_cap->max_gso_sz;
- dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
- dev_cap->reserved_xrcds : 0;
- dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
- dev_cap->max_xrcds : 0;
+ dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz;
+
+ /* Sense port always allowed on supported devices for ConnectX-1 and -2 */
+ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
+ dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
+ /* Don't do sense port on multifunction devices (for now at least) */
+ if (mlx4_is_mfunc(dev))
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
dev->caps.log_num_macs = log_num_mac;
- dev->caps.log_num_prios = use_prio ? 3 : 0;
+ dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
+
+ dev->caps.fast_drop = fast_drop ?
+ !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) :
+ 0;
for (i = 1; i <= dev->caps.num_ports; ++i) {
dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
if (dev->caps.supported_type[i]) {
- if (dev->caps.supported_type[i] != MLX4_PORT_TYPE_ETH)
- dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
- else
+ /* if only ETH is supported - assign ETH */
+ if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+ /* if only IB is supported, assign IB */
+ else if (dev->caps.supported_type[i] ==
+ MLX4_PORT_TYPE_IB)
+ dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
+ else {
+ /* if IB and ETH are supported, we set the port
+ * type according to user selection of port type;
+ * if user selected none, take the FW hint */
+ if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
+ dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
+ MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
+ else
+ dev->caps.port_type[i] = port_type_array[i - 1];
+ }
}
- dev->caps.possible_type[i] = dev->caps.port_type[i];
+ /*
+ * Link sensing is allowed on the port if 3 conditions are true:
+ * 1. Both protocols are supported on the port.
+ * 2. Different types are supported on the port
+ * 3. FW declared that it supports link sensing
+ */
mlx4_priv(dev)->sense.sense_allowed[i] =
- dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO;
+ ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
+ (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
+ (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
+
+ /*
+ * If "default_sense" bit is set, we move the port to "AUTO" mode
+ * and perform sense_port FW command to try and set the correct
+ * port type from beginning
+ */
+ if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
+ enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
+ dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
+ mlx4_SENSE_PORT(dev, i, &sensed_port);
+ if (sensed_port != MLX4_PORT_TYPE_NONE)
+ dev->caps.port_type[i] = sensed_port;
+ } else {
+ dev->caps.possible_type[i] = dev->caps.port_type[i];
+ }
if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
dev->caps.log_num_macs = dev_cap->log_max_macs[i];
@@ -356,52 +448,316 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
"for port %d, reducing to %d.\n",
i, 1 << dev->caps.log_num_macs);
}
- dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
+ if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
+ dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
+ mlx4_warn(dev, "Requested number of VLANs is too much "
+ "for port %d, reducing to %d.\n",
+ i, 1 << dev->caps.log_num_vlans);
+ }
}
- dev->caps.counters_mode = get_counters_mode(dev_cap->flags);
- dev->caps.max_basic_counters = 1 << ilog2(dev_cap->max_basic_counters);
- dev->caps.max_ext_counters = 1 << ilog2(dev_cap->max_ext_counters);
+ dev->caps.max_basic_counters = dev_cap->max_basic_counters;
+ dev->caps.max_extended_counters = dev_cap->max_extended_counters;
+ /* support extended counters if available */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT)
+ dev->caps.max_counters = dev->caps.max_extended_counters;
+ else
+ dev->caps.max_counters = dev->caps.max_basic_counters;
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
(1 << dev->caps.log_num_macs) *
(1 << dev->caps.log_num_vlans) *
- (1 << dev->caps.log_num_prios) *
dev->caps.num_ports;
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
- dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR];
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
+
+ dev->caps.sync_qp = dev_cap->sync_qp;
+ dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
+
+ if (!mlx4_enable_64b_cqe_eqe) {
+ if (dev_cap->flags &
+ (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
+ mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+ }
+ }
+
+ if ((dev->caps.flags &
+ (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
+ mlx4_is_master(dev))
+ dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
return 0;
}
-
-static int mlx4_save_config(struct mlx4_dev *dev)
+/*The function checks if there are live vf, return the num of them*/
+static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
- struct mlx4_port_config *config;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state;
int i;
+ int ret = 0;
+
+ for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
+ s_state = &priv->mfunc.master.slave_state[i];
+ if (s_state->active && s_state->last_cmd !=
+ MLX4_COMM_CMD_RESET) {
+ mlx4_warn(dev, "%s: slave: %d is still active\n",
+ __func__, i);
+ ret++;
+ }
+ }
+ return ret;
+}
+
+int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
+{
+ u32 qk = MLX4_RESERVED_QKEY_BASE;
+
+ if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
+ qpn < dev->phys_caps.base_proxy_sqpn)
+ return -EINVAL;
+
+ if (qpn >= dev->phys_caps.base_tunnel_sqpn)
+ /* tunnel qp */
+ qk += qpn - dev->phys_caps.base_tunnel_sqpn;
+ else
+ qk += qpn - dev->phys_caps.base_proxy_sqpn;
+ *qkey = qk;
+ return 0;
+}
+EXPORT_SYMBOL(mlx4_get_parav_qkey);
+
+void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ priv->virt2phys_pkey[slave][port - 1][i] = val;
+}
+EXPORT_SYMBOL(mlx4_sync_pkey_table);
+
+void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ priv->slave_node_guids[slave] = guid;
+}
+EXPORT_SYMBOL(mlx4_put_slave_node_guid);
+
+__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return 0;
+
+ return priv->slave_node_guids[slave];
+}
+EXPORT_SYMBOL(mlx4_get_slave_node_guid);
+
+int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_slave;
+
+ if (!mlx4_is_master(dev))
+ return 0;
+
+ s_slave = &priv->mfunc.master.slave_state[slave];
+ return !!s_slave->active;
+}
+EXPORT_SYMBOL(mlx4_is_slave_active);
+
+static void slave_adjust_steering_mode(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap,
+ struct mlx4_init_hca_param *hca_param)
+{
+ dev->caps.steering_mode = hca_param->steering_mode;
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED)
+ dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
+ else
+ dev->caps.num_qp_per_mgm =
+ 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
+
+ mlx4_dbg(dev, "Steering mode is: %s\n",
+ mlx4_steering_mode_str(dev->caps.steering_mode));
+}
+
+static int mlx4_slave_cap(struct mlx4_dev *dev)
+{
+ int err;
+ u32 page_size;
+ struct mlx4_dev_cap dev_cap;
+ struct mlx4_func_cap func_cap;
+ struct mlx4_init_hca_param hca_param;
+ int i;
+
+ memset(&hca_param, 0, sizeof(hca_param));
+ err = mlx4_QUERY_HCA(dev, &hca_param);
+ if (err) {
+ mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
+ return err;
+ }
- list_for_each_entry(config, &config_list, list) {
- if (config->pdev == dev->pdev) {
- for (i = 1; i <= dev->caps.num_ports; i++)
- config->port_type[i] = dev->caps.possible_type[i];
- return 0;
+ /*fail if the hca has an unknown capability */
+ if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
+ HCA_GLOBAL_CAP_MASK) {
+ mlx4_err(dev, "Unknown hca global capabilities\n");
+ return -ENOSYS;
+ }
+
+ mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;
+
+ dev->caps.hca_core_clock = hca_param.hca_core_clock;
+
+ memset(&dev_cap, 0, sizeof(dev_cap));
+ dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
+ err = mlx4_dev_cap(dev, &dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+ return err;
+ }
+
+ err = mlx4_QUERY_FW(dev);
+ if (err)
+ mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n");
+
+ page_size = ~dev->caps.page_size_cap + 1;
+ mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
+ if (page_size > PAGE_SIZE) {
+ mlx4_err(dev, "HCA minimum page size of %d bigger than "
+ "kernel PAGE_SIZE of %d, aborting.\n",
+ page_size, PAGE_SIZE);
+ return -ENODEV;
+ }
+
+ /* slave gets uar page size from QUERY_HCA fw command */
+ dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);
+
+ /* TODO: relax this assumption */
+ if (dev->caps.uar_page_size != PAGE_SIZE) {
+ mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n",
+ dev->caps.uar_page_size, PAGE_SIZE);
+ return -ENODEV;
+ }
+
+ memset(&func_cap, 0, sizeof(func_cap));
+ err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n",
+ err);
+ return err;
+ }
+
+ if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
+ PF_CONTEXT_BEHAVIOUR_MASK) {
+ mlx4_err(dev, "Unknown pf context behaviour\n");
+ return -ENOSYS;
+ }
+
+ dev->caps.num_ports = func_cap.num_ports;
+ dev->quotas.qp = func_cap.qp_quota;
+ dev->quotas.srq = func_cap.srq_quota;
+ dev->quotas.cq = func_cap.cq_quota;
+ dev->quotas.mpt = func_cap.mpt_quota;
+ dev->quotas.mtt = func_cap.mtt_quota;
+ dev->caps.num_qps = 1 << hca_param.log_num_qps;
+ dev->caps.num_srqs = 1 << hca_param.log_num_srqs;
+ dev->caps.num_cqs = 1 << hca_param.log_num_cqs;
+ dev->caps.num_mpts = 1 << hca_param.log_mpt_sz;
+ dev->caps.num_eqs = func_cap.max_eq;
+ dev->caps.reserved_eqs = func_cap.reserved_eq;
+ dev->caps.num_pds = MLX4_NUM_PDS;
+ dev->caps.num_mgms = 0;
+ dev->caps.num_amgms = 0;
+
+ if (dev->caps.num_ports > MLX4_MAX_PORTS) {
+ mlx4_err(dev, "HCA has %d ports, but we only support %d, "
+ "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS);
+ return -ENODEV;
+ }
+
+ dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+
+ if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
+ !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ for (i = 1; i <= dev->caps.num_ports; ++i) {
+ err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_FUNC_CAP port command failed for"
+ " port %d, aborting (%d).\n", i, err);
+ goto err_mem;
}
+ dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
+ dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
+ dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
+ dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
+ dev->caps.port_mask[i] = dev->caps.port_type[i];
+ err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
+ &dev->caps.gid_table_len[i],
+ &dev->caps.pkey_table_len[i]);
+ if (err)
+ goto err_mem;
}
- config = kmalloc(sizeof(struct mlx4_port_config), GFP_KERNEL);
- if (!config)
- return -ENOMEM;
+ if (dev->caps.uar_page_size * (dev->caps.num_uars -
+ dev->caps.reserved_uars) >
+ pci_resource_len(dev->pdev, 2)) {
+ mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than "
+ "PCI resource 2 size of 0x%llx, aborting.\n",
+ dev->caps.uar_page_size * dev->caps.num_uars,
+ (unsigned long long) pci_resource_len(dev->pdev, 2));
+ err = -ENOMEM;
+ goto err_mem;
+ }
- config->pdev = dev->pdev;
- for (i = 1; i <= dev->caps.num_ports; i++)
- config->port_type[i] = dev->caps.possible_type[i];
+ if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
+ dev->caps.eqe_size = 64;
+ dev->caps.eqe_factor = 1;
+ } else {
+ dev->caps.eqe_size = 32;
+ dev->caps.eqe_factor = 0;
+ }
- list_add_tail(&config->list, &config_list);
+ if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
+ dev->caps.cqe_size = 64;
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+ } else {
+ dev->caps.cqe_size = 32;
+ }
+
+ slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
return 0;
+
+err_mem:
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
+ dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+
+ return err;
}
/*
@@ -418,16 +774,15 @@ int mlx4_change_port_types(struct mlx4_dev *dev,
for (port = 0; port < dev->caps.num_ports; port++) {
/* Change the port type only if the new type is different
* from the current, and not set to Auto */
- if (port_types[port] != dev->caps.port_type[port + 1]) {
+ if (port_types[port] != dev->caps.port_type[port + 1])
change = 1;
- dev->caps.port_type[port + 1] = port_types[port];
- }
}
if (change) {
mlx4_unregister_device(dev);
for (port = 1; port <= dev->caps.num_ports; port++) {
mlx4_CLOSE_PORT(dev, port);
- err = mlx4_SET_PORT(dev, port);
+ dev->caps.port_type[port] = port_types[port - 1];
+ err = mlx4_SET_PORT(dev, port, -1);
if (err) {
mlx4_err(dev, "Failed to set port %d, "
"aborting\n", port);
@@ -435,7 +790,6 @@ int mlx4_change_port_types(struct mlx4_dev *dev,
}
}
mlx4_set_port_mask(dev);
- mlx4_save_config(dev);
err = mlx4_register_device(dev);
}
@@ -487,7 +841,7 @@ static ssize_t set_port_type(struct device *dev,
return -EINVAL;
}
- mlx4_stop_sense(mdev);
+ mlx4_stop_sense(mdev);
mutex_lock(&priv->port_mutex);
/* Possible type is always the one that was delivered */
mdev->caps.possible_type[info->port] = info->tmp_type;
@@ -499,14 +853,8 @@ static ssize_t set_port_type(struct device *dev,
types[i] = mdev->caps.port_type[i+1];
}
- if (priv->trig) {
- if (++priv->changed_ports < mdev->caps.num_ports)
- goto out;
- else
- priv->trig = priv->changed_ports = 0;
- }
-
- if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+ if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
+ !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
for (i = 1; i <= mdev->caps.num_ports; i++) {
if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
mdev->caps.possible_type[i] = mdev->caps.port_type[i];
@@ -541,22 +889,97 @@ out:
return err ? err : count;
}
-static ssize_t trigger_port(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+enum ibta_mtu {
+ IB_MTU_256 = 1,
+ IB_MTU_512 = 2,
+ IB_MTU_1024 = 3,
+ IB_MTU_2048 = 4,
+ IB_MTU_4096 = 5
+};
+
+static inline int int_to_ibta_mtu(int mtu)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct mlx4_dev *mdev = pci_get_drvdata(pdev);
- struct mlx4_priv *priv = container_of(mdev, struct mlx4_priv, dev);
+ switch (mtu) {
+ case 256: return IB_MTU_256;
+ case 512: return IB_MTU_512;
+ case 1024: return IB_MTU_1024;
+ case 2048: return IB_MTU_2048;
+ case 4096: return IB_MTU_4096;
+ default: return -1;
+ }
+}
- if (!priv)
- return -ENODEV;
+static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
+{
+ switch (mtu) {
+ case IB_MTU_256: return 256;
+ case IB_MTU_512: return 512;
+ case IB_MTU_1024: return 1024;
+ case IB_MTU_2048: return 2048;
+ case IB_MTU_4096: return 4096;
+ default: return -1;
+ }
+}
+static ssize_t show_port_ib_mtu(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+ port_mtu_attr);
+ struct mlx4_dev *mdev = info->dev;
+
+ if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
+ mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
+
+ sprintf(buf, "%d\n",
+ ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
+ return strlen(buf);
+}
+
+static ssize_t set_port_ib_mtu(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+ port_mtu_attr);
+ struct mlx4_dev *mdev = info->dev;
+ struct mlx4_priv *priv = mlx4_priv(mdev);
+ int err, port, mtu, ibta_mtu = -1;
+
+ if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
+ mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
+ return -EINVAL;
+ }
+
+ mtu = (int) simple_strtol(buf, NULL, 0);
+ ibta_mtu = int_to_ibta_mtu(mtu);
+
+ if (ibta_mtu < 0) {
+ mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf);
+ return -EINVAL;
+ }
+
+ mdev->caps.port_ib_mtu[info->port] = ibta_mtu;
+
+ mlx4_stop_sense(mdev);
mutex_lock(&priv->port_mutex);
- priv->trig = 1;
+ mlx4_unregister_device(mdev);
+ for (port = 1; port <= mdev->caps.num_ports; port++) {
+ mlx4_CLOSE_PORT(mdev, port);
+ err = mlx4_SET_PORT(mdev, port, -1);
+ if (err) {
+ mlx4_err(mdev, "Failed to set port %d, "
+ "aborting\n", port);
+ goto err_set_port;
+ }
+ }
+ err = mlx4_register_device(mdev);
+err_set_port:
mutex_unlock(&priv->port_mutex);
- return count;
+ mlx4_start_sense(mdev);
+ return err ? err : count;
}
-DEVICE_ATTR(port_trigger, S_IWUGO, NULL, trigger_port);
static int mlx4_load_fw(struct mlx4_dev *dev)
{
@@ -597,6 +1020,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
+ int num_eqs;
err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
cmpt_base +
@@ -626,12 +1050,13 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
if (err)
goto err_srq;
+ num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
+ dev->caps.num_eqs;
err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
cmpt_base +
((u64) (MLX4_CMPT_TYPE_EQ *
cmpt_entry_sz) << MLX4_CMPT_SHIFT),
- cmpt_entry_sz,
- dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
+ cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
if (err)
goto err_cq;
@@ -655,6 +1080,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
{
struct mlx4_priv *priv = mlx4_priv(dev);
u64 aux_pages;
+ int num_eqs;
int err;
err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
@@ -686,10 +1112,12 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
goto err_unmap_aux;
}
+
+ num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
+ dev->caps.num_eqs;
err = mlx4_init_icm_table(dev, &priv->eq_table.table,
init_hca->eqc_base, dev_cap->eqc_entry_sz,
- dev->caps.num_eqs, dev->caps.num_eqs,
- 0, 0);
+ num_eqs, num_eqs, 0, 0);
if (err) {
mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
goto err_unmap_cmpt;
@@ -709,7 +1137,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
init_hca->mtt_base,
dev->caps.mtt_entry_sz,
- dev->caps.num_mtt_segs,
+ dev->caps.num_mtts,
dev->caps.reserved_mtts, 1, 0);
if (err) {
mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
@@ -791,12 +1219,15 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
}
/*
- * It's not strictly required, but for simplicity just map the
- * whole multicast group table now. The table isn't very big
- * and it's a lot easier than trying to track ref counts.
+ * For flow steering device managed mode it is required to use
+ * mlx4_init_icm_table. For B0 steering mode it's not strictly
+ * required, but for simplicity just map the whole multicast
+ * group table now. The table isn't very big and it's a lot
+ * easier than trying to track ref counts.
*/
err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
- init_hca->mc_base, MLX4_MGM_ENTRY_SIZE,
+ init_hca->mc_base,
+ mlx4_get_mgm_entry_size(dev),
dev->caps.num_mgms + dev->caps.num_amgms,
dev->caps.num_mgms + dev->caps.num_amgms,
0, 0);
@@ -872,6 +1303,16 @@ static void mlx4_free_icms(struct mlx4_dev *dev)
mlx4_free_icm(dev, priv->fw.aux_icm, 0);
}
+static void mlx4_slave_exit(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
+ mlx4_warn(dev, "Failed to close slave function.\n");
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+}
+
static int map_bf_area(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -879,8 +1320,13 @@ static int map_bf_area(struct mlx4_dev *dev)
resource_size_t bf_len;
int err = 0;
- bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT);
- bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT);
+ if (!dev->caps.bf_reg_size)
+ return -ENXIO;
+
+ bf_start = pci_resource_start(dev->pdev, 2) +
+ (dev->caps.num_uars << PAGE_SHIFT);
+ bf_len = pci_resource_len(dev->pdev, 2) -
+ (dev->caps.num_uars << PAGE_SHIFT);
priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
if (!priv->bf_mapping)
err = -ENOMEM;
@@ -894,177 +1340,473 @@ static void unmap_bf_area(struct mlx4_dev *dev)
io_mapping_free(mlx4_priv(dev)->bf_mapping);
}
+cycle_t mlx4_read_clock(struct mlx4_dev *dev)
+{
+ u32 clockhi, clocklo, clockhi1;
+ cycle_t cycles;
+ int i;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ for (i = 0; i < 10; i++) {
+ clockhi = swab32(readl(priv->clock_mapping));
+ clocklo = swab32(readl(priv->clock_mapping + 4));
+ clockhi1 = swab32(readl(priv->clock_mapping));
+ if (clockhi == clockhi1)
+ break;
+ }
+
+ cycles = (u64) clockhi << 32 | (u64) clocklo;
+
+ return cycles;
+}
+EXPORT_SYMBOL_GPL(mlx4_read_clock);
+
+
+static int map_internal_clock(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ priv->clock_mapping = ioremap(pci_resource_start(dev->pdev,
+ priv->fw.clock_bar) +
+ priv->fw.clock_offset, MLX4_CLOCK_SIZE);
+
+ if (!priv->clock_mapping)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void unmap_internal_clock(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (priv->clock_mapping)
+ iounmap(priv->clock_mapping);
+}
+
static void mlx4_close_hca(struct mlx4_dev *dev)
{
+ unmap_internal_clock(dev);
unmap_bf_area(dev);
- mlx4_CLOSE_HCA(dev, 0);
- mlx4_free_icms(dev);
- mlx4_UNMAP_FA(dev);
- mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+ if (mlx4_is_slave(dev))
+ mlx4_slave_exit(dev);
+ else {
+ mlx4_CLOSE_HCA(dev, 0);
+ mlx4_free_icms(dev);
+ mlx4_UNMAP_FA(dev);
+ mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+ }
+}
+
+static int mlx4_init_slave(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u64 dma = (u64) priv->mfunc.vhcr_dma;
+ int num_of_reset_retries = NUM_OF_RESET_RETRIES;
+ int ret_from_reset = 0;
+ u32 slave_read;
+ u32 cmd_channel_ver;
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ priv->cmd.max_cmds = 1;
+ mlx4_warn(dev, "Sending reset\n");
+ ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
+ MLX4_COMM_TIME);
+ /* if we are in the middle of flr the slave will try
+ * NUM_OF_RESET_RETRIES times before leaving.*/
+ if (ret_from_reset) {
+ if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
+ msleep(SLEEP_TIME_IN_RESET);
+ while (ret_from_reset && num_of_reset_retries) {
+ mlx4_warn(dev, "slave is currently in the"
+ "middle of FLR. retrying..."
+ "(try num:%d)\n",
+ (NUM_OF_RESET_RETRIES -
+ num_of_reset_retries + 1));
+ ret_from_reset =
+ mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET,
+ 0, MLX4_COMM_TIME);
+ num_of_reset_retries = num_of_reset_retries - 1;
+ }
+ } else
+ goto err;
+ }
+
+ /* check the driver version - the slave I/F revision
+ * must match the master's */
+ slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
+ cmd_channel_ver = mlx4_comm_get_version();
+
+ if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
+ MLX4_COMM_GET_IF_REV(slave_read)) {
+ mlx4_err(dev, "slave driver version is not supported"
+ " by the master\n");
+ goto err;
+ }
+
+ mlx4_warn(dev, "Sending vhcr0\n");
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
+ MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
+ MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
+ MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
+ goto err;
+
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return 0;
+
+err:
+ mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return -EIO;
+}
+
+static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
+{
+ int i;
+
+ for (i = 1; i <= dev->caps.num_ports; i++) {
+ if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
+ dev->caps.gid_table_len[i] =
+ mlx4_get_slave_num_gids(dev, 0);
+ else
+ dev->caps.gid_table_len[i] = 1;
+ dev->caps.pkey_table_len[i] =
+ dev->phys_caps.pkey_phys_table_len[i] - 1;
+ }
+}
+
+static int choose_log_fs_mgm_entry_size(int qp_per_entry)
+{
+ int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
+
+ for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
+ i++) {
+ if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
+ break;
+ }
+
+ return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
+}
+
+static void choose_steering_mode(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap)
+{
+ // This is only valid to the integrated driver.
+ // The new ported mlx4_core driver is in B0 steering mode by default
+ // and the old mlx4_en driver is in A0 steering mode by default.
+ // If high_rate_steer == TRUE it means that A0 steering mode is on.
+ // The integration fix is to hard code high_rate_steer to TRUE.
+ high_rate_steer = 1;
+
+ if (high_rate_steer && !mlx4_is_mfunc(dev)) {
+ dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER |
+ MLX4_DEV_CAP_FLAG_VEP_UC_STEER);
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN;
+ }
+
+ if (mlx4_log_num_mgm_entry_size == -1 &&
+ dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
+ dev_cap->fs_log_max_ucast_qp_range_size == 0 &&
+ (!mlx4_is_mfunc(dev) ||
+ (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) &&
+ choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
+ dev->oper_log_mgm_entry_size =
+ choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
+ dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
+ dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
+ } else {
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) {
+ dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
+ }
+ else {
+ dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
+ mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
+ "set to use B0 steering. Falling back to A0 steering mode.\n");
+ }
+ dev->oper_log_mgm_entry_size =
+ mlx4_log_num_mgm_entry_size > 0 ?
+ mlx4_log_num_mgm_entry_size :
+ MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+ dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
+ }
+ mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
+ "log_num_mgm_entry_size = %d\n",
+ mlx4_steering_mode_str(dev->caps.steering_mode),
+ dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size);
}
static int mlx4_init_hca(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_dev_cap *dev_cap = NULL;
struct mlx4_adapter adapter;
- struct mlx4_dev_cap dev_cap;
struct mlx4_mod_stat_cfg mlx4_cfg;
struct mlx4_profile profile;
struct mlx4_init_hca_param init_hca;
- struct mlx4_port_config *config;
u64 icm_size;
int err;
- int i;
- err = mlx4_QUERY_FW(dev);
- if (err) {
- if (err == -EACCES)
- mlx4_info(dev, "non-primary physical function, skipping.\n");
- else
- mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
- return err;
- }
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_QUERY_FW(dev);
+ if (err) {
+ if (err == -EACCES)
+ mlx4_info(dev, "non-primary physical function, skipping.\n");
+ else
+ mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
+ return err;
+ }
- err = mlx4_load_fw(dev);
- if (err) {
- mlx4_err(dev, "Failed to start FW, aborting.\n");
- return err;
- }
+ err = mlx4_load_fw(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to start FW, aborting.\n");
+ return err;
+ }
- mlx4_cfg.log_pg_sz_m = 1;
- mlx4_cfg.log_pg_sz = 0;
- err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
- if (err)
- mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
+ mlx4_cfg.log_pg_sz_m = 1;
+ mlx4_cfg.log_pg_sz = 0;
+ err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
+ if (err)
+ mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
- err = mlx4_dev_cap(dev, &dev_cap);
- if (err) {
- mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
- goto err_stop_fw;
- }
+ dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL);
+ if (!dev_cap) {
+ mlx4_err(dev, "Failed to allocate memory for dev_cap\n");
+ err = -ENOMEM;
+ goto err_stop_fw;
+ }
+
+ err = mlx4_dev_cap(dev, dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+ goto err_stop_fw;
+ }
- process_mod_param_profile();
- profile = default_profile;
+ choose_steering_mode(dev, dev_cap);
- list_for_each_entry(config, &config_list, list) {
- if (config->pdev == dev->pdev) {
- for (i = 1; i <= dev->caps.num_ports; i++) {
- dev->caps.possible_type[i] = config->port_type[i];
- if (config->port_type[i] != MLX4_PORT_TYPE_AUTO)
- dev->caps.port_type[i] = config->port_type[i];
- }
+ if (mlx4_is_master(dev))
+ mlx4_parav_master_pf_caps(dev);
+
+ process_mod_param_profile(&profile);
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ profile.num_mcg = MLX4_FS_NUM_MCG;
+
+ icm_size = mlx4_make_profile(dev, &profile, dev_cap,
+ &init_hca);
+ if ((long long) icm_size < 0) {
+ err = icm_size;
+ goto err_stop_fw;
+ }
+
+ dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
+
+ init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
+ init_hca.uar_page_sz = PAGE_SHIFT - 12;
+
+ err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size);
+ if (err)
+ goto err_stop_fw;
+
+ err = mlx4_INIT_HCA(dev, &init_hca);
+ if (err) {
+ mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
+ goto err_free_icm;
+ }
+ } else {
+ err = mlx4_init_slave(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize slave\n");
+ return err;
}
- }
- mlx4_set_port_mask(dev);
- icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca);
- if ((long long) icm_size < 0) {
- err = icm_size;
- goto err_stop_fw;
+ err = mlx4_slave_cap(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to obtain slave caps\n");
+ goto err_close;
+ }
}
if (map_bf_area(dev))
- mlx4_dbg(dev, "Kernel support for blue flame is not available for kernels < 2.6.28\n");
+ mlx4_dbg(dev, "Failed to map blue flame area\n");
- init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
-
- err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
- if (err)
- goto err_stop_fw;
+ /*
+ * Read HCA frequency by QUERY_HCA command
+ */
+ if (dev->caps.cq_timestamp) {
+ memset(&init_hca, 0, sizeof(init_hca));
+ err = mlx4_QUERY_HCA(dev, &init_hca);
+ if (err) {
+ mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n");
+ dev->caps.cq_timestamp = 0;
+ } else
+ dev->caps.hca_core_clock = init_hca.hca_core_clock;
+
+ /*
+ * In case we got HCA frequency 0 - disable timestamping
+ * to avoid dividing by zero
+ */
+ if (!dev->caps.hca_core_clock) {
+ dev->caps.cq_timestamp = 0;
+ mlx4_err(dev, "HCA frequency is 0. "
+ "Timestamping is not supported.");
+ }
- err = mlx4_INIT_HCA(dev, &init_hca);
- if (err) {
- mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
- goto err_free_icm;
+ /*
+ * Map internal clock, in case of failure disable timestamping
+ */
+ if (map_internal_clock(dev)) {
+ dev->caps.cq_timestamp = 0;
+ mlx4_err(dev, "Failed to map internal clock. "
+ "Timestamping is not supported.\n");
+ }
}
+ /*Only the master set the ports, all the rest got it from it.*/
+ if (!mlx4_is_slave(dev))
+ mlx4_set_port_mask(dev);
+
err = mlx4_QUERY_ADAPTER(dev, &adapter);
if (err) {
mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
- goto err_close;
+ goto unmap_bf;
}
priv->eq_table.inta_pin = adapter.inta_pin;
memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
+ if (!mlx4_is_slave(dev))
+ kfree(dev_cap);
+
return 0;
+unmap_bf:
+ unmap_internal_clock(dev);
+ unmap_bf_area(dev);
+
+ if (mlx4_is_slave(dev)) {
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ }
+
err_close:
- mlx4_CLOSE_HCA(dev, 0);
+ if (mlx4_is_slave(dev))
+ mlx4_slave_exit(dev);
+ else
+ mlx4_CLOSE_HCA(dev, 0);
err_free_icm:
- mlx4_free_icms(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_free_icms(dev);
err_stop_fw:
- unmap_bf_area(dev);
- mlx4_UNMAP_FA(dev);
- mlx4_free_icm(dev, priv->fw.fw_icm, 0);
-
+ if (!mlx4_is_slave(dev)) {
+ mlx4_UNMAP_FA(dev);
+ mlx4_free_icm(dev, priv->fw.fw_icm, 0);
+ if (dev_cap)
+ kfree(dev_cap);
+ }
return err;
}
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- int err;
- int nent;
-
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- nent = dev->caps.max_basic_counters;
- break;
- case MLX4_CUNTERS_EXT:
- nent = dev->caps.max_ext_counters;
- break;
- default:
+ int res;
+ int nent_pow2;
+
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
return -ENOENT;
- }
- err = mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
- if (err)
- return err;
- return 0;
+ nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
+ res = mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
+ nent_pow2 - 1, 0,
+ nent_pow2 - dev->caps.max_counters);
+ if (res)
+ return res;
+
+ if (dev->caps.max_counters == dev->caps.max_basic_counters)
+ return 0;
+
+ res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0,
+ MLX4_CMD_SET_IF_STAT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ if (res)
+ mlx4_err(dev, "Failed to set extended counters (err=%d)\n",
+ res);
+ return res;
+
}
static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- case MLX4_CUNTERS_EXT:
+ if (!mlx4_is_slave(dev) &&
+ (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
- break;
- default:
- break;
- }
}
-int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
+int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- case MLX4_CUNTERS_EXT:
- *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
- if (*idx == -1)
- return -ENOMEM;
- return 0;
- default:
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
+ return -ENOENT;
+
+ *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
+ if (*idx == -1)
return -ENOMEM;
+
+ return 0;
+}
+
+int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
+{
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
+ RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ *idx = get_param_l(&out_param);
+
+ return err;
}
+ return __mlx4_counter_alloc(dev, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
+void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
+{
+ mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
+ return;
+}
+
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
{
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- case MLX4_CUNTERS_EXT:
- mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
- return;
- default:
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, idx);
+ mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
return;
}
+ __mlx4_counter_free(dev, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_free);
@@ -1078,18 +1820,19 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
err = mlx4_init_uar_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "user access region table, aborting.\n");
+ "user access region table (err=%d), aborting.\n",
+ err);
return err;
}
err = mlx4_uar_alloc(dev, &priv->driver_uar);
if (err) {
- mlx4_err(dev, "Failed to allocate driver access region, "
- "aborting.\n");
+ mlx4_err(dev, "Failed to allocate driver access region "
+ "(err=%d), aborting.\n", err);
goto err_uar_table_free;
}
- priv->kar = ioremap(priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
if (!priv->kar) {
mlx4_err(dev, "Couldn't map kernel access region, "
"aborting.\n");
@@ -1100,35 +1843,36 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
err = mlx4_init_pd_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "protection domain table, aborting.\n");
+ "protection domain table (err=%d), aborting.\n", err);
goto err_kar_unmap;
}
err = mlx4_init_xrcd_table(dev);
if (err) {
- mlx4_err(dev, "Failed to initialize extended "
- "reliably connected domain table, aborting.\n");
+ mlx4_err(dev, "Failed to initialize "
+ "reliable connection domain table (err=%d), "
+ "aborting.\n", err);
goto err_pd_table_free;
}
err = mlx4_init_mr_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "memory region table, aborting.\n");
+ "memory region table (err=%d), aborting.\n", err);
goto err_xrcd_table_free;
}
err = mlx4_init_eq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "event queue table, aborting.\n");
+ "event queue table (err=%d), aborting.\n", err);
goto err_mr_table_free;
}
err = mlx4_cmd_use_events(dev);
if (err) {
mlx4_err(dev, "Failed to switch to event-driven "
- "firmware commands, aborting.\n");
+ "firmware commands (err=%d), aborting.\n", err);
goto err_eq_table_free;
}
@@ -1154,50 +1898,74 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
err = mlx4_init_cq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "completion queue table, aborting.\n");
+ "completion queue table (err=%d), aborting.\n", err);
goto err_cmd_poll;
}
err = mlx4_init_srq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "shared receive queue table, aborting.\n");
+ "shared receive queue table (err=%d), aborting.\n",
+ err);
goto err_cq_table_free;
}
err = mlx4_init_qp_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "queue pair table, aborting.\n");
+ "queue pair table (err=%d), aborting.\n", err);
goto err_srq_table_free;
}
- err = mlx4_init_mcg_table(dev);
- if (err) {
- mlx4_err(dev, "Failed to initialize "
- "multicast group table, aborting.\n");
- goto err_qp_table_free;
- }
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_init_mcg_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize "
+ "multicast group table (err=%d), aborting.\n",
+ err);
+ goto err_qp_table_free;
+ }
- err = mlx4_init_counters_table(dev);
- if (err && err != -ENOENT) {
- mlx4_err(dev, "Failed to initialize counters table, aborting.\n");
- goto err_mcg_table_free;
- }
+ err = mlx4_init_counters_table(dev);
+ if (err && err != -ENOENT) {
+ mlx4_err(dev, "Failed to initialize counters table (err=%d), "
+ "aborting.\n", err);
+ goto err_mcg_table_free;
+ }
- for (port = 1; port <= dev->caps.num_ports; port++) {
- ib_port_default_caps = 0;
- err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps);
- if (err)
- mlx4_warn(dev, "failed to get port %d default "
- "ib capabilities (%d). Continuing with "
- "caps = 0\n", port, err);
- dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
- err = mlx4_SET_PORT(dev, port);
- if (err) {
- mlx4_err(dev, "Failed to set port %d, aborting\n",
- port);
- goto err_counters_table_free;
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ ib_port_default_caps = 0;
+ err = mlx4_get_port_ib_caps(dev, port,
+ &ib_port_default_caps);
+ if (err)
+ mlx4_warn(dev, "failed to get port %d default "
+ "ib capabilities (%d). Continuing "
+ "with caps = 0\n", port, err);
+ dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
+
+ /* initialize per-slave default ib port capabilities */
+ if (mlx4_is_master(dev)) {
+ int i;
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
+ ib_port_default_caps;
+ }
+ }
+
+ if (mlx4_is_mfunc(dev))
+ dev->caps.port_ib_mtu[port] = IB_MTU_2048;
+ else
+ dev->caps.port_ib_mtu[port] = IB_MTU_4096;
+
+ err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
+ dev->caps.pkey_table_len[port] : -1);
+ if (err) {
+ mlx4_err(dev, "Failed to set port %d (err=%d), "
+ "aborting\n", port, err);
+ goto err_counters_table_free;
+ }
}
}
@@ -1248,13 +2016,16 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct msix_entry *entries;
- int nreq;
+ int nreq = min_t(int, dev->caps.num_ports *
+ min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT)
+ + MSIX_LEGACY_SZ, MAX_MSIX);
int err;
int i;
if (msi_x) {
nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
- num_possible_cpus() + 1);
+ nreq);
+
entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
if (!entries)
goto no_msi;
@@ -1277,7 +2048,15 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
goto no_msi;
}
- dev->caps.num_comp_vectors = nreq - 1;
+ if (nreq <
+ MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
+ /*Working in legacy mode , all EQ's shared*/
+ dev->caps.comp_pool = 0;
+ dev->caps.num_comp_vectors = nreq - 1;
+ } else {
+ dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ;
+ dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
+ }
for (i = 0; i < nreq; ++i)
priv->eq_table.eq[i].irq = entries[i].vector;
@@ -1289,6 +2068,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
no_msi:
dev->caps.num_comp_vectors = 1;
+ dev->caps.comp_pool = 0;
for (i = 0; i < 2; ++i)
priv->eq_table.eq[i].irq = dev->pdev->irq;
@@ -1301,14 +2081,22 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
info->dev = dev;
info->port = port;
- mlx4_init_mac_table(dev, &info->mac_table);
- mlx4_init_vlan_table(dev, &info->vlan_table);
+ if (!mlx4_is_slave(dev)) {
+ mlx4_init_mac_table(dev, &info->mac_table);
+ mlx4_init_vlan_table(dev, &info->vlan_table);
+ info->base_qpn = mlx4_get_base_qpn(dev, port);
+ }
sprintf(info->dev_name, "mlx4_port%d", port);
info->port_attr.attr.name = info->dev_name;
- info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+ if (mlx4_is_mfunc(dev))
+ info->port_attr.attr.mode = S_IRUGO;
+ else {
+ info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+ info->port_attr.store = set_port_type;
+ }
info->port_attr.show = show_port_type;
- info->port_attr.store = set_port_type;
+ sysfs_attr_init(&info->port_attr.attr);
err = device_create_file(&dev->pdev->dev, &info->port_attr);
if (err) {
@@ -1316,6 +2104,24 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
info->port = -1;
}
+ sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
+ info->port_mtu_attr.attr.name = info->dev_mtu_name;
+ if (mlx4_is_mfunc(dev))
+ info->port_mtu_attr.attr.mode = S_IRUGO;
+ else {
+ info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
+ info->port_mtu_attr.store = set_port_ib_mtu;
+ }
+ info->port_mtu_attr.show = show_port_ib_mtu;
+ sysfs_attr_init(&info->port_mtu_attr.attr);
+
+ err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr);
+ if (err) {
+ mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
+ device_remove_file(&info->dev->pdev->dev, &info->port_attr);
+ info->port = -1;
+ }
+
return err;
}
@@ -1325,25 +2131,114 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
return;
device_remove_file(&info->dev->pdev->dev, &info->port_attr);
+ device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
+}
+
+static int mlx4_init_steering(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int num_entries = dev->caps.num_ports;
+ int i, j;
+
+ priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
+ if (!priv->steer)
+ return -ENOMEM;
+
+ for (i = 0; i < num_entries; i++)
+ for (j = 0; j < MLX4_NUM_STEERS; j++) {
+ INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
+ INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
+ }
+ return 0;
+}
+
+static void mlx4_clear_steering(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_steer_index *entry, *tmp_entry;
+ struct mlx4_promisc_qp *pqp, *tmp_pqp;
+ int num_entries = dev->caps.num_ports;
+ int i, j;
+
+ for (i = 0; i < num_entries; i++) {
+ for (j = 0; j < MLX4_NUM_STEERS; j++) {
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &priv->steer[i].promisc_qps[j],
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ list_for_each_entry_safe(entry, tmp_entry,
+ &priv->steer[i].steer_entries[j],
+ list) {
+ list_del(&entry->list);
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &entry->duplicates,
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ kfree(entry);
+ }
+ }
+ }
+ kfree(priv->steer);
}
-static int mlx4_init_trigger(struct mlx4_priv *priv)
+static int extended_func_num(struct pci_dev *pdev)
{
- memcpy(&priv->trigger_attr, &dev_attr_port_trigger,
- sizeof(struct device_attribute));
- return device_create_file(&priv->dev.pdev->dev, &priv->trigger_attr);
+ return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
+}
+
+#define MLX4_OWNER_BASE 0x8069c
+#define MLX4_OWNER_SIZE 4
+
+static int mlx4_get_ownership(struct mlx4_dev *dev)
+{
+ void __iomem *owner;
+ u32 ret;
+
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
+ owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+ MLX4_OWNER_SIZE);
+ if (!owner) {
+ mlx4_err(dev, "Failed to obtain ownership bit\n");
+ return -ENOMEM;
+ }
+
+ ret = readl(owner);
+ iounmap(owner);
+ return (int) !!ret;
}
-static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+static void mlx4_free_ownership(struct mlx4_dev *dev)
+{
+ void __iomem *owner;
+
+ if (pci_channel_offline(dev->pdev))
+ return;
+
+ owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+ MLX4_OWNER_SIZE);
+ if (!owner) {
+ mlx4_err(dev, "Failed to obtain ownership bit\n");
+ return;
+ }
+ writel(0, owner);
+ msleep(1000);
+ iounmap(owner);
+}
+
+static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
{
struct mlx4_priv *priv;
struct mlx4_dev *dev;
int err;
int port;
- int i;
- printk(KERN_INFO PFX "Initializing %s\n",
- pci_name(pdev));
+ pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
err = pci_enable_device(pdev);
if (err) {
@@ -1351,13 +2246,24 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
"aborting.\n");
return err;
}
+ if (num_vfs > MLX4_MAX_NUM_VF) {
+ dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n",
+ num_vfs, MLX4_MAX_NUM_VF);
+ return -EINVAL;
+ }
+ if (num_vfs < 0) {
+ dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
+ return -EINVAL;
+ }
/*
- * Check for BARs. We expect 0: 1MB
+ * Check for BARs.
*/
- if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
- pci_resource_len(pdev, 0) != 1 << 20) {
- dev_err(&pdev->dev, "Missing DCS, aborting.\n");
+ if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
+ !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Missing DCS, aborting."
+ "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n",
+ pci_dev_data, pci_resource_flags(pdev, 0));
err = -ENODEV;
goto err_disable_pdev;
}
@@ -1367,18 +2273,12 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_disable_pdev;
}
- err = pci_request_region(pdev, 0, DRV_NAME);
+ err = pci_request_regions(pdev, DRV_NAME);
if (err) {
- dev_err(&pdev->dev, "Cannot request control region, aborting.\n");
+ dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
goto err_disable_pdev;
}
- err = pci_request_region(pdev, 2, DRV_NAME);
- if (err) {
- dev_err(&pdev->dev, "Cannot request UAR region, aborting.\n");
- goto err_release_bar0;
- }
-
pci_set_master(pdev);
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1387,7 +2287,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
- goto err_release_bar2;
+ goto err_release_regions;
}
}
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1398,16 +2298,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
if (err) {
dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
"aborting.\n");
- goto err_release_bar2;
+ goto err_release_regions;
}
}
+ /* Allow large DMA segments, up to the firmware limit of 1 GB */
+ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
+
priv = kzalloc(sizeof *priv, GFP_KERNEL);
if (!priv) {
dev_err(&pdev->dev, "Device struct alloc failed, "
"aborting.\n");
err = -ENOMEM;
- goto err_release_bar2;
+ goto err_release_regions;
}
dev = &priv->dev;
@@ -1419,47 +2322,154 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_LIST_HEAD(&priv->pgdir_list);
mutex_init(&priv->pgdir_mutex);
- for (i = 0; i < MLX4_MAX_PORTS; ++i)
- priv->iboe_counter_index[i] = -1;
INIT_LIST_HEAD(&priv->bf_list);
mutex_init(&priv->bf_mutex);
- /*
- * Now reset the HCA before we touch the PCI capabilities or
- * attempt a firmware command, since a boot ROM may have left
- * the HCA in an undefined state.
- */
- err = mlx4_reset(dev);
- if (err) {
- mlx4_err(dev, "Failed to reset HCA, aborting.\n");
- goto err_free_dev;
+ dev->rev_id = pdev->revision;
+ dev->numa_node = dev_to_node(&pdev->dev);
+ /* Detect if this device is a virtual function */
+ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
+ /* When acting as pf, we normally skip vfs unless explicitly
+ * requested to probe them. */
+ if (num_vfs && extended_func_num(pdev) > probe_vf) {
+ mlx4_warn(dev, "Skipping virtual function:%d\n",
+ extended_func_num(pdev));
+ err = -ENODEV;
+ goto err_free_dev;
+ }
+ mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
+ dev->flags |= MLX4_FLAG_SLAVE;
+ } else {
+ /* We reset the device and enable SRIOV only for physical
+ * devices. Try to claim ownership on the device;
+ * if already taken, skip -- do not allow multiple PFs */
+ err = mlx4_get_ownership(dev);
+ if (err) {
+ if (err < 0)
+ goto err_free_dev;
+ else {
+ mlx4_warn(dev, "Multiple PFs not yet supported."
+ " Skipping PF.\n");
+ err = -EINVAL;
+ goto err_free_dev;
+ }
+ }
+
+ if (num_vfs) {
+ mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs);
+ err = pci_enable_sriov(pdev, num_vfs);
+ if (err) {
+ mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
+ err);
+ err = 0;
+ } else {
+ mlx4_warn(dev, "Running in master mode\n");
+ dev->flags |= MLX4_FLAG_SRIOV |
+ MLX4_FLAG_MASTER;
+ dev->num_vfs = num_vfs;
+ }
+ }
+
+ atomic_set(&priv->opreq_count, 0);
+ INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
+
+ /*
+ * Now reset the HCA before we touch the PCI capabilities or
+ * attempt a firmware command, since a boot ROM may have left
+ * the HCA in an undefined state.
+ */
+ err = mlx4_reset(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to reset HCA, aborting.\n");
+ goto err_sriov;
+ }
}
- if (mlx4_cmd_init(dev)) {
+slave_start:
+ err = mlx4_cmd_init(dev);
+ if (err) {
mlx4_err(dev, "Failed to init command interface, aborting.\n");
- goto err_free_dev;
+ goto err_sriov;
+ }
+
+ /* In slave functions, the communication channel must be initialized
+ * before posting commands. Also, init num_slaves before calling
+ * mlx4_init_hca */
+ if (mlx4_is_mfunc(dev)) {
+ if (mlx4_is_master(dev))
+ dev->num_slaves = MLX4_MAX_NUM_SLAVES;
+ else {
+ dev->num_slaves = 0;
+ err = mlx4_multi_func_init(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to init slave mfunc"
+ " interface, aborting.\n");
+ goto err_cmd;
+ }
+ }
}
err = mlx4_init_hca(dev);
- if (err)
- goto err_cmd;
+ if (err) {
+ if (err == -EACCES) {
+ /* Not primary Physical function
+ * Running in slave mode */
+ mlx4_cmd_cleanup(dev);
+ dev->flags |= MLX4_FLAG_SLAVE;
+ dev->flags &= ~MLX4_FLAG_MASTER;
+ goto slave_start;
+ } else
+ goto err_mfunc;
+ }
+
+ /* In master functions, the communication channel must be initialized
+ * after obtaining its address from fw */
+ if (mlx4_is_master(dev)) {
+ err = mlx4_multi_func_init(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to init master mfunc"
+ "interface, aborting.\n");
+ goto err_close;
+ }
+ }
err = mlx4_alloc_eq_table(dev);
if (err)
- goto err_close;
+ goto err_master_mfunc;
+
+ priv->msix_ctl.pool_bm = 0;
+ mutex_init(&priv->msix_ctl.pool_lock);
mlx4_enable_msi_x(dev);
+ if ((mlx4_is_mfunc(dev)) &&
+ !(dev->flags & MLX4_FLAG_MSI_X)) {
+ err = -ENOSYS;
+ mlx4_err(dev, "INTx is not supported in multi-function mode."
+ " aborting.\n");
+ goto err_free_eq;
+ }
+
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_init_steering(dev);
+ if (err)
+ goto err_free_eq;
+ }
err = mlx4_setup_hca(dev);
- if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
+ if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
+ !mlx4_is_mfunc(dev)) {
dev->flags &= ~MLX4_FLAG_MSI_X;
+ dev->caps.num_comp_vectors = 1;
+ dev->caps.comp_pool = 0;
pci_disable_msix(pdev);
err = mlx4_setup_hca(dev);
}
if (err)
- goto err_free_eq;
+ goto err_steer;
+
+ mlx4_init_quotas(dev);
for (port = 1; port <= dev->caps.num_ports; port++) {
err = mlx4_init_port_info(dev, port);
@@ -1471,24 +2481,17 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto err_port;
- err = mlx4_init_trigger(priv);
- if (err)
- goto err_register;
-
err = mlx4_sense_init(dev);
if (err)
- goto err_trigger;
+ goto err_port;
mlx4_start_sense(dev);
+ priv->pci_dev_data = pci_dev_data;
pci_set_drvdata(pdev, dev);
return 0;
-err_trigger:
- device_remove_file(&dev->pdev->dev, &priv->trigger_attr);
-err_register:
- mlx4_unregister_device(dev);
err_port:
for (--port; port >= 1; --port)
mlx4_cleanup_port_info(&priv->port[port]);
@@ -1505,26 +2508,49 @@ err_port:
mlx4_cleanup_pd_table(dev);
mlx4_cleanup_uar_table(dev);
+err_steer:
+ if (!mlx4_is_slave(dev))
+ mlx4_clear_steering(dev);
+
err_free_eq:
mlx4_free_eq_table(dev);
+err_master_mfunc:
+ if (mlx4_is_master(dev))
+ mlx4_multi_func_cleanup(dev);
+
+ if (mlx4_is_slave(dev)) {
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ }
+
err_close:
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
mlx4_close_hca(dev);
+err_mfunc:
+ if (mlx4_is_slave(dev))
+ mlx4_multi_func_cleanup(dev);
+
err_cmd:
mlx4_cmd_cleanup(dev);
+err_sriov:
+ if (dev->flags & MLX4_FLAG_SRIOV)
+ pci_disable_sriov(pdev);
+
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+
err_free_dev:
kfree(priv);
-err_release_bar2:
- pci_release_region(pdev, 2);
-
-err_release_bar0:
- pci_release_region(pdev, 0);
+err_release_regions:
+ pci_release_regions(pdev);
err_disable_pdev:
pci_disable_device(pdev);
@@ -1535,14 +2561,9 @@ err_disable_pdev:
static int __devinit mlx4_init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
{
- static int mlx4_version_printed;
+ printk_once(KERN_INFO "%s", mlx4_version);
- if (!mlx4_version_printed) {
- printk(KERN_INFO "%s", mlx4_version);
- ++mlx4_version_printed;
- }
-
- return __mlx4_init_one(pdev, id);
+ return __mlx4_init_one(pdev, id->driver_data);
}
static void mlx4_remove_one(struct pci_dev *pdev)
@@ -1552,16 +2573,25 @@ static void mlx4_remove_one(struct pci_dev *pdev)
int p;
if (dev) {
+ /* in SRIOV it is not allowed to unload the pf's
+ * driver while there are alive vf's */
+ if (mlx4_is_master(dev)) {
+ if (mlx4_how_many_lives_vf(dev))
+ mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n");
+ }
mlx4_sense_cleanup(dev);
mlx4_unregister_device(dev);
- device_remove_file(&dev->pdev->dev, &priv->trigger_attr);
for (p = 1; p <= dev->caps.num_ports; p++) {
mlx4_cleanup_port_info(&priv->port[p]);
mlx4_CLOSE_PORT(dev, p);
}
- mlx4_cleanup_counters_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_SLAVES_ONLY);
+
+ mlx4_cleanup_counters_table(dev);
mlx4_cleanup_mcg_table(dev);
mlx4_cleanup_qp_table(dev);
mlx4_cleanup_srq_table(dev);
@@ -1572,19 +2602,40 @@ static void mlx4_remove_one(struct pci_dev *pdev)
mlx4_cleanup_xrcd_table(dev);
mlx4_cleanup_pd_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_STRUCTS_ONLY);
+
iounmap(priv->kar);
mlx4_uar_free(dev, &priv->driver_uar);
mlx4_cleanup_uar_table(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_clear_steering(dev);
mlx4_free_eq_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_multi_func_cleanup(dev);
mlx4_close_hca(dev);
+ if (mlx4_is_slave(dev))
+ mlx4_multi_func_cleanup(dev);
mlx4_cmd_cleanup(dev);
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
+ if (dev->flags & MLX4_FLAG_SRIOV) {
+ mlx4_warn(dev, "Disabling SR-IOV\n");
+ pci_disable_sriov(pdev);
+ }
+
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
kfree(priv);
- pci_release_region(pdev, 2);
- pci_release_region(pdev, 0);
+ pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
@@ -1592,66 +2643,187 @@ static void mlx4_remove_one(struct pci_dev *pdev)
int mlx4_restart_one(struct pci_dev *pdev)
{
+ struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int pci_dev_data;
+
+ pci_dev_data = priv->pci_dev_data;
mlx4_remove_one(pdev);
- return __mlx4_init_one(pdev, NULL);
+ return __mlx4_init_one(pdev, pci_dev_data);
}
-static struct pci_device_id mlx4_pci_table[] = {
- { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
- { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
- { PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
- { PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x6764) }, /* MT26468 ConnectX EN 10GigE PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6746) }, /* MT26438 ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE Virt+ */
- { PCI_VDEVICE(MELLANOX, 0x676e) }, /* MT26478 ConnectX EN 40GigE PCIe 2.0 5GT/s */
- { PCI_VDEVICE(MELLANOX, 0x6778) }, /* MT26488 ConnectX VPI PCIe 2.0 5GT/s - IB DDR / 10GigE Virt+ */
- { PCI_VDEVICE(MELLANOX, 0x1000) },
- { PCI_VDEVICE(MELLANOX, 0x1001) },
- { PCI_VDEVICE(MELLANOX, 0x1002) },
- { PCI_VDEVICE(MELLANOX, 0x1003) },
- { PCI_VDEVICE(MELLANOX, 0x1004) },
- { PCI_VDEVICE(MELLANOX, 0x1005) },
- { PCI_VDEVICE(MELLANOX, 0x1006) },
- { PCI_VDEVICE(MELLANOX, 0x1007) },
- { PCI_VDEVICE(MELLANOX, 0x1008) },
- { PCI_VDEVICE(MELLANOX, 0x1009) },
- { PCI_VDEVICE(MELLANOX, 0x100a) },
- { PCI_VDEVICE(MELLANOX, 0x100b) },
- { PCI_VDEVICE(MELLANOX, 0x100c) },
- { PCI_VDEVICE(MELLANOX, 0x100d) },
- { PCI_VDEVICE(MELLANOX, 0x100e) },
- { PCI_VDEVICE(MELLANOX, 0x100f) },
+static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
+ /* MT25408 "Hermon" SDR */
+ { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" DDR */
+ { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" QDR */
+ { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" DDR PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" QDR PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" EN 10GigE */
+ { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25458 ConnectX EN 10GBASE-T 10GigE */
+ { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
+ { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT26468 ConnectX EN 10GigE PCIe gen2*/
+ { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
+ { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT26478 ConnectX2 40GigE PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25400 Family [ConnectX-2 Virtual Function] */
+ { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
+ /* MT27500 Family [ConnectX-3] */
+ { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
+ /* MT27500 Family [ConnectX-3 Virtual Function] */
+ { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
+ { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
{ 0, }
};
MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
+static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ mlx4_remove_one(pdev);
+
+ return state == pci_channel_io_perm_failure ?
+ PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
+{
+ int ret = __mlx4_init_one(pdev, 0);
+
+ return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+}
+
+static const struct pci_error_handlers mlx4_err_handler = {
+ .error_detected = mlx4_pci_err_detected,
+ .slot_reset = mlx4_pci_slot_reset,
+};
+
+static int suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ mlx4_remove_one(pdev);
+
+ if (mlx4_log_num_mgm_entry_size != -1 &&
+ (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
+ mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
+ pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
+ "in legal range (-1 or %d..%d)\n",
+ mlx4_log_num_mgm_entry_size,
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE,
+ MLX4_MAX_MGM_LOG_ENTRY_SIZE);
+ return -1;
+ }
+ return 0;
+}
+
+static int resume(struct pci_dev *pdev)
+{
+ return __mlx4_init_one(pdev, 0);
+}
+
static struct pci_driver mlx4_driver = {
.name = DRV_NAME,
- .id_table = mlx4_pci_table,
+ .id_table = (struct pci_device_id*)mlx4_pci_table,
.probe = mlx4_init_one,
- .remove = __devexit_p(mlx4_remove_one)
+ .remove = __devexit_p(mlx4_remove_one),
+ .suspend = suspend,
+ .resume = resume,
+ .err_handler = (struct pci_error_handlers*)&mlx4_err_handler,
};
static int __init mlx4_verify_params(void)
{
if ((log_num_mac < 0) || (log_num_mac > 7)) {
- printk(KERN_WARNING "mlx4_core: bad num_mac: %d\n", log_num_mac);
+ pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac);
+ return -1;
+ }
+
+ if (log_num_vlan != 0)
+ pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
+ MLX4_LOG_NUM_VLANS);
+
+ if (mlx4_set_4k_mtu != -1)
+ pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n");
+
+ if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
+ pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
+ return -1;
+ }
+
+ /* Check if module param for ports type has legal combination */
+ if (port_type_array[0] == false && port_type_array[1] == true) {
+ pr_warning("mlx4_core: module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
+ port_type_array[0] = true;
+ }
+
+ if (mlx4_log_num_mgm_entry_size != -1 &&
+ (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
+ mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
+ pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
+ "in legal range (-1 or %d..%d)\n",
+ mlx4_log_num_mgm_entry_size,
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE,
+ MLX4_MAX_MGM_LOG_ENTRY_SIZE);
+ return -1;
+ }
+
+ if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) {
+ pr_warning("mlx4_core: bad log_num_qp: %d\n",
+ mod_param_profile.num_qp);
+ return -1;
+ }
+
+ if (mod_param_profile.num_srq < 10) {
+ pr_warning("mlx4_core: too low log_num_srq: %d\n",
+ mod_param_profile.num_srq);
return -1;
}
- if (log_mtts_per_seg == 0)
- log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
- if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
- printk(KERN_WARNING "mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
+ if (mod_param_profile.num_cq < 10) {
+ pr_warning("mlx4_core: too low log_num_cq: %d\n",
+ mod_param_profile.num_cq);
return -1;
}
+ if (mod_param_profile.num_mpt < 10) {
+ pr_warning("mlx4_core: too low log_num_mpt: %d\n",
+ mod_param_profile.num_mpt);
+ return -1;
+ }
+
+ if (mod_param_profile.num_mtt && mod_param_profile.num_mtt < 15) {
+ pr_warning("mlx4_core: too low log_num_mtt: %d\n",
+ mod_param_profile.num_mtt);
+ return -1;
+ }
+
+ if (mod_param_profile.num_mtt > MLX4_MAX_LOG_NUM_MTT) {
+ pr_warning("mlx4_core: too high log_num_mtt: %d\n",
+ mod_param_profile.num_mtt);
+ return -1;
+ }
return 0;
}
@@ -1659,8 +2831,6 @@ static int __init mlx4_init(void)
{
int ret;
- mutex_init(&drv_mutex);
-
if (mlx4_verify_params())
return -EINVAL;
@@ -1670,20 +2840,26 @@ static int __init mlx4_init(void)
if (!mlx4_wq)
return -ENOMEM;
+ if (enable_sys_tune)
+ sys_tune_init();
+
ret = pci_register_driver(&mlx4_driver);
+ if (ret < 0 && enable_sys_tune)
+ sys_tune_fini();
+
return ret < 0 ? ret : 0;
}
static void __exit mlx4_cleanup(void)
{
- mutex_lock(&drv_mutex);
- mlx4_config_cleanup();
+ if (enable_sys_tune)
+ sys_tune_fini();
+
pci_unregister_driver(&mlx4_driver);
- mutex_unlock(&drv_mutex);
destroy_workqueue(mlx4_wq);
}
-module_init_order(mlx4_init, SI_ORDER_MIDDLE);
+module_init(mlx4_init);
module_exit(mlx4_cleanup);
#undef MODULE_VERSION
diff --git a/sys/ofed/drivers/net/mlx4/mcg.c b/sys/ofed/drivers/net/mlx4/mcg.c
index 70493e3..625e5e4 100644
--- a/sys/ofed/drivers/net/mlx4/mcg.c
+++ b/sys/ofed/drivers/net/mlx4/mcg.c
@@ -31,50 +31,88 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/string.h>
-#include <linux/slab.h>
#include <linux/mlx4/cmd.h>
-#include <linux/mlx4/driver.h>
#include "mlx4.h"
-#define MGM_QPN_MASK 0x00FFFFFF
-#define MGM_BLCK_LB_BIT 30
-
-struct mlx4_mgm {
- __be32 next_gid_index;
- __be32 members_count;
- u32 reserved[2];
- u8 gid[16];
- __be32 qp[MLX4_QP_PER_MGM];
-};
static const u8 zero_gid[16]; /* automatically initialized to 0 */
-static int mlx4_READ_MCG(struct mlx4_dev *dev, int index,
- struct mlx4_cmd_mailbox *mailbox)
+int mlx4_get_mgm_entry_size(struct mlx4_dev *dev)
+{
+ return 1 << dev->oper_log_mgm_entry_size;
+}
+
+int mlx4_get_qp_per_mgm(struct mlx4_dev *dev)
+{
+ return 4 * (mlx4_get_mgm_entry_size(dev) / 16 - 2);
+}
+
+static int mlx4_QP_FLOW_STEERING_ATTACH(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox,
+ u32 size,
+ u64 *reg_id)
+{
+ u64 imm;
+ int err = 0;
+
+ err = mlx4_cmd_imm(dev, mailbox->dma, &imm, size, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ *reg_id = imm;
+
+ return err;
+}
+
+static int mlx4_QP_FLOW_STEERING_DETACH(struct mlx4_dev *dev, u64 regid)
+{
+ int err = 0;
+
+ err = mlx4_cmd(dev, regid, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ return err;
+}
+
+static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index,
+ struct mlx4_cmd_mailbox *mailbox)
{
return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
-static int mlx4_WRITE_MCG(struct mlx4_dev *dev, int index,
- struct mlx4_cmd_mailbox *mailbox)
+static int mlx4_WRITE_ENTRY(struct mlx4_dev *dev, int index,
+ struct mlx4_cmd_mailbox *mailbox)
{
return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+
+static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 port, u8 steer,
+ struct mlx4_cmd_mailbox *mailbox)
+{
+ u32 in_mod;
+
+ in_mod = (u32) port << 16 | steer << 1;
+ return mlx4_cmd(dev, mailbox->dma, in_mod, 0x1,
+ MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
}
-static int mlx4_MGID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
- u16 *hash)
+static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ u16 *hash, u8 op_mod)
{
u64 imm;
int err;
- err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, 0, MLX4_CMD_MGID_HASH,
- MLX4_CMD_TIME_CLASS_A);
+ err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, op_mod,
+ MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
if (!err)
*hash = imm;
@@ -82,6 +120,476 @@ static int mlx4_MGID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
return err;
}
+static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ u32 qpn)
+{
+ struct mlx4_steer *s_steer = &mlx4_priv(dev)->steer[port - 1];
+ struct mlx4_promisc_qp *pqp;
+
+ list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) {
+ if (pqp->qpn == qpn)
+ return pqp;
+ }
+ /* not found */
+ return NULL;
+}
+
+/*
+ * Add new entry to steering data structure.
+ * All promisc QPs should be added as well
+ */
+static int new_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ u32 members_count;
+ struct mlx4_steer_index *new_entry;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp = NULL;
+ u32 prot;
+ int err;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+ new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL);
+ if (!new_entry)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&new_entry->duplicates);
+ new_entry->index = index;
+ list_add_tail(&new_entry->list, &s_steer->steer_entries[steer]);
+
+ /* If the given qpn is also a promisc qp,
+ * it should be inserted to duplicates list
+ */
+ pqp = get_promisc_qp(dev, port, steer, qpn);
+ if (pqp) {
+ dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+ if (!dqp) {
+ err = -ENOMEM;
+ goto out_alloc;
+ }
+ dqp->qpn = qpn;
+ list_add_tail(&dqp->list, &new_entry->duplicates);
+ }
+
+ /* if no promisc qps for this vep, we are done */
+ if (list_empty(&s_steer->promisc_qps[steer]))
+ return 0;
+
+ /* now need to add all the promisc qps to the new
+ * steering entry, as they should also receive the packets
+ * destined to this address */
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = -ENOMEM;
+ goto out_alloc;
+ }
+ mgm = mailbox->buf;
+
+ err = mlx4_READ_ENTRY(dev, index, mailbox);
+ if (err)
+ goto out_mailbox;
+
+ members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ prot = be32_to_cpu(mgm->members_count) >> 30;
+ list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) {
+ /* don't add already existing qpn */
+ if (pqp->qpn == qpn)
+ continue;
+ if (members_count == dev->caps.num_qp_per_mgm) {
+ /* out of space */
+ err = -ENOMEM;
+ goto out_mailbox;
+ }
+
+ /* add the qpn */
+ mgm->qp[members_count++] = cpu_to_be32(pqp->qpn & MGM_QPN_MASK);
+ }
+ /* update the qps count and update the entry with all the promisc qps*/
+ mgm->members_count = cpu_to_be32(members_count | (prot << 30));
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
+
+out_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ if (!err)
+ return 0;
+out_alloc:
+ if (dqp) {
+ list_del(&dqp->list);
+ kfree(dqp);
+ }
+ list_del(&new_entry->list);
+ kfree(new_entry);
+ return err;
+}
+
+/* update the data structures with existing steering entry */
+static int existing_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_steer_index *tmp_entry, *entry = NULL;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ pqp = get_promisc_qp(dev, port, steer, qpn);
+ if (!pqp)
+ return 0; /* nothing to do */
+
+ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) {
+ if (tmp_entry->index == index) {
+ entry = tmp_entry;
+ break;
+ }
+ }
+ if (unlikely(!entry)) {
+ mlx4_warn(dev, "Steering entry at index %x is not registered\n", index);
+ return -EINVAL;
+ }
+
+ /* the given qpn is listed as a promisc qpn
+ * we need to add it as a duplicate to this entry
+ * for future references */
+ list_for_each_entry(dqp, &entry->duplicates, list) {
+ if (qpn == pqp->qpn)
+ return 0; /* qp is already duplicated */
+ }
+
+ /* add the qp as a duplicate on this index */
+ dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+ if (!dqp)
+ return -ENOMEM;
+ dqp->qpn = qpn;
+ list_add_tail(&dqp->list, &entry->duplicates);
+
+ return 0;
+}
+
+/* Check whether a qpn is a duplicate on steering entry
+ * If so, it should not be removed from mgm */
+static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_steer_index *tmp_entry, *entry = NULL;
+ struct mlx4_promisc_qp *dqp, *tmp_dqp;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ /* if qp is not promisc, it cannot be duplicated */
+ if (!get_promisc_qp(dev, port, steer, qpn))
+ return false;
+
+ /* The qp is promisc qp so it is a duplicate on this index
+ * Find the index entry, and remove the duplicate */
+ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) {
+ if (tmp_entry->index == index) {
+ entry = tmp_entry;
+ break;
+ }
+ }
+ if (unlikely(!entry)) {
+ mlx4_warn(dev, "Steering entry for index %x is not registered\n", index);
+ return false;
+ }
+ list_for_each_entry_safe(dqp, tmp_dqp, &entry->duplicates, list) {
+ if (dqp->qpn == qpn) {
+ list_del(&dqp->list);
+ kfree(dqp);
+ }
+ }
+ return true;
+}
+
+/* I a steering entry contains only promisc QPs, it can be removed. */
+static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 tqpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ struct mlx4_steer_index *entry = NULL, *tmp_entry;
+ u32 qpn;
+ u32 members_count;
+ bool ret = false;
+ int i;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return false;
+ mgm = mailbox->buf;
+
+ if (mlx4_READ_ENTRY(dev, index, mailbox))
+ goto out;
+ members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ for (i = 0; i < members_count; i++) {
+ qpn = be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK;
+ if (!get_promisc_qp(dev, port, steer, qpn) && qpn != tqpn) {
+ /* the qp is not promisc, the entry can't be removed */
+ goto out;
+ }
+ }
+ /* All the qps currently registered for this entry are promiscuous,
+ * Checking for duplicates */
+ ret = true;
+ list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) {
+ if (entry->index == index) {
+ if (list_empty(&entry->duplicates) || members_count == 1) {
+ struct mlx4_promisc_qp *pqp, *tmp_pqp;
+ /*
+ * If there is only 1 entry in duplicates than
+ * this is the QP we want to delete, going over
+ * the list and deleting the entry.
+ */
+ list_del(&entry->list);
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &entry->duplicates,
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ kfree(entry);
+ } else {
+ /* This entry contains duplicates so it shouldn't be removed */
+ ret = false;
+ goto out;
+ }
+ }
+ }
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return ret;
+}
+
+static int add_promisc_qp(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ struct mlx4_steer_index *entry;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp;
+ u32 members_count;
+ u32 prot;
+ int i;
+ bool found;
+ int err;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ mutex_lock(&priv->mcg_table.mutex);
+
+ if (get_promisc_qp(dev, port, steer, qpn)) {
+ err = 0; /* Noting to do, already exists */
+ goto out_mutex;
+ }
+
+ pqp = kmalloc(sizeof *pqp, GFP_KERNEL);
+ if (!pqp) {
+ err = -ENOMEM;
+ goto out_mutex;
+ }
+ pqp->qpn = qpn;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = -ENOMEM;
+ goto out_alloc;
+ }
+ mgm = mailbox->buf;
+
+ /* the promisc qp needs to be added for each one of the steering
+ * entries, if it already exists, needs to be added as a duplicate
+ * for this entry */
+ list_for_each_entry(entry, &s_steer->steer_entries[steer], list) {
+ err = mlx4_READ_ENTRY(dev, entry->index, mailbox);
+ if (err)
+ goto out_mailbox;
+
+ members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ prot = be32_to_cpu(mgm->members_count) >> 30;
+ found = false;
+ for (i = 0; i < members_count; i++) {
+ if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) {
+ /* Entry already exists, add to duplicates */
+ dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+ if (!dqp) {
+ err = -ENOMEM;
+ goto out_mailbox;
+ }
+ dqp->qpn = qpn;
+ list_add_tail(&dqp->list, &entry->duplicates);
+ found = true;
+ }
+ }
+ if (!found) {
+ /* Need to add the qpn to mgm */
+ if (members_count == dev->caps.num_qp_per_mgm) {
+ /* entry is full */
+ err = -ENOMEM;
+ goto out_mailbox;
+ }
+ mgm->qp[members_count++] = cpu_to_be32(qpn & MGM_QPN_MASK);
+ mgm->members_count = cpu_to_be32(members_count | (prot << 30));
+ err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox);
+ if (err)
+ goto out_mailbox;
+ }
+ }
+
+ /* add the new qpn to list of promisc qps */
+ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]);
+ /* now need to add all the promisc qps to default entry */
+ memset(mgm, 0, sizeof *mgm);
+ members_count = 0;
+ list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) {
+ if (members_count == dev->caps.num_qp_per_mgm) {
+ /* entry is full */
+ err = -ENOMEM;
+ goto out_list;
+ }
+ mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK);
+ }
+ mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30);
+
+ err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox);
+ if (err)
+ goto out_list;
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ mutex_unlock(&priv->mcg_table.mutex);
+ return 0;
+
+out_list:
+ list_del(&pqp->list);
+out_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+out_alloc:
+ kfree(pqp);
+out_mutex:
+ mutex_unlock(&priv->mcg_table.mutex);
+ return err;
+}
+
+static int remove_promisc_qp(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer, u32 qpn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ struct mlx4_steer_index *entry;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp;
+ u32 members_count;
+ bool found;
+ bool back_to_list = false;
+ int i, loc = -1;
+ int err;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+ mutex_lock(&priv->mcg_table.mutex);
+
+ pqp = get_promisc_qp(dev, port, steer, qpn);
+ if (unlikely(!pqp)) {
+ mlx4_warn(dev, "QP %x is not promiscuous QP\n", qpn);
+ /* nothing to do */
+ err = 0;
+ goto out_mutex;
+ }
+
+ /*remove from list of promisc qps */
+ list_del(&pqp->list);
+
+ /* set the default entry not to include the removed one */
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = -ENOMEM;
+ back_to_list = true;
+ goto out_list;
+ }
+ mgm = mailbox->buf;
+ memset(mgm, 0, sizeof *mgm);
+ members_count = 0;
+ list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list)
+ mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK);
+ mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30);
+
+ err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox);
+ if (err)
+ goto out_mailbox;
+
+ /* remove the qp from all the steering entries*/
+ list_for_each_entry(entry, &s_steer->steer_entries[steer], list) {
+ found = false;
+ list_for_each_entry(dqp, &entry->duplicates, list) {
+ if (dqp->qpn == qpn) {
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ /* a duplicate, no need to change the mgm,
+ * only update the duplicates list */
+ list_del(&dqp->list);
+ kfree(dqp);
+ } else {
+ err = mlx4_READ_ENTRY(dev, entry->index, mailbox);
+ if (err)
+ goto out_mailbox;
+ members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ for (i = 0; i < members_count; ++i)
+ if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) {
+ loc = i;
+ break;
+ }
+
+ if (loc < 0) {
+ mlx4_err(dev, "QP %06x wasn't found in entry %d\n",
+ qpn, entry->index);
+ err = -EINVAL;
+ goto out_mailbox;
+ }
+
+ /* copy the last QP in this MGM over removed QP */
+ mgm->qp[loc] = mgm->qp[members_count - 1];
+ mgm->qp[members_count - 1] = 0;
+ mgm->members_count = cpu_to_be32(--members_count |
+ (MLX4_PROT_ETH << 30));
+
+ err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox);
+ if (err)
+ goto out_mailbox;
+ }
+
+ }
+
+out_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+out_list:
+ if (back_to_list)
+ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]);
+ else
+ kfree(pqp);
+out_mutex:
+ mutex_unlock(&priv->mcg_table.mutex);
+ return err;
+}
+
/*
* Caller must hold MCG table semaphore. gid and mgm parameters must
* be properly aligned for command interface.
@@ -97,15 +605,18 @@ static int mlx4_MGID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
* If no AMGM exists for given gid, *index = -1, *prev = index of last
* entry in hash chain and *mgm holds end of hash chain.
*/
-static int find_mgm(struct mlx4_dev *dev,
- u8 *gid, enum mlx4_mcast_prot prot,
- struct mlx4_cmd_mailbox *mgm_mailbox,
- u16 *hash, int *prev, int *index)
+static int find_entry(struct mlx4_dev *dev, u8 port,
+ u8 *gid, enum mlx4_protocol prot,
+ struct mlx4_cmd_mailbox *mgm_mailbox,
+ int *prev, int *index)
{
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mgm *mgm = mgm_mailbox->buf;
u8 *mgid;
int err;
+ u16 hash;
+ u8 op_mod = (prot == MLX4_PROT_ETH) ?
+ !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) : 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -114,24 +625,24 @@ static int find_mgm(struct mlx4_dev *dev,
memcpy(mgid, gid, 16);
- err = mlx4_MGID_HASH(dev, mailbox, hash);
+ err = mlx4_GID_HASH(dev, mailbox, &hash, op_mod);
mlx4_free_cmd_mailbox(dev, mailbox);
if (err)
return err;
if (0)
- mlx4_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash);
+ mlx4_dbg(dev, "Hash for %pI6 is %04x\n", gid, hash);
- *index = *hash;
+ *index = hash;
*prev = -1;
do {
- err = mlx4_READ_MCG(dev, *index, mgm_mailbox);
+ err = mlx4_READ_ENTRY(dev, *index, mgm_mailbox);
if (err)
return err;
- if (!memcmp(mgm->gid, zero_gid, 16)) {
- if (*index != *hash) {
+ if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) {
+ if (*index != hash) {
mlx4_err(dev, "Found zero MGID in AMGM.\n");
err = -EINVAL;
}
@@ -139,7 +650,7 @@ static int find_mgm(struct mlx4_dev *dev,
}
if (!memcmp(mgm->gid, gid, 16) &&
- (prot == be32_to_cpu(mgm->members_count) >> 30))
+ be32_to_cpu(mgm->members_count) >> 30 == prot)
return err;
*prev = *index;
@@ -150,18 +661,266 @@ static int find_mgm(struct mlx4_dev *dev,
return err;
}
-int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- int block_mcast_loopback, enum mlx4_mcast_prot prot)
+static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl,
+ struct mlx4_net_trans_rule_hw_ctrl *hw)
+{
+ static const u8 __promisc_mode[] = {
+ [MLX4_FS_REGULAR] = 0x0,
+ [MLX4_FS_ALL_DEFAULT] = 0x1,
+ [MLX4_FS_MC_DEFAULT] = 0x3,
+ [MLX4_FS_UC_SNIFFER] = 0x4,
+ [MLX4_FS_MC_SNIFFER] = 0x5,
+ };
+
+ u32 dw = 0;
+
+ dw = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0;
+ dw |= ctrl->exclusive ? (1 << 2) : 0;
+ dw |= ctrl->allow_loopback ? (1 << 3) : 0;
+ dw |= __promisc_mode[ctrl->promisc_mode] << 8;
+ dw |= ctrl->priority << 16;
+
+ hw->ctrl = cpu_to_be32(dw);
+ hw->port = ctrl->port;
+ hw->qpn = cpu_to_be32(ctrl->qpn);
+}
+
+const u16 __sw_id_hw[] = {
+ [MLX4_NET_TRANS_RULE_ID_ETH] = 0xE001,
+ [MLX4_NET_TRANS_RULE_ID_IB] = 0xE005,
+ [MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003,
+ [MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002,
+ [MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004,
+ [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006
+};
+
+static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec,
+ struct _rule_hw *rule_hw)
+{
+ static const size_t __rule_hw_sz[] = {
+ [MLX4_NET_TRANS_RULE_ID_ETH] =
+ sizeof(struct mlx4_net_trans_rule_hw_eth),
+ [MLX4_NET_TRANS_RULE_ID_IB] =
+ sizeof(struct mlx4_net_trans_rule_hw_ib),
+ [MLX4_NET_TRANS_RULE_ID_IPV6] = 0,
+ [MLX4_NET_TRANS_RULE_ID_IPV4] =
+ sizeof(struct mlx4_net_trans_rule_hw_ipv4),
+ [MLX4_NET_TRANS_RULE_ID_TCP] =
+ sizeof(struct mlx4_net_trans_rule_hw_tcp_udp),
+ [MLX4_NET_TRANS_RULE_ID_UDP] =
+ sizeof(struct mlx4_net_trans_rule_hw_tcp_udp)
+ };
+ if (spec->id >= MLX4_NET_TRANS_RULE_NUM) {
+ mlx4_err(dev, "Invalid network rule id. id = %d\n", spec->id);
+ return -EINVAL;
+ }
+ memset(rule_hw, 0, __rule_hw_sz[spec->id]);
+ rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]);
+ rule_hw->size = __rule_hw_sz[spec->id] >> 2;
+
+ switch (spec->id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ memcpy(rule_hw->eth.dst_mac, spec->eth.dst_mac, ETH_ALEN);
+ memcpy(rule_hw->eth.dst_mac_msk, spec->eth.dst_mac_msk,
+ ETH_ALEN);
+ memcpy(rule_hw->eth.src_mac, spec->eth.src_mac, ETH_ALEN);
+ memcpy(rule_hw->eth.src_mac_msk, spec->eth.src_mac_msk,
+ ETH_ALEN);
+ if (spec->eth.ether_type_enable) {
+ rule_hw->eth.ether_type_enable = 1;
+ rule_hw->eth.ether_type = spec->eth.ether_type;
+ }
+ rule_hw->eth.vlan_id = spec->eth.vlan_id;
+ rule_hw->eth.vlan_id_msk = spec->eth.vlan_id_msk;
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ rule_hw->ib.r_u_qpn = spec->ib.r_u_qpn;
+ rule_hw->ib.qpn_mask = spec->ib.qpn_msk;
+ memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16);
+ memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV6:
+ return -EOPNOTSUPP;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ rule_hw->ipv4.src_ip = spec->ipv4.src_ip;
+ rule_hw->ipv4.src_ip_msk = spec->ipv4.src_ip_msk;
+ rule_hw->ipv4.dst_ip = spec->ipv4.dst_ip;
+ rule_hw->ipv4.dst_ip_msk = spec->ipv4.dst_ip_msk;
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ rule_hw->tcp_udp.dst_port = spec->tcp_udp.dst_port;
+ rule_hw->tcp_udp.dst_port_msk = spec->tcp_udp.dst_port_msk;
+ rule_hw->tcp_udp.src_port = spec->tcp_udp.src_port;
+ rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return __rule_hw_sz[spec->id];
+}
+
+static void mlx4_err_rule(struct mlx4_dev *dev, char *str,
+ struct mlx4_net_trans_rule *rule)
+{
+#define BUF_SIZE 256
+ struct mlx4_spec_list *cur;
+ char buf[BUF_SIZE];
+ int len = 0;
+
+ mlx4_err(dev, "%s", str);
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "port = %d prio = 0x%x qp = 0x%x ",
+ rule->port, rule->priority, rule->qpn);
+
+ list_for_each_entry(cur, &rule->list, list) {
+ switch (cur->id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dmac = %pM ", &cur->eth.dst_mac);
+ if (cur->eth.ether_type)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "ethertype = 0x%x ",
+ be16_to_cpu(cur->eth.ether_type));
+ if (cur->eth.vlan_id)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "vlan-id = %d ",
+ be16_to_cpu(cur->eth.vlan_id));
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ if (cur->ipv4.src_ip)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "src-ip = %pI4 ",
+ &cur->ipv4.src_ip);
+ if (cur->ipv4.dst_ip)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-ip = %pI4 ",
+ &cur->ipv4.dst_ip);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ if (cur->tcp_udp.src_port)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "src-port = %d ",
+ be16_to_cpu(cur->tcp_udp.src_port));
+ if (cur->tcp_udp.dst_port)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-port = %d ",
+ be16_to_cpu(cur->tcp_udp.dst_port));
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-gid = %pI6\n", cur->ib.dst_gid);
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-gid-mask = %pI6\n",
+ cur->ib.dst_gid_msk);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV6:
+ break;
+
+ default:
+ break;
+ }
+ }
+ len += snprintf(buf + len, BUF_SIZE - len, "\n");
+ mlx4_err(dev, "%s", buf);
+
+ if (len >= BUF_SIZE)
+ mlx4_err(dev, "Network rule error message was truncated, print buffer is too small.\n");
+}
+
+int mlx4_flow_attach(struct mlx4_dev *dev,
+ struct mlx4_net_trans_rule *rule, u64 *reg_id)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_spec_list *cur;
+ u32 size = 0;
+ int ret;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memset(mailbox->buf, 0, sizeof(struct mlx4_net_trans_rule_hw_ctrl));
+ trans_rule_ctrl_to_hw(rule, mailbox->buf);
+
+ size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+
+ list_for_each_entry(cur, &rule->list, list) {
+ ret = parse_trans_rule(dev, cur, mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return -EINVAL;
+ }
+ size += ret;
+ }
+
+ ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id);
+ if (ret == -ENOMEM)
+ mlx4_err_rule(dev,
+ "mcg table is full. Fail to register network rule.\n",
+ rule);
+ else if (ret)
+ mlx4_err_rule(dev, "Fail to register network rule.\n", rule);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_attach);
+
+int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id)
+{
+ int err;
+
+ err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id);
+ if (err)
+ mlx4_err(dev, "Fail to detach network rule. registration id = 0x%llx\n",
+ reg_id);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_detach);
+
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn)
+{
+ int err;
+ u64 in_param;
+
+ in_param = ((u64) min_range_qpn) << 32;
+ in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF;
+
+ err = mlx4_cmd(dev, in_param, 0, 0,
+ MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE);
+
+int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mgm *mgm;
u32 members_count;
- u16 hash;
int index, prev;
int link = 0;
int i;
int err;
+ u8 port = gid[5];
+ u8 new_entry = 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -169,14 +928,16 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
mgm = mailbox->buf;
mutex_lock(&priv->mcg_table.mutex);
-
- err = find_mgm(dev, gid, prot, mailbox, &hash, &prev, &index);
+ err = find_entry(dev, port, gid, prot,
+ mailbox, &prev, &index);
if (err)
goto out;
if (index != -1) {
- if (!memcmp(mgm->gid, zero_gid, 16))
+ if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) {
+ new_entry = 1;
memcpy(mgm->gid, gid, 16);
+ }
} else {
link = 1;
@@ -188,12 +949,13 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
}
index += dev->caps.num_mgms;
+ new_entry = 1;
memset(mgm, 0, sizeof *mgm);
memcpy(mgm->gid, gid, 16);
}
members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
- if (members_count == MLX4_QP_PER_MGM) {
+ if (members_count == dev->caps.num_qp_per_mgm) {
mlx4_err(dev, "MGM at index %x is full.\n", index);
err = -ENOMEM;
goto out;
@@ -209,25 +971,34 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) |
(!!mlx4_blck_lb << MGM_BLCK_LB_BIT));
- mgm->members_count = cpu_to_be32(members_count | ((u32) prot << 30));
+ mgm->members_count = cpu_to_be32(members_count | (u32) prot << 30);
- err = mlx4_WRITE_MCG(dev, index, mailbox);
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
if (err)
goto out;
if (!link)
goto out;
- err = mlx4_READ_MCG(dev, prev, mailbox);
+ err = mlx4_READ_ENTRY(dev, prev, mailbox);
if (err)
goto out;
mgm->next_gid_index = cpu_to_be32(index << 6);
- err = mlx4_WRITE_MCG(dev, prev, mailbox);
+ err = mlx4_WRITE_ENTRY(dev, prev, mailbox);
if (err)
goto out;
+ if (prot == MLX4_PROT_ETH) {
+ /* manage the steering entry for promisc mode */
+ if (new_entry)
+ new_steering_entry(dev, port, steer, index, qp->qpn);
+ else
+ existing_steering_entry(dev, port, steer,
+ index, qp->qpn);
+ }
+
out:
if (err && link && index != -1) {
if (index < dev->caps.num_mgms)
@@ -242,19 +1013,19 @@ out:
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
-EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
-int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- enum mlx4_mcast_prot prot)
+int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot, enum mlx4_steer_type steer)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mgm *mgm;
u32 members_count;
- u16 hash;
int prev, index;
- int i, loc;
+ int i, loc = -1;
int err;
+ u8 port = gid[5];
+ bool removed_entry = false;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -263,7 +1034,8 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
mutex_lock(&priv->mcg_table.mutex);
- err = find_mgm(dev, gid, prot, mailbox, &hash, &prev, &index);
+ err = find_entry(dev, port, gid, prot,
+ mailbox, &prev, &index);
if (err)
goto out;
@@ -273,10 +1045,17 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
goto out;
}
+ /* if this pq is also a promisc qp, it shouldn't be removed */
+ if (prot == MLX4_PROT_ETH &&
+ check_duplicate_entry(dev, port, steer, index, qp->qpn))
+ goto out;
+
members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
- for (loc = -1, i = 0; i < members_count; ++i)
- if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn)
+ for (i = 0; i < members_count; ++i)
+ if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) {
loc = i;
+ break;
+ }
if (loc == -1) {
mlx4_err(dev, "QP %06x not found in MGM\n", qp->qpn);
@@ -284,27 +1063,33 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
goto out;
}
+ /* copy the last QP in this MGM over removed QP */
+ mgm->qp[loc] = mgm->qp[members_count - 1];
+ mgm->qp[members_count - 1] = 0;
+ mgm->members_count = cpu_to_be32(--members_count | (u32) prot << 30);
- mgm->members_count = cpu_to_be32(--members_count | ((u32) prot << 30));
- mgm->qp[loc] = mgm->qp[i - 1];
- mgm->qp[i - 1] = 0;
-
- if (i != 1) {
- err = mlx4_WRITE_MCG(dev, index, mailbox);
+ if (prot == MLX4_PROT_ETH)
+ removed_entry = can_remove_steering_entry(dev, port, steer,
+ index, qp->qpn);
+ if (members_count && (prot != MLX4_PROT_ETH || !removed_entry)) {
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
goto out;
}
+ /* We are going to delete the entry, members count should be 0 */
+ mgm->members_count = cpu_to_be32((u32) prot << 30);
+
if (prev == -1) {
/* Remove entry from MGM */
int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6;
if (amgm_index) {
- err = mlx4_READ_MCG(dev, amgm_index, mailbox);
+ err = mlx4_READ_ENTRY(dev, amgm_index, mailbox);
if (err)
goto out;
} else
memset(mgm->gid, 0, 16);
- err = mlx4_WRITE_MCG(dev, index, mailbox);
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
if (err)
goto out;
@@ -319,13 +1104,13 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
} else {
/* Remove entry from AMGM */
int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
- err = mlx4_READ_MCG(dev, prev, mailbox);
+ err = mlx4_READ_ENTRY(dev, prev, mailbox);
if (err)
goto out;
mgm->next_gid_index = cpu_to_be32(cur_next_index << 6);
- err = mlx4_WRITE_MCG(dev, prev, mailbox);
+ err = mlx4_WRITE_ENTRY(dev, prev, mailbox);
if (err)
goto out;
@@ -343,13 +1128,286 @@ out:
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
+
+static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], u8 attach, u8 block_loopback,
+ enum mlx4_protocol prot)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err = 0;
+ int qpn;
+
+ if (!mlx4_is_mfunc(dev))
+ return -EBADF;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memcpy(mailbox->buf, gid, 16);
+ qpn = qp->qpn;
+ qpn |= (prot << 28);
+ if (attach && block_loopback)
+ qpn |= (1 << 31);
+
+ err = mlx4_cmd(dev, mailbox->dma, qpn, attach,
+ MLX4_CMD_QP_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ u8 port, int block_mcast_loopback,
+ enum mlx4_protocol prot, u64 *reg_id)
+{
+
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ if (prot == MLX4_PROT_ETH)
+ return 0;
+
+ case MLX4_STEERING_MODE_B0:
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_MC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 1,
+ block_mcast_loopback, prot);
+ return mlx4_qp_attach_common(dev, qp, gid,
+ block_mcast_loopback, prot,
+ MLX4_MC_STEER);
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+ struct mlx4_spec_list spec = { {NULL} };
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .promisc_mode = MLX4_FS_REGULAR,
+ .priority = MLX4_DOMAIN_NIC,
+ };
+
+ rule.allow_loopback = !block_mcast_loopback;
+ rule.port = port;
+ rule.qpn = qp->qpn;
+ INIT_LIST_HEAD(&rule.list);
+
+ switch (prot) {
+ case MLX4_PROT_ETH:
+ spec.id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec.eth.dst_mac, &gid[10], ETH_ALEN);
+ memcpy(spec.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+ break;
+
+ case MLX4_PROT_IB_IPV6:
+ spec.id = MLX4_NET_TRANS_RULE_ID_IB;
+ memcpy(spec.ib.dst_gid, gid, 16);
+ memset(&spec.ib.dst_gid_msk, 0xff, 16);
+ break;
+ default:
+ return -EINVAL;
+ }
+ list_add_tail(&spec.list, &rule.list);
+
+ return mlx4_flow_attach(dev, &rule, reg_id);
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
+
+int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot, u64 reg_id)
+{
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ if (prot == MLX4_PROT_ETH)
+ return 0;
+
+ case MLX4_STEERING_MODE_B0:
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_MC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot);
+
+ return mlx4_qp_detach_common(dev, qp, gid, prot,
+ MLX4_MC_STEER);
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return mlx4_flow_detach(dev, reg_id);
+
+ default:
+ return -EINVAL;
+ }
+}
EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
+int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port,
+ u32 qpn, enum mlx4_net_trans_promisc_mode mode)
+{
+ struct mlx4_net_trans_rule rule;
+ u64 *regid_p;
+
+ switch (mode) {
+ case MLX4_FS_ALL_DEFAULT:
+ regid_p = &dev->regid_promisc_array[port];
+ break;
+ case MLX4_FS_MC_DEFAULT:
+ regid_p = &dev->regid_allmulti_array[port];
+ break;
+ default:
+ return -1;
+ }
+
+ if (*regid_p != 0)
+ return -1;
+
+ rule.promisc_mode = mode;
+ rule.port = port;
+ rule.qpn = qpn;
+ INIT_LIST_HEAD(&rule.list);
+ mlx4_err(dev, "going promisc on %x\n", port);
+
+ return mlx4_flow_attach(dev, &rule, regid_p);
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_add);
+
+int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
+ enum mlx4_net_trans_promisc_mode mode)
+{
+ int ret;
+ u64 *regid_p;
+
+ switch (mode) {
+ case MLX4_FS_ALL_DEFAULT:
+ regid_p = &dev->regid_promisc_array[port];
+ break;
+ case MLX4_FS_MC_DEFAULT:
+ regid_p = &dev->regid_allmulti_array[port];
+ break;
+ default:
+ return -1;
+ }
+
+ if (*regid_p == 0)
+ return -1;
+
+ ret = mlx4_flow_detach(dev, *regid_p);
+ if (ret == 0)
+ *regid_p = 0;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_remove);
+
+int mlx4_unicast_attach(struct mlx4_dev *dev,
+ struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot)
+{
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_UC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 1,
+ block_mcast_loopback, prot);
+
+ return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback,
+ prot, MLX4_UC_STEER);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_attach);
+
+int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], enum mlx4_protocol prot)
+{
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_UC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot);
+
+ return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_UC_STEER);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_detach);
+
+int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u32 qpn = (u32) vhcr->in_param & 0xffffffff;
+ u8 port = vhcr->in_param >> 62;
+ enum mlx4_steer_type steer = vhcr->in_modifier;
+
+ /* Promiscuous unicast is not allowed in mfunc */
+ if (mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)
+ return 0;
+
+ if (vhcr->op_modifier)
+ return add_promisc_qp(dev, port, steer, qpn);
+ else
+ return remove_promisc_qp(dev, port, steer, qpn);
+}
+
+static int mlx4_PROMISC(struct mlx4_dev *dev, u32 qpn,
+ enum mlx4_steer_type steer, u8 add, u8 port)
+{
+ return mlx4_cmd(dev, (u64) qpn | (u64) port << 62, (u32) steer, add,
+ MLX4_CMD_PROMISC, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+}
+
+int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 1, port);
+
+ return add_promisc_qp(dev, port, MLX4_MC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add);
+
+int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 0, port);
+
+ return remove_promisc_qp(dev, port, MLX4_MC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove);
+
+int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 1, port);
+
+ return add_promisc_qp(dev, port, MLX4_UC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add);
+
+int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 0, port);
+
+ return remove_promisc_qp(dev, port, MLX4_UC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_remove);
+
int mlx4_init_mcg_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
+ /* No need for mcg_table when fw managed the mcg table*/
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return 0;
err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms,
dev->caps.num_amgms - 1, 0, 0);
if (err)
@@ -362,5 +1420,7 @@ int mlx4_init_mcg_table(struct mlx4_dev *dev)
void mlx4_cleanup_mcg_table(struct mlx4_dev *dev)
{
- mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap);
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap);
}
diff --git a/sys/ofed/drivers/net/mlx4/mlx4.h b/sys/ofed/drivers/net/mlx4/mlx4.h
index d5d3da9..b342d9a 100644
--- a/sys/ofed/drivers/net/mlx4/mlx4.h
+++ b/sys/ofed/drivers/net/mlx4/mlx4.h
@@ -39,28 +39,56 @@
#include <linux/mutex.h>
#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
#include <linux/timer.h>
+#include <linux/semaphore.h>
#include <linux/workqueue.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/doorbell.h>
+#include <linux/mlx4/cmd.h>
#define DRV_NAME "mlx4_core"
#define PFX DRV_NAME ": "
-#define DRV_VERSION "1.0-ofed1.5.2"
-#define DRV_RELDATE "August 4, 2010"
+#define DRV_VERSION "1.1"
+#define DRV_RELDATE "Dec, 2011"
+
+#define MLX4_FS_UDP_UC_EN (1 << 1)
+#define MLX4_FS_TCP_UC_EN (1 << 2)
+#define MLX4_FS_NUM_OF_L2_ADDR 8
+#define MLX4_FS_MGM_LOG_ENTRY_SIZE 7
+#define MLX4_FS_NUM_MCG (1 << 17)
+
+struct mlx4_set_port_prio2tc_context {
+ u8 prio2tc[4];
+};
+
+struct mlx4_port_scheduler_tc_cfg_be {
+ __be16 pg;
+ __be16 bw_precentage;
+ __be16 max_bw_units; /* 3-100Mbps, 4-1Gbps, other values - reserved */
+ __be16 max_bw_value;
+};
+
+struct mlx4_set_port_scheduler_context {
+ struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC];
+};
enum {
MLX4_HCR_BASE = 0x80680,
MLX4_HCR_SIZE = 0x0001c,
- MLX4_CLR_INT_SIZE = 0x00008
+ MLX4_CLR_INT_SIZE = 0x00008,
+ MLX4_SLAVE_COMM_BASE = 0x0,
+ MLX4_COMM_PAGESIZE = 0x1000,
+ MLX4_CLOCK_SIZE = 0x00008
};
enum {
- MLX4_MGM_ENTRY_SIZE = 0x100,
- MLX4_QP_PER_MGM = 4 * (MLX4_MGM_ENTRY_SIZE / 16 - 2),
- MLX4_MTT_ENTRY_PER_SEG = 8
+ MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10,
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7,
+ MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12,
+ MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE)/16 - 2),
};
enum {
@@ -80,6 +108,100 @@ enum {
MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT
};
+enum mlx4_mr_state {
+ MLX4_MR_DISABLED = 0,
+ MLX4_MR_EN_HW,
+ MLX4_MR_EN_SW
+};
+
+#define MLX4_COMM_TIME 10000
+enum {
+ MLX4_COMM_CMD_RESET,
+ MLX4_COMM_CMD_VHCR0,
+ MLX4_COMM_CMD_VHCR1,
+ MLX4_COMM_CMD_VHCR2,
+ MLX4_COMM_CMD_VHCR_EN,
+ MLX4_COMM_CMD_VHCR_POST,
+ MLX4_COMM_CMD_FLR = 254
+};
+
+/*The flag indicates that the slave should delay the RESET cmd*/
+#define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb
+/*indicates how many retries will be done if we are in the middle of FLR*/
+#define NUM_OF_RESET_RETRIES 10
+#define SLEEP_TIME_IN_RESET (2 * 1000)
+enum mlx4_resource {
+ RES_QP,
+ RES_CQ,
+ RES_SRQ,
+ RES_XRCD,
+ RES_MPT,
+ RES_MTT,
+ RES_MAC,
+ RES_VLAN,
+ RES_EQ,
+ RES_COUNTER,
+ RES_FS_RULE,
+ MLX4_NUM_OF_RESOURCE_TYPE
+};
+
+enum mlx4_alloc_mode {
+ RES_OP_RESERVE,
+ RES_OP_RESERVE_AND_MAP,
+ RES_OP_MAP_ICM,
+};
+
+enum mlx4_res_tracker_free_type {
+ RES_TR_FREE_ALL,
+ RES_TR_FREE_SLAVES_ONLY,
+ RES_TR_FREE_STRUCTS_ONLY,
+};
+
+/*
+ *Virtual HCR structures.
+ * mlx4_vhcr is the sw representation, in machine endianess
+ *
+ * mlx4_vhcr_cmd is the formalized structure, the one that is passed
+ * to FW to go through communication channel.
+ * It is big endian, and has the same structure as the physical HCR
+ * used by command interface
+ */
+struct mlx4_vhcr {
+ u64 in_param;
+ u64 out_param;
+ u32 in_modifier;
+ u32 errno;
+ u16 op;
+ u16 token;
+ u8 op_modifier;
+ u8 e_bit;
+};
+
+struct mlx4_vhcr_cmd {
+ __be64 in_param;
+ __be32 in_modifier;
+ __be64 out_param;
+ __be16 token;
+ u16 reserved;
+ u8 status;
+ u8 flags;
+ __be16 opcode;
+};
+
+struct mlx4_cmd_info {
+ u16 opcode;
+ bool has_inbox;
+ bool has_outbox;
+ bool out_is_imm;
+ bool encode_slave_id;
+ int (*verify)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox);
+ int (*wrapper)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+};
+
#ifdef CONFIG_MLX4_DEBUG
extern int mlx4_debug_level;
#else /* CONFIG_MLX4_DEBUG */
@@ -87,19 +209,25 @@ extern int mlx4_debug_level;
#endif /* CONFIG_MLX4_DEBUG */
#define mlx4_dbg(mdev, format, arg...) \
- do { \
- if (mlx4_debug_level) \
- dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \
- } while (0)
+do { \
+ if (mlx4_debug_level) \
+ dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ##arg); \
+} while (0)
#define mlx4_err(mdev, format, arg...) \
- dev_err(&mdev->pdev->dev, format, ## arg)
+ dev_err(&mdev->pdev->dev, format, ##arg)
#define mlx4_info(mdev, format, arg...) \
- dev_info(&mdev->pdev->dev, format, ## arg)
+ dev_info(&mdev->pdev->dev, format, ##arg)
#define mlx4_warn(mdev, format, arg...) \
- dev_warn(&mdev->pdev->dev, format, ## arg)
+ dev_warn(&mdev->pdev->dev, format, ##arg)
+extern int mlx4_log_num_mgm_entry_size;
+extern int log_mtts_per_seg;
extern int mlx4_blck_lb;
+extern int mlx4_set_4k_mtu;
+
+#define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF)
+#define ALL_SLAVES 0xff
struct mlx4_bitmap {
u32 last;
@@ -115,7 +243,7 @@ struct mlx4_bitmap {
struct mlx4_buddy {
unsigned long **bits;
unsigned int *num_free;
- int max_order;
+ u32 max_order;
spinlock_t lock;
};
@@ -124,7 +252,7 @@ struct mlx4_icm;
struct mlx4_icm_table {
u64 virt;
int num_icm;
- int num_obj;
+ u32 num_obj;
int obj_size;
int lowmem;
int coherent;
@@ -132,6 +260,91 @@ struct mlx4_icm_table {
struct mlx4_icm **icm;
};
+/*
+ * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
+ */
+struct mlx4_mpt_entry {
+ __be32 flags;
+ __be32 qpn;
+ __be32 key;
+ __be32 pd_flags;
+ __be64 start;
+ __be64 length;
+ __be32 lkey;
+ __be32 win_cnt;
+ u8 reserved1[3];
+ u8 mtt_rep;
+ __be64 mtt_addr;
+ __be32 mtt_sz;
+ __be32 entity_size;
+ __be32 first_byte_offset;
+} __packed;
+
+/*
+ * Must be packed because start is 64 bits but only aligned to 32 bits.
+ */
+struct mlx4_eq_context {
+ __be32 flags;
+ u16 reserved1[3];
+ __be16 page_offset;
+ u8 log_eq_size;
+ u8 reserved2[4];
+ u8 eq_period;
+ u8 reserved3;
+ u8 eq_max_count;
+ u8 reserved4[3];
+ u8 intr;
+ u8 log_page_size;
+ u8 reserved5[2];
+ u8 mtt_base_addr_h;
+ __be32 mtt_base_addr_l;
+ u32 reserved6[2];
+ __be32 consumer_index;
+ __be32 producer_index;
+ u32 reserved7[4];
+};
+
+struct mlx4_cq_context {
+ __be32 flags;
+ u16 reserved1[3];
+ __be16 page_offset;
+ __be32 logsize_usrpage;
+ __be16 cq_period;
+ __be16 cq_max_count;
+ u8 reserved2[3];
+ u8 comp_eqn;
+ u8 log_page_size;
+ u8 reserved3[2];
+ u8 mtt_base_addr_h;
+ __be32 mtt_base_addr_l;
+ __be32 last_notified_index;
+ __be32 solicit_producer_index;
+ __be32 consumer_index;
+ __be32 producer_index;
+ u32 reserved4[2];
+ __be64 db_rec_addr;
+};
+
+struct mlx4_srq_context {
+ __be32 state_logsize_srqn;
+ u8 logstride;
+ u8 reserved1;
+ __be16 xrcd;
+ __be32 pg_offset_cqn;
+ u32 reserved2;
+ u8 log_page_size;
+ u8 reserved3[2];
+ u8 mtt_base_addr_h;
+ __be32 mtt_base_addr_l;
+ __be32 pd;
+ __be16 limit_watermark;
+ __be16 wqe_cnt;
+ u16 reserved4;
+ __be16 wqe_counter;
+ u32 reserved5;
+ __be64 db_rec_addr;
+};
+
struct mlx4_eq {
struct mlx4_dev *dev;
void __iomem *doorbell;
@@ -140,11 +353,22 @@ struct mlx4_eq {
u16 irq;
u16 have_irq;
int nent;
- int load;
+ int load;
struct mlx4_buf_list *page_list;
struct mlx4_mtt mtt;
};
+struct mlx4_slave_eqe {
+ u8 type;
+ u8 port;
+ u32 param;
+};
+
+struct mlx4_slave_event_eq_info {
+ int eqn;
+ u16 token;
+};
+
struct mlx4_profile {
int num_qp;
int rdmarc_per_qp;
@@ -152,24 +376,194 @@ struct mlx4_profile {
int num_cq;
int num_mcg;
int num_mpt;
- int num_mtt;
+ unsigned num_mtt;
};
struct mlx4_fw {
u64 clr_int_base;
u64 catas_offset;
+ u64 comm_base;
+ u64 clock_offset;
struct mlx4_icm *fw_icm;
struct mlx4_icm *aux_icm;
u32 catas_size;
u16 fw_pages;
u8 clr_int_bar;
u8 catas_bar;
+ u8 comm_bar;
+ u8 clock_bar;
+};
+
+struct mlx4_comm {
+ u32 slave_write;
+ u32 slave_read;
+};
+
+enum {
+ MLX4_MCAST_CONFIG = 0,
+ MLX4_MCAST_DISABLE = 1,
+ MLX4_MCAST_ENABLE = 2,
+};
+
+#define VLAN_FLTR_SIZE 128
+
+struct mlx4_vlan_fltr {
+ __be32 entry[VLAN_FLTR_SIZE];
+};
+
+struct mlx4_mcast_entry {
+ struct list_head list;
+ u64 addr;
+};
+
+struct mlx4_promisc_qp {
+ struct list_head list;
+ u32 qpn;
+};
+
+struct mlx4_steer_index {
+ struct list_head list;
+ unsigned int index;
+ struct list_head duplicates;
+};
+
+#define MLX4_EVENT_TYPES_NUM 64
+
+struct mlx4_slave_state {
+ u8 comm_toggle;
+ u8 last_cmd;
+ u8 init_port_mask;
+ bool active;
+ u8 function;
+ dma_addr_t vhcr_dma;
+ u16 mtu[MLX4_MAX_PORTS + 1];
+ __be32 ib_cap_mask[MLX4_MAX_PORTS + 1];
+ struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES];
+ struct list_head mcast_filters[MLX4_MAX_PORTS + 1];
+ struct mlx4_vlan_fltr *vlan_filter[MLX4_MAX_PORTS + 1];
+ /* event type to eq number lookup */
+ struct mlx4_slave_event_eq_info event_eq[MLX4_EVENT_TYPES_NUM];
+ u16 eq_pi;
+ u16 eq_ci;
+ spinlock_t lock;
+ /*initialized via the kzalloc*/
+ u8 is_slave_going_down;
+ u32 cookie;
+ enum slave_port_state port_state[MLX4_MAX_PORTS + 1];
+};
+
+#define MLX4_VGT 4095
+#define NO_INDX (-1)
+
+struct mlx4_vport_state {
+ u64 mac;
+ u16 default_vlan;
+ u8 default_qos;
+ u32 tx_rate;
+ bool spoofchk;
+};
+
+struct mlx4_vf_admin_state {
+ struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1];
+};
+
+struct mlx4_vport_oper_state {
+ struct mlx4_vport_state state;
+ int mac_idx;
+ int vlan_idx;
+};
+struct mlx4_vf_oper_state {
+ struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1];
+};
+
+struct slave_list {
+ struct mutex mutex;
+ struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE];
+};
+
+struct resource_allocator {
+ spinlock_t alloc_lock;
+ union {
+ int res_reserved;
+ int res_port_rsvd[MLX4_MAX_PORTS];
+ };
+ union {
+ int res_free;
+ int res_port_free[MLX4_MAX_PORTS];
+ };
+ int *quota;
+ int *allocated;
+ int *guaranteed;
+};
+
+struct mlx4_resource_tracker {
+ spinlock_t lock;
+ /* tree for each resources */
+ struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE];
+ /* num_of_slave's lists, one per slave */
+ struct slave_list *slave_list;
+ struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE];
+};
+
+#define SLAVE_EVENT_EQ_SIZE 128
+struct mlx4_slave_event_eq {
+ u32 eqn;
+ u32 cons;
+ u32 prod;
+ spinlock_t event_lock;
+ struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE];
+};
+
+struct mlx4_master_qp0_state {
+ int proxy_qp0_active;
+ int qp0_active;
+ int port_active;
+};
+
+struct mlx4_mfunc_master_ctx {
+ struct mlx4_slave_state *slave_state;
+ struct mlx4_vf_admin_state *vf_admin;
+ struct mlx4_vf_oper_state *vf_oper;
+ struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1];
+ int init_port_ref[MLX4_MAX_PORTS + 1];
+ u16 max_mtu[MLX4_MAX_PORTS + 1];
+ int disable_mcast_ref[MLX4_MAX_PORTS + 1];
+ struct mlx4_resource_tracker res_tracker;
+ struct workqueue_struct *comm_wq;
+ struct work_struct comm_work;
+ struct work_struct slave_event_work;
+ struct work_struct slave_flr_event_work;
+ spinlock_t slave_state_lock;
+ __be32 comm_arm_bit_vector[4];
+ struct mlx4_eqe cmd_eqe;
+ struct mlx4_slave_event_eq slave_eq;
+ struct mutex gen_eqe_mutex[MLX4_MFUNC_MAX];
+};
+
+struct mlx4_mfunc {
+ struct mlx4_comm __iomem *comm;
+ struct mlx4_vhcr_cmd *vhcr;
+ dma_addr_t vhcr_dma;
+
+ struct mlx4_mfunc_master_ctx master;
+};
+
+#define MGM_QPN_MASK 0x00FFFFFF
+#define MGM_BLCK_LB_BIT 30
+
+struct mlx4_mgm {
+ __be32 next_gid_index;
+ __be32 members_count;
+ u32 reserved[2];
+ u8 gid[16];
+ __be32 qp[MLX4_MAX_QP_PER_MGM];
};
struct mlx4_cmd {
struct pci_pool *pool;
void __iomem *hcr;
struct mutex hcr_mutex;
+ struct mutex slave_cmd_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
int max_cmds;
@@ -179,6 +573,7 @@ struct mlx4_cmd {
u16 token_mask;
u8 use_events;
u8 toggle;
+ u8 comm_toggle;
};
struct mlx4_uar_table {
@@ -218,6 +613,7 @@ struct mlx4_eq_table {
struct mlx4_srq_table {
struct mlx4_bitmap bitmap;
spinlock_t lock;
+ struct radix_tree_root tree;
struct mlx4_icm_table table;
struct mlx4_icm_table cmpt_table;
};
@@ -268,14 +664,59 @@ struct mlx4_vlan_table {
int max;
};
+#define SET_PORT_GEN_ALL_VALID 0x7
+#define SET_PORT_PROMISC_SHIFT 31
+#define SET_PORT_MC_PROMISC_SHIFT 30
+
+enum {
+ MCAST_DIRECT_ONLY = 0,
+ MCAST_DIRECT = 1,
+ MCAST_DEFAULT = 2
+};
+
+
+struct mlx4_set_port_general_context {
+ u8 reserved[3];
+ u8 flags;
+ u16 reserved2;
+ __be16 mtu;
+ u8 pptx;
+ u8 pfctx;
+ u16 reserved3;
+ u8 pprx;
+ u8 pfcrx;
+ u16 reserved4;
+};
+
+struct mlx4_set_port_rqp_calc_context {
+ __be32 base_qpn;
+ u8 rererved;
+ u8 n_mac;
+ u8 n_vlan;
+ u8 n_prio;
+ u8 reserved2[3];
+ u8 mac_miss;
+ u8 intra_no_vlan;
+ u8 no_vlan;
+ u8 intra_vlan_miss;
+ u8 vlan_miss;
+ u8 reserved3[3];
+ u8 no_vlan_prio;
+ __be32 promisc;
+ __be32 mcast;
+};
+
struct mlx4_port_info {
struct mlx4_dev *dev;
int port;
char dev_name[16];
struct device_attribute port_attr;
enum mlx4_port_type tmp_type;
+ char dev_mtu_name[16];
+ struct device_attribute port_mtu_attr;
struct mlx4_mac_table mac_table;
struct mlx4_vlan_table vlan_table;
+ int base_qpn;
};
struct mlx4_sense {
@@ -283,11 +724,107 @@ struct mlx4_sense {
u8 do_sense_port[MLX4_MAX_PORTS + 1];
u8 sense_allowed[MLX4_MAX_PORTS + 1];
struct delayed_work sense_poll;
- struct workqueue_struct *sense_wq;
- u32 resched;
+ struct workqueue_struct *sense_wq;
+ u32 resched;
+};
+
+struct mlx4_msix_ctl {
+ u64 pool_bm;
+ struct mutex pool_lock;
+};
+
+struct mlx4_steer {
+ struct list_head promisc_qps[MLX4_NUM_STEERS];
+ struct list_head steer_entries[MLX4_NUM_STEERS];
+};
+
+struct mlx4_net_trans_rule_hw_ctrl {
+ __be32 ctrl;
+ u8 rsvd1;
+ u8 funcid;
+ u8 vep;
+ u8 port;
+ __be32 qpn;
+ __be32 rsvd2;
};
-extern struct mutex drv_mutex;
+struct mlx4_net_trans_rule_hw_ib {
+ u8 size;
+ u8 rsvd1;
+ __be16 id;
+ u32 rsvd2;
+ __be32 r_u_qpn;
+ __be32 qpn_mask;
+ u8 dst_gid[16];
+ u8 dst_gid_msk[16];
+} __packed;
+
+struct mlx4_net_trans_rule_hw_eth {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ u8 rsvd1[6];
+ u8 dst_mac[6];
+ u16 rsvd2;
+ u8 dst_mac_msk[6];
+ u16 rsvd3;
+ u8 src_mac[6];
+ u16 rsvd4;
+ u8 src_mac_msk[6];
+ u8 rsvd5;
+ u8 ether_type_enable;
+ __be16 ether_type;
+ __be16 vlan_id_msk;
+ __be16 vlan_id;
+} __packed;
+
+struct mlx4_net_trans_rule_hw_tcp_udp {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ __be16 rsvd1[3];
+ __be16 dst_port;
+ __be16 rsvd2;
+ __be16 dst_port_msk;
+ __be16 rsvd3;
+ __be16 src_port;
+ __be16 rsvd4;
+ __be16 src_port_msk;
+} __packed;
+
+struct mlx4_net_trans_rule_hw_ipv4 {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ __be32 rsvd1;
+ __be32 dst_ip;
+ __be32 dst_ip_msk;
+ __be32 src_ip;
+ __be32 src_ip_msk;
+} __packed;
+
+struct _rule_hw {
+ union {
+ struct {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ };
+ struct mlx4_net_trans_rule_hw_eth eth;
+ struct mlx4_net_trans_rule_hw_ib ib;
+ struct mlx4_net_trans_rule_hw_ipv4 ipv4;
+ struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp;
+ };
+};
+
+enum {
+ MLX4_PCI_DEV_IS_VF = 1 << 0,
+ MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1,
+};
+
+struct mlx4_roce_gid_entry {
+ u8 raw[16];
+};
struct mlx4_priv {
struct mlx4_dev dev;
@@ -296,11 +833,14 @@ struct mlx4_priv {
struct list_head ctx_list;
spinlock_t ctx_lock;
+ int pci_dev_data;
+
struct list_head pgdir_list;
struct mutex pgdir_mutex;
struct mlx4_fw fw;
struct mlx4_cmd cmd;
+ struct mlx4_mfunc mfunc;
struct mlx4_bitmap pd_bitmap;
struct mlx4_bitmap xrcd_bitmap;
@@ -312,8 +852,6 @@ struct mlx4_priv {
struct mlx4_qp_table qp_table;
struct mlx4_mcg_table mcg_table;
struct mlx4_bitmap counters_bitmap;
- struct list_head bf_list;
- struct mutex bf_mutex;
struct mlx4_catas_err catas_err;
@@ -322,13 +860,21 @@ struct mlx4_priv {
struct mlx4_uar driver_uar;
void __iomem *kar;
struct mlx4_port_info port[MLX4_MAX_PORTS + 1];
- struct device_attribute trigger_attr;
- int trig;
- int changed_ports;
struct mlx4_sense sense;
struct mutex port_mutex;
- int iboe_counter_index[MLX4_MAX_PORTS];
- struct io_mapping *bf_mapping;
+ struct mlx4_msix_ctl msix_ctl;
+ struct mlx4_steer *steer;
+ struct list_head bf_list;
+ struct mutex bf_mutex;
+ struct io_mapping *bf_mapping;
+ void __iomem *clock_mapping;
+ int reserved_mtts;
+ int fs_hash_mode;
+ u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ __be64 slave_node_guids[MLX4_MFUNC_MAX];
+ struct mlx4_roce_gid_entry roce_gids[MLX4_MAX_PORTS][128];
+ atomic_t opreq_count;
+ struct work_struct opreq_task;
};
static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
@@ -342,7 +888,8 @@ extern struct workqueue_struct *mlx4_wq;
u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj);
-u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align);
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt,
+ int align, u32 skip_mask);
void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt);
u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap);
int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
@@ -365,6 +912,7 @@ int mlx4_init_srq_table(struct mlx4_dev *dev);
int mlx4_init_mcg_table(struct mlx4_dev *dev);
void mlx4_cleanup_pd_table(struct mlx4_dev *dev);
+void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev);
void mlx4_cleanup_uar_table(struct mlx4_dev *dev);
void mlx4_cleanup_mr_table(struct mlx4_dev *dev);
void mlx4_cleanup_eq_table(struct mlx4_dev *dev);
@@ -372,7 +920,65 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev);
void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
-void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev);
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn);
+void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn);
+int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn);
+void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn);
+int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn);
+void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn);
+int __mlx4_mr_reserve(struct mlx4_dev *dev);
+void __mlx4_mr_release(struct mlx4_dev *dev, u32 index);
+int __mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index);
+void __mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index);
+u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order);
+void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order);
+
+int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SYNC_TPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 bf_qp);
+void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
+int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ int start_index, int npages, u64 *page_list);
+int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
+void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
+int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
+void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
void mlx4_start_catas_poll(struct mlx4_dev *dev);
void mlx4_stop_catas_poll(struct mlx4_dev *dev);
@@ -380,8 +986,8 @@ void mlx4_catas_init(void);
int mlx4_restart_one(struct pci_dev *pdev);
int mlx4_register_device(struct mlx4_dev *dev);
void mlx4_unregister_device(struct mlx4_dev *dev);
-void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port);
-void *mlx4_find_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port);
+void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
+ unsigned long param);
struct mlx4_dev_cap;
struct mlx4_init_hca_param;
@@ -390,13 +996,158 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
struct mlx4_profile *request,
struct mlx4_dev_cap *dev_cap,
struct mlx4_init_hca_param *init_hca);
+void mlx4_master_comm_channel(struct work_struct *work);
+void mlx4_gen_slave_eqe(struct work_struct *work);
+void mlx4_master_handle_slave_flr(struct work_struct *work);
+
+int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_COMM_INT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+
+int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
int mlx4_cmd_init(struct mlx4_dev *dev);
void mlx4_cmd_cleanup(struct mlx4_dev *dev);
+int mlx4_multi_func_init(struct mlx4_dev *dev);
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
int mlx4_cmd_use_events(struct mlx4_dev *dev);
void mlx4_cmd_use_polling(struct mlx4_dev *dev);
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+ unsigned long timeout);
+
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
@@ -406,13 +1157,15 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
void mlx4_handle_catas_err(struct mlx4_dev *dev);
+int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
+ enum mlx4_port_type *type);
void mlx4_do_sense_ports(struct mlx4_dev *dev,
enum mlx4_port_type *stype,
enum mlx4_port_type *defaults);
void mlx4_start_sense(struct mlx4_dev *dev);
void mlx4_stop_sense(struct mlx4_dev *dev);
-int mlx4_sense_init(struct mlx4_dev *dev);
void mlx4_sense_cleanup(struct mlx4_dev *dev);
+int mlx4_sense_init(struct mlx4_dev *dev);
int mlx4_check_port_params(struct mlx4_dev *dev,
enum mlx4_port_type *port_type);
int mlx4_change_port_types(struct mlx4_dev *dev,
@@ -420,8 +1173,147 @@ int mlx4_change_port_types(struct mlx4_dev *dev,
void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table);
void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
-
-int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
+void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
+int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
+
+int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz);
+/* resource tracker functions*/
+int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
+ enum mlx4_resource resource_type,
+ u64 resource_id, int *slave);
+void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id);
+int mlx4_init_resource_tracker(struct mlx4_dev *dev);
+
+void mlx4_free_resource_tracker(struct mlx4_dev *dev,
+ enum mlx4_res_tracker_free_type type);
+
+int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps);
+int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port,
+ int *gid_tbl_len, int *pkey_tbl_len);
+
+int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+
+int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot, enum mlx4_steer_type steer);
+int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer);
+int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function,
+ int port, void *buf);
+int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, u32 in_mod,
+ struct mlx4_cmd_mailbox *outbox);
+int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_PKEY_TABLE_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+
+int mlx4_get_mgm_entry_size(struct mlx4_dev *dev);
+int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);
+
+static inline void set_param_l(u64 *arg, u32 val)
+{
+ *arg = (*arg & 0xffffffff00000000ULL) | (u64) val;
+}
+
+static inline void set_param_h(u64 *arg, u32 val)
+{
+ *arg = (*arg & 0xffffffff) | ((u64) val << 32);
+}
+
+static inline u32 get_param_l(u64 *arg)
+{
+ return (u32) (*arg & 0xffffffff);
+}
+
+static inline u32 get_param_h(u64 *arg)
+{
+ return (u32)(*arg >> 32);
+}
+
+static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev)
+{
+ return &mlx4_priv(dev)->mfunc.master.res_tracker.lock;
+}
+
+#define NOT_MASKED_PD_BITS 17
+
+void sys_tune_init(void);
+void sys_tune_fini(void);
+
+void mlx4_init_quotas(struct mlx4_dev *dev);
+
+int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave);
+int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave);
+
#endif /* MLX4_H */
diff --git a/sys/ofed/drivers/net/mlx4/mlx4_en.h b/sys/ofed/drivers/net/mlx4/mlx4_en.h
index 5b21d93..f3f71c7 100644
--- a/sys/ofed/drivers/net/mlx4/mlx4_en.h
+++ b/sys/ofed/drivers/net/mlx4/mlx4_en.h
@@ -568,6 +568,7 @@ enum mlx4_en_wol {
MLX4_EN_WOL_DO_MODIFY = (1ULL << 63),
};
+
int mlx4_en_transmit(struct net_device *dev, struct mbuf *mb);
void mlx4_en_qflush(struct net_device *dev);
@@ -635,12 +636,12 @@ void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring);
void mlx4_en_rx_irq(struct mlx4_cq *mcq);
-int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
+//int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans);
-int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
- u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
-int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
- u8 promisc);
+//int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
+// u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
+//int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
+// u8 promisc);
int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset);
int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port);
diff --git a/sys/ofed/drivers/net/mlx4/mr.c b/sys/ofed/drivers/net/mlx4/mr.c
index 9ed610a..3daa995 100644
--- a/sys/ofed/drivers/net/mlx4/mr.c
+++ b/sys/ofed/drivers/net/mlx4/mr.c
@@ -34,34 +34,15 @@
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
#include <linux/mlx4/cmd.h>
#include "mlx4.h"
#include "icm.h"
-/*
- * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
- */
-struct mlx4_mpt_entry {
- __be32 flags;
- __be32 qpn;
- __be32 key;
- __be32 pd_flags;
- __be64 start;
- __be64 length;
- __be32 lkey;
- __be32 win_cnt;
- u8 reserved1;
- u8 flags2;
- u8 reserved2;
- u8 mtt_rep;
- __be64 mtt_seg;
- __be32 mtt_sz;
- __be32 entity_size;
- __be32 first_byte_offset;
-} __attribute__((packed));
-
#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28)
#define MLX4_MPT_FLAG_FREE (0x3UL << 28)
#define MLX4_MPT_FLAG_MIO (1 << 17)
@@ -73,8 +54,6 @@ struct mlx4_mpt_entry {
#define MLX4_MPT_PD_FLAG_RAE (1 << 28)
#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24)
-#define MLX4_MPT_FLAG2_FBO_EN (1 << 7)
-
#define MLX4_MPT_STATUS_SW 0xF0
#define MLX4_MPT_STATUS_HW 0x00
@@ -141,19 +120,19 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
- buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
+ buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *),
GFP_KERNEL);
- buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
+ buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
GFP_KERNEL);
if (!buddy->bits || !buddy->num_free)
goto err_out;
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
- buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
- if (!buddy->bits[i])
- goto err_out_free;
- bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
+ buddy->bits[i] = kcalloc(s, sizeof (long), GFP_KERNEL | __GFP_NOWARN);
+ if (!buddy->bits[i]) {
+ goto err_out_free;
+ }
}
set_bit(0, buddy->bits[buddy->max_order]);
@@ -163,7 +142,8 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
- kfree(buddy->bits[i]);
+ if ( buddy->bits[i] )
+ kfree(buddy->bits[i]);
err_out:
kfree(buddy->bits);
@@ -177,28 +157,54 @@ static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
int i;
for (i = 0; i <= buddy->max_order; ++i)
- kfree(buddy->bits[i]);
+ kfree(buddy->bits[i]);
kfree(buddy->bits);
kfree(buddy->num_free);
}
-static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
+u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
u32 seg;
+ int seg_order;
+ u32 offset;
+
+ seg_order = max_t(int, order - log_mtts_per_seg, 0);
- seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, order);
+ seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order);
if (seg == -1)
return -1;
- if (mlx4_table_get_range(dev, &mr_table->mtt_table, seg,
- seg + (1 << order) - 1)) {
- mlx4_buddy_free(&mr_table->mtt_buddy, seg, order);
+ offset = seg * (1 << log_mtts_per_seg);
+
+ if (mlx4_table_get_range(dev, &mr_table->mtt_table, offset,
+ offset + (1 << order) - 1)) {
+ mlx4_buddy_free(&mr_table->mtt_buddy, seg, seg_order);
return -1;
}
- return seg;
+ return offset;
+}
+
+static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
+{
+ u64 in_param = 0;
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, order);
+ err = mlx4_cmd_imm(dev, in_param, &out_param, RES_MTT,
+ RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ return -1;
+ return get_param_l(&out_param);
+ }
+ return __mlx4_alloc_mtt_range(dev, order);
}
int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
@@ -213,33 +219,66 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
} else
mtt->page_shift = page_shift;
- for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1)
+ for (mtt->order = 0, i = 1; i < npages; i <<= 1)
++mtt->order;
- mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order);
- if (mtt->first_seg == -1)
+ mtt->offset = mlx4_alloc_mtt_range(dev, mtt->order);
+ if (mtt->offset == -1) {
+ mlx4_err(dev, "Failed to allocate mtts for %d pages(order %d)\n",
+ npages, mtt->order);
return -ENOMEM;
+ }
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_init);
-void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
+void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
{
+ u32 first_seg;
+ int seg_order;
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+ seg_order = max_t(int, order - log_mtts_per_seg, 0);
+ first_seg = offset / (1 << log_mtts_per_seg);
+
+ mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, seg_order);
+ mlx4_table_put_range(dev, &mr_table->mtt_table, offset,
+ offset + (1 << order) - 1);
+}
+
+static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, offset);
+ set_param_h(&in_param, order);
+ err = mlx4_cmd(dev, in_param, RES_MTT, RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed to free mtt range at:"
+ "%d order:%d\n", offset, order);
+ return;
+ }
+ __mlx4_free_mtt_range(dev, offset, order);
+}
+
+void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
+{
if (mtt->order < 0)
return;
- mlx4_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order);
- mlx4_table_put_range(dev, &mr_table->mtt_table, mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
+ mlx4_free_mtt_range(dev, mtt->offset, mtt->order);
}
EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);
u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
{
- return (u64) mtt->first_seg * dev->caps.mtt_entry_sz;
+ return (u64) mtt->offset * dev->caps.mtt_entry_sz;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_addr);
@@ -256,106 +295,180 @@ static u32 key_to_hw_index(u32 key)
static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int mpt_index)
{
- return mlx4_cmd(dev, mailbox->dma, mpt_index, 0, MLX4_CMD_SW2HW_MPT,
- MLX4_CMD_TIME_CLASS_B);
+ return mlx4_cmd(dev, mailbox->dma, mpt_index,
+ 0, MLX4_CMD_SW2HW_MPT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int mpt_index)
{
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
- !mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B);
+ !mailbox, MLX4_CMD_HW2SW_MPT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+ u64 iova, u64 size, u32 access, int npages,
+ int page_shift, struct mlx4_mr *mr)
+{
+ mr->iova = iova;
+ mr->size = size;
+ mr->pd = pd;
+ mr->access = access;
+ mr->enabled = MLX4_MR_DISABLED;
+ mr->key = hw_index_to_key(mridx);
+
+ return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+}
+
+static int mlx4_WRITE_MTT(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox,
+ int num_entries)
+{
+ return mlx4_cmd(dev, mailbox->dma, num_entries, 0, MLX4_CMD_WRITE_MTT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
-int mlx4_mr_reserve_range(struct mlx4_dev *dev, int cnt, int align, u32 *base_mridx)
+int __mlx4_mr_reserve(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- u32 mridx;
- mridx = mlx4_bitmap_alloc_range(&priv->mr_table.mpt_bitmap, cnt, align);
- if (mridx == -1)
- return -ENOMEM;
+ return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
+}
- *base_mridx = mridx;
- return 0;
+static int mlx4_mr_reserve(struct mlx4_dev *dev)
+{
+ u64 out_param;
+ if (mlx4_is_mfunc(dev)) {
+ if (mlx4_cmd_imm(dev, 0, &out_param, RES_MPT, RES_OP_RESERVE,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
+ return -1;
+ return get_param_l(&out_param);
+ }
+ return __mlx4_mr_reserve(dev);
}
-EXPORT_SYMBOL_GPL(mlx4_mr_reserve_range);
-void mlx4_mr_release_range(struct mlx4_dev *dev, u32 base_mridx, int cnt)
+void __mlx4_mr_release(struct mlx4_dev *dev, u32 index)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- mlx4_bitmap_free_range(&priv->mr_table.mpt_bitmap, base_mridx, cnt);
+
+ mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);
}
-EXPORT_SYMBOL_GPL(mlx4_mr_release_range);
-int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
- u64 iova, u64 size, u32 access, int npages,
- int page_shift, struct mlx4_mr *mr)
+static void mlx4_mr_release(struct mlx4_dev *dev, u32 index)
{
- mr->iova = iova;
- mr->size = size;
- mr->pd = pd;
- mr->access = access;
- mr->enabled = 0;
- mr->key = hw_index_to_key(mridx);
+ u64 in_param = 0;
- return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, index);
+ if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_RESERVE,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed to release mr index:%d\n",
+ index);
+ return;
+ }
+ __mlx4_mr_release(dev, index);
+}
+
+int __mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index)
+{
+ struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+
+ return mlx4_table_get(dev, &mr_table->dmpt_table, index);
+}
+
+static int mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index)
+{
+ u64 param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&param, index);
+ return mlx4_cmd_imm(dev, param, &param, RES_MPT, RES_OP_MAP_ICM,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ }
+ return __mlx4_mr_alloc_icm(dev, index);
+}
+
+void __mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index)
+{
+ struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+
+ mlx4_table_put(dev, &mr_table->dmpt_table, index);
+}
+
+static void mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, index);
+ if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_MAP_ICM,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed to free icm of mr index:%d\n",
+ index);
+ return;
+ }
+ return __mlx4_mr_free_icm(dev, index);
}
-EXPORT_SYMBOL_GPL(mlx4_mr_alloc_reserved);
int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
int npages, int page_shift, struct mlx4_mr *mr)
{
- struct mlx4_priv *priv = mlx4_priv(dev);
u32 index;
int err;
- index = mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
+ index = mlx4_mr_reserve(dev);
if (index == -1)
return -ENOMEM;
err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size,
access, npages, page_shift, mr);
if (err)
- mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);
+ mlx4_mr_release(dev, index);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_alloc);
-void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
+static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
int err;
- if (mr->enabled) {
+ if (mr->enabled == MLX4_MR_EN_HW) {
err = mlx4_HW2SW_MPT(dev, NULL,
key_to_hw_index(mr->key) &
(dev->caps.num_mpts - 1));
if (err)
- mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err);
- }
+ mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err);
+ mr->enabled = MLX4_MR_EN_SW;
+ }
mlx4_mtt_cleanup(dev, &mr->mtt);
}
-EXPORT_SYMBOL_GPL(mlx4_mr_free_reserved);
void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
- struct mlx4_priv *priv = mlx4_priv(dev);
mlx4_mr_free_reserved(dev, mr);
- mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, key_to_hw_index(mr->key));
+ if (mr->enabled)
+ mlx4_mr_free_icm(dev, key_to_hw_index(mr->key));
+ mlx4_mr_release(dev, key_to_hw_index(mr->key));
}
EXPORT_SYMBOL_GPL(mlx4_mr_free);
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
- struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mpt_entry *mpt_entry;
int err;
- err = mlx4_table_get(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
+ err = mlx4_mr_alloc_icm(dev, key_to_hw_index(mr->key));
if (err)
return err;
@@ -380,9 +493,10 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
if (mr->mtt.order < 0) {
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
- mpt_entry->mtt_seg = 0;
+ mpt_entry->mtt_addr = 0;
} else {
- mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
+ mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
+ &mr->mtt));
}
if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
@@ -390,8 +504,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
MLX4_MPT_PD_FLAG_RAE);
- mpt_entry->mtt_sz = cpu_to_be32((1 << mr->mtt.order) *
- dev->caps.mtts_per_seg);
+ mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order);
} else {
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
}
@@ -402,8 +515,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_cmd;
}
-
- mr->enabled = 1;
+ mr->enabled = MLX4_MR_EN_HW;
mlx4_free_cmd_mailbox(dev, mailbox);
@@ -413,7 +525,7 @@ err_cmd:
mlx4_free_cmd_mailbox(dev, mailbox);
err_table:
- mlx4_table_put(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
+ mlx4_mr_free_icm(dev, key_to_hw_index(mr->key));
return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_enable);
@@ -425,50 +537,94 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
__be64 *mtts;
dma_addr_t dma_handle;
int i;
- int s = start_index * sizeof (u64);
-
- /* All MTTs must fit in the same page */
- if (start_index / (PAGE_SIZE / sizeof (u64)) !=
- (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
- return -EINVAL;
- if (start_index & (dev->caps.mtts_per_seg - 1))
- return -EINVAL;
+ mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->offset +
+ start_index, &dma_handle);
- mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
- s / dev->caps.mtt_entry_sz, &dma_handle);
if (!mtts)
return -ENOMEM;
+ dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
+ npages * sizeof (u64), DMA_TO_DEVICE);
+
for (i = 0; i < npages; ++i)
mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
- dma_sync_single(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE);
+ dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
+ npages * sizeof (u64), DMA_TO_DEVICE);
return 0;
}
-int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
- int start_index, int npages, u64 *page_list)
+int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ int start_index, int npages, u64 *page_list)
{
+ int err = 0;
int chunk;
- int err;
+ int mtts_per_page;
+ int max_mtts_first_page;
- if (mtt->order < 0)
- return -EINVAL;
+ /* compute how may mtts fit in the first page */
+ mtts_per_page = PAGE_SIZE / sizeof(u64);
+ max_mtts_first_page = mtts_per_page - (mtt->offset + start_index)
+ % mtts_per_page;
+
+ chunk = min_t(int, max_mtts_first_page, npages);
while (npages > 0) {
- chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
if (err)
return err;
-
npages -= chunk;
start_index += chunk;
page_list += chunk;
+
+ chunk = min_t(int, mtts_per_page, npages);
}
+ return err;
+}
- return 0;
+int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ int start_index, int npages, u64 *page_list)
+{
+ struct mlx4_cmd_mailbox *mailbox = NULL;
+ __be64 *inbox = NULL;
+ int chunk;
+ int err = 0;
+ int i;
+
+ if (mtt->order < 0)
+ return -EINVAL;
+
+ if (mlx4_is_mfunc(dev)) {
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ while (npages > 0) {
+ chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 2,
+ npages);
+ inbox[0] = cpu_to_be64(mtt->offset + start_index);
+ inbox[1] = 0;
+ for (i = 0; i < chunk; ++i)
+ inbox[i + 2] = cpu_to_be64(page_list[i] |
+ MLX4_MTT_FLAG_PRESENT);
+ err = mlx4_WRITE_MTT(dev, mailbox, chunk);
+ if (err) {
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+ }
+
+ npages -= chunk;
+ start_index += chunk;
+ page_list += chunk;
+ }
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+ }
+
+ return __mlx4_write_mtt(dev, mtt, start_index, npages, page_list);
}
EXPORT_SYMBOL_GPL(mlx4_write_mtt);
@@ -484,7 +640,7 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
return -ENOMEM;
for (i = 0; i < buf->npages; ++i)
- if (buf->direct.map)
+ if (buf->nbufs == 1)
page_list[i] = buf->direct.map + (i << buf->page_shift);
else
page_list[i] = buf->page_list[i].map;
@@ -498,9 +654,15 @@ EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
int mlx4_init_mr_table(struct mlx4_dev *dev)
{
- struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_mr_table *mr_table = &priv->mr_table;
int err;
+ /* Nothing to do for slaves - all MR handling is forwarded
+ * to the master */
+ if (mlx4_is_slave(dev))
+ return 0;
+
if (!is_power_of_2(dev->caps.num_mpts))
return -EINVAL;
@@ -510,13 +672,17 @@ int mlx4_init_mr_table(struct mlx4_dev *dev)
return err;
err = mlx4_buddy_init(&mr_table->mtt_buddy,
- ilog2(dev->caps.num_mtt_segs));
+ ilog2((u32)dev->caps.num_mtts /
+ (1 << log_mtts_per_seg)));
if (err)
goto err_buddy;
if (dev->caps.reserved_mtts) {
- if (mlx4_alloc_mtt_range(dev, fls(dev->caps.reserved_mtts - 1)) == -1) {
- mlx4_warn(dev, "MTT table of order %d is too small.\n",
+ priv->reserved_mtts =
+ mlx4_alloc_mtt_range(dev,
+ fls(dev->caps.reserved_mtts - 1));
+ if (priv->reserved_mtts < 0) {
+ mlx4_warn(dev, "MTT table of order %u is too small.\n",
mr_table->mtt_buddy.max_order);
err = -ENOMEM;
goto err_reserve_mtts;
@@ -536,8 +702,14 @@ err_buddy:
void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
{
- struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_mr_table *mr_table = &priv->mr_table;
+ if (mlx4_is_slave(dev))
+ return;
+ if (priv->reserved_mtts >= 0)
+ mlx4_free_mtt_range(dev, priv->reserved_mtts,
+ fls(dev->caps.reserved_mtts - 1));
mlx4_buddy_cleanup(&mr_table->mtt_buddy);
mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
}
@@ -569,9 +741,8 @@ static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list,
return 0;
}
-int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
- u64 *page_list, int npages, u64 iova, u32 fbo,
- u32 len, u32 *lkey, u32 *rkey, int same_key)
+int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
+ int npages, u64 iova, u32 *lkey, u32 *rkey)
{
u32 key;
int i, err;
@@ -583,8 +754,7 @@ int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
++fmr->maps;
key = key_to_hw_index(fmr->mr.key);
- if (!same_key)
- key += dev->caps.num_mpts;
+ key += dev->caps.num_mpts;
*lkey = *rkey = fmr->mr.key = hw_index_to_key(key);
*(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
@@ -592,18 +762,19 @@ int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
/* Make sure MPT status is visible before writing MTT entries */
wmb();
+ dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle,
+ npages * sizeof(u64), DMA_TO_DEVICE);
+
for (i = 0; i < npages; ++i)
fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
- dma_sync_single(&dev->pdev->dev, fmr->dma_handle,
- npages * sizeof(u64), DMA_TO_DEVICE);
+ dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle,
+ npages * sizeof(u64), DMA_TO_DEVICE);
fmr->mpt->key = cpu_to_be32(key);
fmr->mpt->lkey = cpu_to_be32(key);
- fmr->mpt->length = cpu_to_be64(len);
+ fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
fmr->mpt->start = cpu_to_be64(iova);
- fmr->mpt->first_byte_offset = cpu_to_be32(fbo & 0x001fffff);
- fmr->mpt->flags2 = (fbo ? MLX4_MPT_FLAG2_FBO_EN : 0);
/* Make MTT entries are visible before setting MPT status */
wmb();
@@ -615,25 +786,17 @@ int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
return 0;
}
-EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr_fbo);
-
-int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
- int npages, u64 iova, u32 *lkey, u32 *rkey)
-{
- u32 len = npages * (1ull << fmr->page_shift);
-
- return mlx4_map_phys_fmr_fbo(dev, fmr, page_list, npages, iova, 0,
- len, lkey, rkey, 0);
-}
EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);
int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- u64 mtt_seg;
int err = -ENOMEM;
+ if (max_maps > dev->caps.max_fmr_maps)
+ return -EINVAL;
+
if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
return -EINVAL;
@@ -651,11 +814,10 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
if (err)
return err;
- mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
-
fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
- fmr->mr.mtt.first_seg,
+ fmr->mr.mtt.offset,
&fmr->dma_handle);
+
if (!fmr->mtts) {
err = -ENOMEM;
goto err_free;
@@ -669,49 +831,6 @@ err_free:
}
EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);
-int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx,
- u32 pd, u32 access, int max_pages,
- int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
-{
- struct mlx4_priv *priv = mlx4_priv(dev);
- u64 mtt_seg;
- int err = -ENOMEM;
-
- if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
- return -EINVAL;
-
- /* All MTTs must fit in the same page */
- if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
- return -EINVAL;
-
- fmr->page_shift = page_shift;
- fmr->max_pages = max_pages;
- fmr->max_maps = max_maps;
- fmr->maps = 0;
-
- err = mlx4_mr_alloc_reserved(dev, mridx, pd, 0, 0, access, max_pages,
- page_shift, &fmr->mr);
- if (err)
- return err;
-
- mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
-
- fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
- fmr->mr.mtt.first_seg,
- &fmr->dma_handle);
- if (!fmr->mtts) {
- err = -ENOMEM;
- goto err_free;
- }
-
- return 0;
-
-err_free:
- mlx4_mr_free_reserved(dev, &fmr->mr);
- return err;
-}
-EXPORT_SYMBOL_GPL(mlx4_fmr_alloc_reserved);
-
int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -733,12 +852,30 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_enable);
void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
u32 *lkey, u32 *rkey)
{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+
if (!fmr->maps)
return;
fmr->maps = 0;
- *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ mlx4_warn(dev, "mlx4_alloc_cmd_mailbox failed (%d)\n", err);
+ return;
+ }
+
+ err = mlx4_HW2SW_MPT(dev, NULL,
+ key_to_hw_index(fmr->mr.key) &
+ (dev->caps.num_mpts - 1));
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ if (err) {
+ mlx4_warn(dev, "mlx4_HW2SW_MPT failed (%d)\n", err);
+ return;
+ }
+ fmr->mr.enabled = MLX4_MR_EN_SW;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_unmap);
@@ -747,27 +884,16 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
if (fmr->maps)
return -EBUSY;
- fmr->mr.enabled = 0;
mlx4_mr_free(dev, &fmr->mr);
+ fmr->mr.enabled = MLX4_MR_DISABLED;
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_free);
-int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
-{
- if (fmr->maps)
- return -EBUSY;
-
- fmr->mr.enabled = 0;
- mlx4_mr_free_reserved(dev, &fmr->mr);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx4_fmr_free_reserved);
-
int mlx4_SYNC_TPT(struct mlx4_dev *dev)
{
- return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000);
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000,
+ MLX4_CMD_NATIVE);
}
EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);
diff --git a/sys/ofed/drivers/net/mlx4/pd.c b/sys/ofed/drivers/net/mlx4/pd.c
index cce9226..91f4b85 100644
--- a/sys/ofed/drivers/net/mlx4/pd.c
+++ b/sys/ofed/drivers/net/mlx4/pd.c
@@ -62,12 +62,66 @@ void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
}
EXPORT_SYMBOL_GPL(mlx4_pd_free);
+int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap);
+ if (*xrcdn == -1)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn)
+{
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param,
+ RES_XRCD, RES_OP_RESERVE,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ return err;
+
+ *xrcdn = get_param_l(&out_param);
+ return 0;
+ }
+ return __mlx4_xrcd_alloc(dev, xrcdn);
+}
+EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc);
+
+void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
+{
+ mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn);
+}
+
+void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, xrcdn);
+ err = mlx4_cmd(dev, in_param, RES_XRCD,
+ RES_OP_RESERVE, MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed to release xrcdn %d\n", xrcdn);
+ } else
+ __mlx4_xrcd_free(dev, xrcdn);
+}
+EXPORT_SYMBOL_GPL(mlx4_xrcd_free);
+
int mlx4_init_pd_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds,
- (1 << 24) - 1, dev->caps.reserved_pds, 0);
+ (1 << NOT_MASKED_PD_BITS) - 1,
+ dev->caps.reserved_pds, 0);
}
void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
@@ -75,16 +129,34 @@ void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap);
}
+int mlx4_init_xrcd_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16),
+ (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0);
+}
+
+void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev)
+{
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap);
+}
int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
{
+ int offset;
+
uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap);
if (uar->index == -1)
return -ENOMEM;
- uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
+ if (mlx4_is_slave(dev))
+ offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) /
+ dev->caps.uar_page_size);
+ else
+ offset = uar->index;
+ uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset;
uar->map = NULL;
-
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_uar_alloc);
@@ -95,7 +167,8 @@ void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
}
EXPORT_SYMBOL_GPL(mlx4_uar_free);
-int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
+#ifndef CONFIG_PPC
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_uar *uar;
@@ -113,10 +186,13 @@ int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
err = -ENOMEM;
goto out;
}
- uar = kmalloc(sizeof *uar, GFP_KERNEL);
+ uar = kmalloc_node(sizeof *uar, GFP_KERNEL, node);
if (!uar) {
- err = -ENOMEM;
- goto out;
+ uar = kmalloc(sizeof *uar, GFP_KERNEL);
+ if (!uar) {
+ err = -ENOMEM;
+ goto out;
+ }
}
err = mlx4_uar_alloc(dev, uar);
if (err)
@@ -191,6 +267,21 @@ void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf)
}
EXPORT_SYMBOL_GPL(mlx4_bf_free);
+#else
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
+{
+ memset(bf, 0, sizeof *bf);
+ return -ENOSYS;
+}
+EXPORT_SYMBOL_GPL(mlx4_bf_alloc);
+
+void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf)
+{
+ return;
+}
+EXPORT_SYMBOL_GPL(mlx4_bf_free);
+#endif
+
int mlx4_init_uar_table(struct mlx4_dev *dev)
{
if (dev->caps.num_uars <= 128) {
@@ -202,7 +293,7 @@ int mlx4_init_uar_table(struct mlx4_dev *dev)
return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
dev->caps.num_uars, dev->caps.num_uars - 1,
- max(128, dev->caps.reserved_uars), 0);
+ dev->caps.reserved_uars, 0);
}
void mlx4_cleanup_uar_table(struct mlx4_dev *dev)
diff --git a/sys/ofed/drivers/net/mlx4/port.c b/sys/ofed/drivers/net/mlx4/port.c
index c8df375..2a009ea 100644
--- a/sys/ofed/drivers/net/mlx4/port.c
+++ b/sys/ofed/drivers/net/mlx4/port.c
@@ -34,19 +34,26 @@
#include <linux/if_ether.h>
#include <linux/mlx4/cmd.h>
-
+#include <linux/moduleparam.h>
#include "mlx4.h"
-int mlx4_ib_set_4k_mtu = 0;
-module_param_named(set_4k_mtu, mlx4_ib_set_4k_mtu, int, 0444);
-MODULE_PARM_DESC(set_4k_mtu, "attempt to set 4K MTU to all ConnectX ports");
+int mlx4_set_4k_mtu = -1;
+module_param_named(set_4k_mtu, mlx4_set_4k_mtu, int, 0444);
+MODULE_PARM_DESC(set_4k_mtu,
+ "(Obsolete) attempt to set 4K MTU to all ConnectX ports");
+
#define MLX4_MAC_VALID (1ull << 63)
-#define MLX4_MAC_MASK 0xffffffffffffULL
#define MLX4_VLAN_VALID (1u << 31)
#define MLX4_VLAN_MASK 0xfff
+#define MLX4_STATS_TRAFFIC_COUNTERS_MASK 0xfULL
+#define MLX4_STATS_TRAFFIC_DROPS_MASK 0xc0ULL
+#define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL
+#define MLX4_STATS_PORT_COUNTERS_MASK 0x1fe00000ULL
+#define MLX4_STATS_IF_RX_ERRORS_COUNTERS_MASK 0x8010ULL
+
void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
{
int i;
@@ -69,10 +76,36 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table)
table->entries[i] = 0;
table->refs[i] = 0;
}
- table->max = 1 << dev->caps.log_num_vlans;
+ table->max = (1 << dev->caps.log_num_vlans) - MLX4_VLAN_REGULAR;
table->total = 0;
}
+static int validate_index(struct mlx4_dev *dev,
+ struct mlx4_mac_table *table, int index)
+{
+ int err = 0;
+
+ if (index < 0 || index >= table->max || !table->entries[index]) {
+ mlx4_warn(dev, "No valid Mac entry for the given index\n");
+ err = -EINVAL;
+ }
+ return err;
+}
+
+static int find_index(struct mlx4_dev *dev,
+ struct mlx4_mac_table *table, u64 mac)
+{
+ int i;
+
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if ((mac & MLX4_MAC_MASK) ==
+ (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))
+ return i;
+ }
+ /* Mac not found */
+ return -EINVAL;
+}
+
static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
__be64 *entries)
{
@@ -87,40 +120,39 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE);
in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
+
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
-int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index)
+int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
{
- struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
+ struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+ struct mlx4_mac_table *table = &info->mac_table;
int i, err = 0;
int free = -1;
- mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac);
+ mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d\n",
+ (unsigned long long) mac, port);
+
mutex_lock(&table->mutex);
- for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) {
- if (free < 0 && !table->refs[i]) {
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if (free < 0 && !table->entries[i]) {
free = i;
continue;
}
if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
- /* MAC already registered, increase refernce count */
- *index = i;
+ /* MAC already registered, Must not have duplicates */
+ err = i;
++table->refs[i];
goto out;
}
}
- if (free < 0) {
- err = -ENOMEM;
- goto out;
- }
-
mlx4_dbg(dev, "Free MAC index is %d\n", free);
if (table->total == table->max) {
@@ -130,47 +162,128 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index)
}
/* Register new MAC */
- table->refs[free] = 1;
table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID);
err = mlx4_set_port_mac_table(dev, port, table->entries);
if (unlikely(err)) {
- mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) mac);
- table->refs[free] = 0;
+ mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+ (unsigned long long) mac);
table->entries[free] = 0;
goto out;
}
+ table->refs[free] = 1;
- *index = free;
+ err = free;
++table->total;
out:
mutex_unlock(&table->mutex);
return err;
}
+EXPORT_SYMBOL_GPL(__mlx4_register_mac);
+
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+ u64 out_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, mac, &out_param,
+ ((u32) port) << 8 | (u32) RES_MAC,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ return err;
+
+ return get_param_l(&out_param);
+ }
+ return __mlx4_register_mac(dev, port, mac);
+}
EXPORT_SYMBOL_GPL(mlx4_register_mac);
-void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index)
+int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port)
+{
+ return dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
+ (port - 1) * (1 << dev->caps.log_num_macs);
+}
+EXPORT_SYMBOL_GPL(mlx4_get_base_qpn);
+
+void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
{
- struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
+ struct mlx4_port_info *info;
+ struct mlx4_mac_table *table;
+ int index;
+ if (port < 1 || port > dev->caps.num_ports) {
+ mlx4_warn(dev, "invalid port number (%d), aborting...\n", port);
+ return;
+ }
+ info = &mlx4_priv(dev)->port[port];
+ table = &info->mac_table;
mutex_lock(&table->mutex);
- if (!table->refs[index]) {
- mlx4_warn(dev, "No MAC entry for index %d\n", index);
+
+ index = find_index(dev, table, mac);
+
+ if (validate_index(dev, table, index))
goto out;
- }
+
if (--table->refs[index]) {
- mlx4_warn(dev, "Have more references for index %d,"
- "no need to modify MAC table\n", index);
+ mlx4_dbg(dev, "Have more references for index %d,"
+ "no need to modify mac table\n", index);
goto out;
}
+
table->entries[index] = 0;
mlx4_set_port_mac_table(dev, port, table->entries);
--table->total;
out:
mutex_unlock(&table->mutex);
}
+EXPORT_SYMBOL_GPL(__mlx4_unregister_mac);
+
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+ u64 out_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ (void) mlx4_cmd_imm(dev, mac, &out_param,
+ ((u32) port) << 8 | (u32) RES_MAC,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ return;
+ }
+ __mlx4_unregister_mac(dev, port, mac);
+ return;
+}
EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
+int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
+{
+ struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+ struct mlx4_mac_table *table = &info->mac_table;
+ int index = qpn - info->base_qpn;
+ int err = 0;
+
+ /* CX1 doesn't support multi-functions */
+ mutex_lock(&table->mutex);
+
+ err = validate_index(dev, table, index);
+ if (err)
+ goto out;
+
+ table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID);
+
+ err = mlx4_set_port_mac_table(dev, port, table->entries);
+ if (unlikely(err)) {
+ mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+ (unsigned long long) new_mac);
+ table->entries[index] = 0;
+ }
+out:
+ mutex_unlock(&table->mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(__mlx4_replace_mac);
+
static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
__be32 *entries)
{
@@ -185,7 +298,7 @@ static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE);
in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
@@ -201,7 +314,7 @@ int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx)
if (table->refs[i] &&
(vid == (MLX4_VLAN_MASK &
be32_to_cpu(table->entries[i])))) {
- /* Vlan already registered, increase refernce count */
+ /* VLAN already registered, increase reference count */
*idx = i;
return 0;
}
@@ -211,13 +324,21 @@ int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx)
}
EXPORT_SYMBOL_GPL(mlx4_find_cached_vlan);
-int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
+ int *index)
{
struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
int i, err = 0;
int free = -1;
mutex_lock(&table->mutex);
+
+ if (table->total == table->max) {
+ /* No free vlan entries */
+ err = -ENOSPC;
+ goto out;
+ }
+
for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) {
if (free < 0 && (table->refs[i] == 0)) {
free = i;
@@ -227,7 +348,7 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
if (table->refs[i] &&
(vlan == (MLX4_VLAN_MASK &
be32_to_cpu(table->entries[i])))) {
- /* Vlan already registered, increase refernce count */
+ /* Vlan already registered, increase references count */
*index = i;
++table->refs[i];
goto out;
@@ -239,13 +360,7 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
goto out;
}
- if (table->total == table->max) {
- /* No free vlan entries */
- err = -ENOSPC;
- goto out;
- }
-
- /* Register new MAC */
+ /* Register new VLAN */
table->refs[free] = 1;
table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
@@ -263,25 +378,49 @@ out:
mutex_unlock(&table->mutex);
return err;
}
+
+int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+{
+ u64 out_param = 0;
+ int err;
+
+ if (vlan > 4095)
+ return -EINVAL;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, vlan, &out_param,
+ ((u32) port) << 8 | (u32) RES_VLAN,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ *index = get_param_l(&out_param);
+
+ return err;
+ }
+ return __mlx4_register_vlan(dev, port, vlan, index);
+}
EXPORT_SYMBOL_GPL(mlx4_register_vlan);
-void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
{
struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
+ int index;
- if (index < MLX4_VLAN_REGULAR) {
- mlx4_warn(dev, "Trying to free special vlan index %d\n", index);
- return;
+ mutex_lock(&table->mutex);
+ if (mlx4_find_cached_vlan(dev, port, vlan, &index)) {
+ mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan);
+ goto out;
}
- mutex_lock(&table->mutex);
- if (!table->refs[index]) {
- mlx4_warn(dev, "No vlan entry for index %d\n", index);
+ if (index < MLX4_VLAN_REGULAR) {
+ mlx4_warn(dev, "Trying to free special vlan index %d\n", index);
goto out;
}
+
if (--table->refs[index]) {
- mlx4_dbg(dev, "Have more references for index %d,"
- "no need to modify vlan table\n", index);
+ mlx4_dbg(dev, "Have %d more references for index %d, "
+ "no need to modify vlan table\n", table->refs[index],
+ index);
goto out;
}
table->entries[index] = 0;
@@ -290,6 +429,21 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
out:
mutex_unlock(&table->mutex);
}
+
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
+{
+ u64 out_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ (void) mlx4_cmd_imm(dev, vlan, &out_param,
+ ((u32) port) << 8 | (u32) RES_VLAN,
+ RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ return;
+ }
+ __mlx4_unregister_vlan(dev, port, vlan);
+}
EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
@@ -320,20 +474,275 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
*(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
- MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
if (!err)
*caps = *(__be32 *) (outbuf + 84);
mlx4_free_cmd_mailbox(dev, inmailbox);
mlx4_free_cmd_mailbox(dev, outmailbox);
return err;
}
+static struct mlx4_roce_gid_entry zgid_entry;
-int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
+int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave)
{
- struct mlx4_cmd_mailbox *mailbox;
+ if (slave == 0)
+ return MLX4_ROCE_PF_GIDS;
+ if (slave <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % dev->num_vfs))
+ return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs) + 1;
+ return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs;
+}
+
+int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave)
+{
+ int gids;
+ int vfs;
+
+ gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+ vfs = dev->num_vfs;
+
+ if (slave == 0)
+ return 0;
+ if (slave <= gids % vfs)
+ return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1);
+
+ return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1));
+}
+
+static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
+ u8 op_mod, struct mlx4_cmd_mailbox *inbox)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_port_info *port_info;
+ struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master;
+ struct mlx4_slave_state *slave_st = &master->slave_state[slave];
+ struct mlx4_set_port_rqp_calc_context *qpn_context;
+ struct mlx4_set_port_general_context *gen_context;
+ struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1;
+ int reset_qkey_viols;
+ int port;
+ int is_eth;
+ int num_gids;
+ int base;
+ u32 in_modifier;
+ u32 promisc;
+ u16 mtu, prev_mtu;
int err;
+ int i, j;
+ int offset;
+ __be32 agg_cap_mask;
+ __be32 slave_cap_mask;
+ __be32 new_cap_mask;
+
+ port = in_mod & 0xff;
+ in_modifier = in_mod >> 8;
+ is_eth = op_mod;
+ port_info = &priv->port[port];
+
+ /* Slaves cannot perform SET_PORT operations except changing MTU */
+ if (is_eth) {
+ if (slave != dev->caps.function &&
+ in_modifier != MLX4_SET_PORT_GENERAL &&
+ in_modifier != MLX4_SET_PORT_GID_TABLE) {
+ mlx4_warn(dev, "denying SET_PORT for slave:%d\n",
+ slave);
+ return -EINVAL;
+ }
+ switch (in_modifier) {
+ case MLX4_SET_PORT_RQP_CALC:
+ qpn_context = inbox->buf;
+ qpn_context->base_qpn =
+ cpu_to_be32(port_info->base_qpn);
+ qpn_context->n_mac = 0x7;
+ promisc = be32_to_cpu(qpn_context->promisc) >>
+ SET_PORT_PROMISC_SHIFT;
+ qpn_context->promisc = cpu_to_be32(
+ promisc << SET_PORT_PROMISC_SHIFT |
+ port_info->base_qpn);
+ promisc = be32_to_cpu(qpn_context->mcast) >>
+ SET_PORT_MC_PROMISC_SHIFT;
+ qpn_context->mcast = cpu_to_be32(
+ promisc << SET_PORT_MC_PROMISC_SHIFT |
+ port_info->base_qpn);
+ break;
+ case MLX4_SET_PORT_GENERAL:
+ gen_context = inbox->buf;
+ /* Mtu is configured as the max MTU among all the
+ * the functions on the port. */
+ mtu = be16_to_cpu(gen_context->mtu);
+ mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port]);
+ prev_mtu = slave_st->mtu[port];
+ slave_st->mtu[port] = mtu;
+ if (mtu > master->max_mtu[port])
+ master->max_mtu[port] = mtu;
+ if (mtu < prev_mtu && prev_mtu ==
+ master->max_mtu[port]) {
+ slave_st->mtu[port] = mtu;
+ master->max_mtu[port] = mtu;
+ for (i = 0; i < dev->num_slaves; i++) {
+ master->max_mtu[port] =
+ max(master->max_mtu[port],
+ master->slave_state[i].mtu[port]);
+ }
+ }
+
+ gen_context->mtu = cpu_to_be16(master->max_mtu[port]);
+ break;
+ case MLX4_SET_PORT_GID_TABLE:
+ /* change to MULTIPLE entries: number of guest's gids
+ * need a FOR-loop here over number of gids the guest has.
+ * 1. Check no duplicates in gids passed by slave
+ */
+ num_gids = mlx4_get_slave_num_gids(dev, slave);
+ base = mlx4_get_base_gid_ix(dev, slave);
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
+ for (i = 0; i < num_gids; gid_entry_mbox++, i++) {
+ if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
+ sizeof(zgid_entry)))
+ continue;
+ gid_entry_mb1 = gid_entry_mbox + 1;
+ for (j = i + 1; j < num_gids; gid_entry_mb1++, j++) {
+ if (!memcmp(gid_entry_mb1->raw,
+ zgid_entry.raw, sizeof(zgid_entry)))
+ continue;
+ if (!memcmp(gid_entry_mb1->raw, gid_entry_mbox->raw,
+ sizeof(gid_entry_mbox->raw))) {
+ /* found duplicate */
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* 2. Check that do not have duplicates in OTHER
+ * entries in the port GID table
+ */
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
+ if (i >= base && i < base + num_gids)
+ continue; /* don't compare to slave's current gids */
+ gid_entry_tbl = &priv->roce_gids[port - 1][i];
+ if (!memcmp(gid_entry_tbl->raw, zgid_entry.raw, sizeof(zgid_entry)))
+ continue;
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
+ for (j = 0; j < num_gids; gid_entry_mbox++, j++) {
+ if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
+ sizeof(zgid_entry)))
+ continue;
+ if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw,
+ sizeof(gid_entry_tbl->raw))) {
+ /* found duplicate */
+ mlx4_warn(dev, "requested gid entry for slave:%d "
+ "is a duplicate of gid at index %d\n",
+ slave, i);
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* insert slave GIDs with memcpy, starting at slave's base index */
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
+ for (i = 0, offset = base; i < num_gids; gid_entry_mbox++, offset++, i++)
+ memcpy(priv->roce_gids[port - 1][offset].raw, gid_entry_mbox->raw, 16);
+
+ /* Now, copy roce port gids table to current mailbox for passing to FW */
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++)
+ memcpy(gid_entry_mbox->raw, priv->roce_gids[port - 1][i].raw, 16);
+
+ break;
+ }
+ return mlx4_cmd(dev, inbox->dma, in_mod, op_mod,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+ }
+
+ /* For IB, we only consider:
+ * - The capability mask, which is set to the aggregate of all
+ * slave function capabilities
+ * - The QKey violatin counter - reset according to each request.
+ */
+
+ if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+ reset_qkey_viols = (*(u8 *) inbox->buf) & 0x40;
+ new_cap_mask = ((__be32 *) inbox->buf)[2];
+ } else {
+ reset_qkey_viols = ((u8 *) inbox->buf)[3] & 0x1;
+ new_cap_mask = ((__be32 *) inbox->buf)[1];
+ }
+
+ /* slave may not set the IS_SM capability for the port */
+ if (slave != mlx4_master_func_num(dev) &&
+ (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM))
+ return -EINVAL;
+
+ /* No DEV_MGMT in multifunc mode */
+ if (mlx4_is_mfunc(dev) &&
+ (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP))
+ return -EINVAL;
+
+ agg_cap_mask = 0;
+ slave_cap_mask =
+ priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
+ priv->mfunc.master.slave_state[slave].ib_cap_mask[port] = new_cap_mask;
+ for (i = 0; i < dev->num_slaves; i++)
+ agg_cap_mask |=
+ priv->mfunc.master.slave_state[i].ib_cap_mask[port];
- if (dev->caps.port_type[port] != MLX4_PORT_TYPE_IB)
+ /* only clear mailbox for guests. Master may be setting
+ * MTU or PKEY table size
+ */
+ if (slave != dev->caps.function)
+ memset(inbox->buf, 0, 256);
+ if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+ *(u8 *) inbox->buf |= !!reset_qkey_viols << 6;
+ ((__be32 *) inbox->buf)[2] = agg_cap_mask;
+ } else {
+ ((u8 *) inbox->buf)[3] |= !!reset_qkey_viols;
+ ((__be32 *) inbox->buf)[1] = agg_cap_mask;
+ }
+
+ err = mlx4_cmd(dev, inbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err)
+ priv->mfunc.master.slave_state[slave].ib_cap_mask[port] =
+ slave_cap_mask;
+ return err;
+}
+
+int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ return mlx4_common_set_port(dev, slave, vhcr->in_modifier,
+ vhcr->op_modifier, inbox);
+}
+
+/* bit locations for set port command with zero op modifier */
+enum {
+ MLX4_SET_PORT_VL_CAP = 4, /* bits 7:4 */
+ MLX4_SET_PORT_MTU_CAP = 12, /* bits 15:12 */
+ MLX4_CHANGE_PORT_PKEY_TBL_SZ = 20,
+ MLX4_CHANGE_PORT_VL_CAP = 21,
+ MLX4_CHANGE_PORT_MTU_CAP = 22,
+};
+
+#define CX3_PPF_DEV_ID 0x1003
+static int vl_cap_start(struct mlx4_dev *dev)
+{
+ /* for non CX3 devices, start with 4 VLs to avoid errors in syslog */
+ if (dev->pdev->device != CX3_PPF_DEV_ID)
+ return 4;
+ return 8;
+}
+
+int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err, vl_cap, pkey_tbl_flag = 0;
+ u32 in_mod;
+
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_NONE)
return 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -342,13 +751,295 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
memset(mailbox->buf, 0, 256);
- if (mlx4_ib_set_4k_mtu)
- ((__be32 *) mailbox->buf)[0] |= cpu_to_be32((1 << 22) | (1 << 21) | (5 << 12) | (2 << 4));
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
+ in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ } else {
+ ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port];
+
+ if (pkey_tbl_sz >= 0 && mlx4_is_master(dev)) {
+ pkey_tbl_flag = 1;
+ ((__be16 *) mailbox->buf)[20] = cpu_to_be16(pkey_tbl_sz);
+ }
+
+ /* IB VL CAP enum isn't used by the firmware, just numerical values */
+ for (vl_cap = vl_cap_start(dev); vl_cap >= 1; vl_cap >>= 1) {
+ ((__be32 *) mailbox->buf)[0] = cpu_to_be32(
+ (1 << MLX4_CHANGE_PORT_MTU_CAP) |
+ (1 << MLX4_CHANGE_PORT_VL_CAP) |
+ (pkey_tbl_flag << MLX4_CHANGE_PORT_PKEY_TBL_SZ) |
+ (dev->caps.port_ib_mtu[port] << MLX4_SET_PORT_MTU_CAP) |
+ (vl_cap << MLX4_SET_PORT_VL_CAP));
+ err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+ if (err != -ENOMEM)
+ break;
+ }
+ }
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
+ u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_general_context *context;
+ int err;
+ u32 in_mod;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ memset(context, 0, sizeof *context);
+
+ context->flags = SET_PORT_GEN_ALL_VALID;
+ context->mtu = cpu_to_be16(mtu);
+ context->pptx = (pptx * (!pfctx)) << 7;
+ context->pfctx = pfctx;
+ context->pprx = (pprx * (!pfcrx)) << 7;
+ context->pfcrx = pfcrx;
+
+ in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_general);
+
+int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
+ u8 promisc)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_rqp_calc_context *context;
+ int err;
+ u32 in_mod;
+ u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ?
+ MCAST_DIRECT : MCAST_DEFAULT;
+/*
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
+ return 0;
+*/
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ memset(context, 0, sizeof *context);
+
+ context->base_qpn = cpu_to_be32(base_qpn);
+ /*
+ * This assignment breaks vlan support - I don't know why. Probablya an A0 issue - shahar Klein
+ * context->n_mac = dev->caps.log_num_macs;
+ */
+ context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT |
+ base_qpn);
+ context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT |
+ base_qpn);
+ context->intra_no_vlan = 0;
+ context->no_vlan = MLX4_NO_VLAN_IDX;
+ context->intra_vlan_miss = 0;
+ context->vlan_miss = MLX4_VLAN_MISS_IDX;
+
+ in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc);
+
+int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_prio2tc_context *context;
+ int err;
+ u32 in_mod;
+ int i;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ memset(context, 0, sizeof *context);
+
+ for (i = 0; i < MLX4_NUM_UP; i += 2)
+ context->prio2tc[i >> 1] = prio2tc[i] << 4 | prio2tc[i + 1];
+
+ in_mod = MLX4_SET_PORT_PRIO2TC << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_PRIO2TC);
+
+int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
+ u8 *pg, u16 *ratelimit)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_scheduler_context *context;
+ int err;
+ u32 in_mod;
+ int i;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ memset(context, 0, sizeof *context);
+
+ for (i = 0; i < MLX4_NUM_TC; i++) {
+ struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i];
+ u16 r;
+ if (ratelimit && ratelimit[i]) {
+ if (ratelimit[i] <= MLX4_MAX_100M_UNITS_VAL) {
+ r = ratelimit[i];
+ tc->max_bw_units =
+ htons(MLX4_RATELIMIT_100M_UNITS);
+ } else {
+ r = ratelimit[i]/10;
+ tc->max_bw_units =
+ htons(MLX4_RATELIMIT_1G_UNITS);
+ }
+ tc->max_bw_value = htons(r);
+ } else {
+ tc->max_bw_value = htons(MLX4_RATELIMIT_DEFAULT);
+ tc->max_bw_units = htons(MLX4_RATELIMIT_1G_UNITS);
+ }
- ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port];
- err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ tc->pg = htons(pg[i]);
+ tc->bw_precentage = htons(tc_tx_bw[i]);
+ }
+
+ in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
+EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER);
+
+int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err = 0;
+
+ return err;
+}
+
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
+ u64 mac, u64 clear, u8 mode)
+{
+ return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
+ MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+}
+EXPORT_SYMBOL(mlx4_SET_MCAST_FLTR);
+
+int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err = 0;
+
+ return err;
+}
+
+int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave,
+ u32 in_mod, struct mlx4_cmd_mailbox *outbox)
+{
+ return mlx4_cmd_box(dev, 0, outbox->dma, in_mod, 0,
+ MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+}
+
+int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ if (slave != dev->caps.function)
+ return 0;
+ return mlx4_common_dump_eth_stats(dev, slave,
+ vhcr->in_modifier, outbox);
+}
+
+void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap)
+{
+ if (!mlx4_is_mfunc(dev)) {
+ *stats_bitmap = 0;
+ return;
+ }
+
+ *stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK |
+ MLX4_STATS_TRAFFIC_DROPS_MASK |
+ MLX4_STATS_PORT_COUNTERS_MASK |
+ MLX4_STATS_IF_RX_ERRORS_COUNTERS_MASK);
+
+ if (mlx4_is_master(dev))
+ *stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK;
+}
+EXPORT_SYMBOL(mlx4_set_stats_bitmap);
+
+int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, found_ix = -1;
+ int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+
+ if (!mlx4_is_mfunc(dev))
+ return -EINVAL;
+
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
+ if (!memcmp(priv->roce_gids[port - 1][i].raw, gid, 16)) {
+ found_ix = i;
+ break;
+ }
+ }
+
+ if (found_ix >= 0) {
+ if (found_ix < MLX4_ROCE_PF_GIDS)
+ *slave_id = 0;
+ else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % dev->num_vfs) *
+ (vf_gids / dev->num_vfs + 1))
+ *slave_id = ((found_ix - MLX4_ROCE_PF_GIDS) /
+ (vf_gids / dev->num_vfs + 1)) + 1;
+ else
+ *slave_id =
+ ((found_ix - MLX4_ROCE_PF_GIDS -
+ ((vf_gids % dev->num_vfs) * ((vf_gids / dev->num_vfs + 1)))) /
+ (vf_gids / dev->num_vfs)) + vf_gids % dev->num_vfs + 1;
+ }
+
+ return (found_ix >= 0) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL(mlx4_get_slave_from_roce_gid);
+
+int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (!mlx4_is_master(dev))
+ return -EINVAL;
+
+ memcpy(gid, priv->roce_gids[port - 1][slave_id].raw, 16);
+ return 0;
+}
+EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave);
+
diff --git a/sys/ofed/drivers/net/mlx4/profile.c b/sys/ofed/drivers/net/mlx4/profile.c
index bd22df9..d3042f0 100644
--- a/sys/ofed/drivers/net/mlx4/profile.c
+++ b/sys/ofed/drivers/net/mlx4/profile.c
@@ -32,7 +32,7 @@
* SOFTWARE.
*/
-#include <linux/init.h>
+#include <linux/slab.h>
#include "mlx4.h"
#include "fw.h"
@@ -76,7 +76,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
u64 size;
u64 start;
int type;
- int num;
+ u32 num;
int log_num;
};
@@ -85,7 +85,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
struct mlx4_resource tmp;
int i, j;
- profile = kzalloc(MLX4_RES_NUM * sizeof *profile, GFP_KERNEL);
+ profile = kcalloc(MLX4_RES_NUM, sizeof(*profile), GFP_KERNEL);
if (!profile)
return -ENOMEM;
@@ -98,8 +98,8 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz;
profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz;
profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz;
- profile[MLX4_RES_MTT].size = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
- profile[MLX4_RES_MCG].size = MLX4_MGM_ENTRY_SIZE;
+ profile[MLX4_RES_MTT].size = dev_cap->mtt_entry_sz;
+ profile[MLX4_RES_MCG].size = mlx4_get_mgm_entry_size(dev);
profile[MLX4_RES_QP].num = request->num_qp;
profile[MLX4_RES_RDMARC].num = request->num_qp * request->rdmarc_per_qp;
@@ -107,12 +107,12 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
profile[MLX4_RES_AUXC].num = request->num_qp;
profile[MLX4_RES_SRQ].num = request->num_srq;
profile[MLX4_RES_CQ].num = request->num_cq;
- profile[MLX4_RES_EQ].num = min_t(unsigned, dev_cap->max_eqs,
- dev_cap->reserved_eqs +
- num_possible_cpus() + 1);
+ profile[MLX4_RES_EQ].num = mlx4_is_mfunc(dev) ?
+ dev->phys_caps.num_phys_eqs :
+ min_t(unsigned, dev_cap->max_eqs, MAX_MSIX);
profile[MLX4_RES_DMPT].num = request->num_mpt;
profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS;
- profile[MLX4_RES_MTT].num = request->num_mtt;
+ profile[MLX4_RES_MTT].num = request->num_mtt * (1 << log_mtts_per_seg);
profile[MLX4_RES_MCG].num = request->num_mcg;
for (i = 0; i < MLX4_RES_NUM; ++i) {
@@ -198,9 +198,10 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
init_hca->log_num_cqs = profile[i].log_num;
break;
case MLX4_RES_EQ:
- dev->caps.num_eqs = profile[i].num;
+ dev->caps.num_eqs = roundup_pow_of_two(min_t(unsigned, dev_cap->max_eqs,
+ MAX_MSIX));
init_hca->eqc_base = profile[i].start;
- init_hca->log_num_eqs = profile[i].log_num;
+ init_hca->log_num_eqs = ilog2(dev->caps.num_eqs);
break;
case MLX4_RES_DMPT:
dev->caps.num_mpts = profile[i].num;
@@ -212,17 +213,24 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
init_hca->cmpt_base = profile[i].start;
break;
case MLX4_RES_MTT:
- dev->caps.num_mtt_segs = profile[i].num;
+ dev->caps.num_mtts = profile[i].num;
priv->mr_table.mtt_base = profile[i].start;
init_hca->mtt_base = profile[i].start;
break;
case MLX4_RES_MCG:
- dev->caps.num_mgms = profile[i].num >> 1;
- dev->caps.num_amgms = profile[i].num >> 1;
init_hca->mc_base = profile[i].start;
- init_hca->log_mc_entry_sz = ilog2(MLX4_MGM_ENTRY_SIZE);
+ init_hca->log_mc_entry_sz =
+ ilog2(mlx4_get_mgm_entry_size(dev));
init_hca->log_mc_table_sz = profile[i].log_num;
- init_hca->log_mc_hash_sz = profile[i].log_num - 1;
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ dev->caps.num_mgms = profile[i].num;
+ } else {
+ init_hca->log_mc_hash_sz =
+ profile[i].log_num - 1;
+ dev->caps.num_mgms = profile[i].num >> 1;
+ dev->caps.num_amgms = profile[i].num >> 1;
+ }
break;
default:
break;
diff --git a/sys/ofed/drivers/net/mlx4/qp.c b/sys/ofed/drivers/net/mlx4/qp.c
index bf1c117..2386adc 100644
--- a/sys/ofed/drivers/net/mlx4/qp.c
+++ b/sys/ofed/drivers/net/mlx4/qp.c
@@ -41,6 +41,12 @@
#include "mlx4.h"
#include "icm.h"
+/*
+ * QP to support BF should have bits 6,7 cleared
+ */
+#define MLX4_BF_QP_SKIP_MASK 0xc0
+#define MLX4_MAX_BF_QP_RANGE 0x40
+
void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
@@ -55,7 +61,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
spin_unlock(&qp_table->lock);
if (!qp) {
- mlx4_warn(dev, "Async event for bogus QP %08x\n", qpn);
+ mlx4_dbg(dev, "Async event for none existent QP %08x\n", qpn);
return;
}
@@ -65,10 +71,25 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
complete(&qp->free);
}
-int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
- enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
- struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar,
- int sqd_event, struct mlx4_qp *qp)
+/* used for INIT/CLOSE port logic */
+static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0)
+{
+ /* this procedure is called after we already know we are on the master */
+ /* qp0 is either the proxy qp0, or the real qp0 */
+ u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev);
+ *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1;
+
+ *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn &&
+ qp->qpn <= dev->phys_caps.base_sqpn + 1;
+
+ return *real_qp0 || *proxy_qp0;
+}
+
+static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
+ struct mlx4_qp_context *context,
+ enum mlx4_qp_optpar optpar,
+ int sqd_event, struct mlx4_qp *qp, int native)
{
static const u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE] = {
[MLX4_QP_STATE_RST] = {
@@ -110,16 +131,31 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
}
};
+ struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
int ret = 0;
+ int real_qp0 = 0;
+ int proxy_qp0 = 0;
+ u8 port;
if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE ||
!op[cur_state][new_state])
return -EINVAL;
- if (op[cur_state][new_state] == MLX4_CMD_2RST_QP)
- return mlx4_cmd(dev, 0, qp->qpn, 2,
- MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A);
+ if (op[cur_state][new_state] == MLX4_CMD_2RST_QP) {
+ ret = mlx4_cmd(dev, 0, qp->qpn, 2,
+ MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native);
+ if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR &&
+ cur_state != MLX4_QP_STATE_RST &&
+ is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
+ port = (qp->qpn & 1) + 1;
+ if (proxy_qp0)
+ priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
+ else
+ priv->mfunc.master.qp0_state[port].qp0_active = 0;
+ }
+ return ret;
+ }
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@@ -138,115 +174,230 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn =
cpu_to_be32(qp->qpn);
- ret = mlx4_cmd(dev, mailbox->dma, qp->qpn | (!!sqd_event << 31),
+ ret = mlx4_cmd(dev, mailbox->dma,
+ qp->qpn | (!!sqd_event << 31),
new_state == MLX4_QP_STATE_RST ? 2 : 0,
- op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C);
+ op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native);
+
+ if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
+ port = (qp->qpn & 1) + 1;
+ if (cur_state != MLX4_QP_STATE_ERR &&
+ cur_state != MLX4_QP_STATE_RST &&
+ new_state == MLX4_QP_STATE_ERR) {
+ if (proxy_qp0)
+ priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
+ else
+ priv->mfunc.master.qp0_state[port].qp0_active = 0;
+ } else if (new_state == MLX4_QP_STATE_RTR) {
+ if (proxy_qp0)
+ priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1;
+ else
+ priv->mfunc.master.qp0_state[port].qp0_active = 1;
+ }
+ }
mlx4_free_cmd_mailbox(dev, mailbox);
return ret;
}
+
+int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
+ struct mlx4_qp_context *context,
+ enum mlx4_qp_optpar optpar,
+ int sqd_event, struct mlx4_qp *qp)
+{
+ return __mlx4_qp_modify(dev, mtt, cur_state, new_state, context,
+ optpar, sqd_event, qp, 0);
+}
EXPORT_SYMBOL_GPL(mlx4_qp_modify);
-int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base)
+int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 bf_qp)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
- int qpn;
- qpn = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align);
- if (qpn == -1)
+ if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp)
+ return -ENOMEM;
+
+ *base = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align,
+ bf_qp ? MLX4_BF_QP_SKIP_MASK : 0);
+ if (*base == -1)
return -ENOMEM;
- *base = qpn;
return 0;
}
+
+int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 bf_qp)
+{
+ u64 in_param = 0;
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, (((!!bf_qp) << 31) | (u32)cnt));
+ set_param_h(&in_param, align);
+ err = mlx4_cmd_imm(dev, in_param, &out_param,
+ RES_QP, RES_OP_RESERVE,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ return err;
+
+ *base = get_param_l(&out_param);
+ return 0;
+ }
+ return __mlx4_qp_reserve_range(dev, cnt, align, base, bf_qp);
+}
EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range);
-void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
+void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
- if (base_qpn < dev->caps.sqp_start + 8)
- return;
+ if (mlx4_is_qp_reserved(dev, (u32) base_qpn))
+ return;
mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
}
+
+void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, base_qpn);
+ set_param_h(&in_param, cnt);
+ err = mlx4_cmd(dev, in_param, RES_QP, RES_OP_RESERVE,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err) {
+ mlx4_warn(dev, "Failed to release qp range"
+ " base:%d cnt:%d\n", base_qpn, cnt);
+ }
+ } else
+ __mlx4_qp_release_range(dev, base_qpn, cnt);
+}
EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
int err;
- if (!qpn)
- return -EINVAL;
-
- qp->qpn = qpn;
-
- err = mlx4_table_get(dev, &qp_table->qp_table, qp->qpn);
+ err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
if (err)
goto err_out;
- err = mlx4_table_get(dev, &qp_table->auxc_table, qp->qpn);
+ err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
if (err)
goto err_put_qp;
- err = mlx4_table_get(dev, &qp_table->altc_table, qp->qpn);
+ err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
if (err)
goto err_put_auxc;
- err = mlx4_table_get(dev, &qp_table->rdmarc_table, qp->qpn);
+ err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
if (err)
goto err_put_altc;
- err = mlx4_table_get(dev, &qp_table->cmpt_table, qp->qpn);
+ err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
if (err)
goto err_put_rdmarc;
- spin_lock_irq(&qp_table->lock);
- err = radix_tree_insert(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1), qp);
- spin_unlock_irq(&qp_table->lock);
- if (err)
- goto err_put_cmpt;
-
- atomic_set(&qp->refcount, 1);
- init_completion(&qp->free);
-
return 0;
-err_put_cmpt:
- mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
-
err_put_rdmarc:
- mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
+ mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
err_put_altc:
- mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
+ mlx4_table_put(dev, &qp_table->altc_table, qpn);
err_put_auxc:
- mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
+ mlx4_table_put(dev, &qp_table->auxc_table, qpn);
err_put_qp:
- mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
+ mlx4_table_put(dev, &qp_table->qp_table, qpn);
err_out:
return err;
}
-EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
-struct mlx4_qp *mlx4_qp_lookup_lock(struct mlx4_dev *dev, u32 qpn)
+static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
{
- struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
- unsigned long flags;
- struct mlx4_qp *qp;
+ u64 param = 0;
- spin_lock_irqsave(&qp_table->lock, flags);
- qp = radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
- spin_unlock_irqrestore(&qp_table->lock, flags);
- return qp;
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&param, qpn);
+ return mlx4_cmd_imm(dev, param, &param, RES_QP, RES_OP_MAP_ICM,
+ MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ }
+ return __mlx4_qp_alloc_icm(dev, qpn);
+}
+
+void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_qp_table *qp_table = &priv->qp_table;
+
+ mlx4_table_put(dev, &qp_table->cmpt_table, qpn);
+ mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
+ mlx4_table_put(dev, &qp_table->altc_table, qpn);
+ mlx4_table_put(dev, &qp_table->auxc_table, qpn);
+ mlx4_table_put(dev, &qp_table->qp_table, qpn);
}
-EXPORT_SYMBOL_GPL(mlx4_qp_lookup_lock);
+
+static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, qpn);
+ if (mlx4_cmd(dev, in_param, RES_QP, RES_OP_MAP_ICM,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed to free icm of qp:%d\n", qpn);
+ } else
+ __mlx4_qp_free_icm(dev, qpn);
+}
+
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_qp_table *qp_table = &priv->qp_table;
+ int err;
+
+ if (!qpn)
+ return -EINVAL;
+
+ qp->qpn = qpn;
+
+ err = mlx4_qp_alloc_icm(dev, qpn);
+ if (err)
+ return err;
+
+ spin_lock_irq(&qp_table->lock);
+ err = radix_tree_insert(&dev->qp_table_tree, qp->qpn &
+ (dev->caps.num_qps - 1), qp);
+ spin_unlock_irq(&qp_table->lock);
+ if (err)
+ goto err_icm;
+
+ atomic_set(&qp->refcount, 1);
+ init_completion(&qp->free);
+
+ return 0;
+
+err_icm:
+ mlx4_qp_free_icm(dev, qpn);
+ return err;
+}
+
+EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp)
{
@@ -261,25 +412,18 @@ EXPORT_SYMBOL_GPL(mlx4_qp_remove);
void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
{
- struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
-
if (atomic_dec_and_test(&qp->refcount))
complete(&qp->free);
wait_for_completion(&qp->free);
- mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
- mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
- mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
- mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
- mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
+ mlx4_qp_free_icm(dev, qp->qpn);
}
EXPORT_SYMBOL_GPL(mlx4_qp_free);
static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
{
- return mlx4_cmd(dev, 0, base_qpn,
- (dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY) ? 4 : 0,
- MLX4_CMD_CONF_SPECIAL_QP, MLX4_CMD_TIME_CLASS_B);
+ return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
int mlx4_init_qp_table(struct mlx4_dev *dev)
@@ -287,18 +431,23 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
int err;
int reserved_from_top = 0;
+ int reserved_from_bot;
+ int k;
spin_lock_init(&qp_table->lock);
INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
+ if (mlx4_is_slave(dev))
+ return 0;
/*
* We reserve 2 extra QPs per port for the special QPs. The
* block of special QPs must be aligned to a multiple of 8, so
* round up.
+ *
* We also reserve the MSB of the 24-bit QP number to indicate
- * an XRC qp.
+ * that a QP is an XRC QP.
*/
- dev->caps.sqp_start =
+ dev->phys_caps.base_sqpn =
ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
{
@@ -329,34 +478,82 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
}
+ /* Reserve 8 real SQPs in both native and SRIOV modes.
+ * In addition, in SRIOV mode, reserve 8 proxy SQPs per function
+ * (for all PFs and VFs), and 8 corresponding tunnel QPs.
+ * Each proxy SQP works opposite its own tunnel QP.
+ *
+ * The QPs are arranged as follows:
+ * a. 8 real SQPs
+ * b. All the proxy SQPs (8 per function)
+ * c. All the tunnel QPs (8 per function)
+ */
+ reserved_from_bot = mlx4_num_reserved_sqps(dev);
+ if (reserved_from_bot + reserved_from_top > dev->caps.num_qps) {
+ mlx4_err(dev, "Number of reserved QPs is higher than number "
+ "of QPs, increase the value of log_num_qp\n");
+ return -EINVAL;
+ }
+
err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
- (1 << 23) - 1, dev->caps.sqp_start + 8,
+ (1 << 23) - 1, reserved_from_bot,
reserved_from_top);
if (err)
return err;
- return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start);
+ if (mlx4_is_mfunc(dev)) {
+ /* for PPF use */
+ dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8;
+ dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX;
+
+ /* In mfunc, calculate proxy and tunnel qp offsets for the PF here,
+ * since the PF does not call mlx4_slave_caps */
+ dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+
+ if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
+ !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ for (k = 0; k < dev->caps.num_ports; k++) {
+ dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn +
+ 8 * mlx4_master_func_num(dev) + k;
+ dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX;
+ dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn +
+ 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k;
+ dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX;
+ }
+ }
+
+
+ err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn);
+ if (err)
+ goto err_mem;
+ return 0;
+
+err_mem:
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
+ dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+ return err;
}
void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
{
+ if (mlx4_is_slave(dev))
+ return;
+
mlx4_CONF_SPECIAL_QP(dev, 0);
mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap);
}
-int mlx4_qp_get_region(struct mlx4_dev *dev, enum mlx4_qp_region region,
- int *base_qpn, int *cnt)
-{
- if ((region < 0) || (region >= MLX4_NUM_QP_REGION))
- return -EINVAL;
-
- *base_qpn = dev->caps.reserved_qps_base[region];
- *cnt = dev->caps.reserved_qps_cnt[region];
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(mlx4_qp_get_region);
-
int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
struct mlx4_qp_context *context)
{
@@ -368,7 +565,8 @@ int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
return PTR_ERR(mailbox);
err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0,
- MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
if (!err)
memcpy(context, mailbox->buf + 8, sizeof *context);
diff --git a/sys/ofed/drivers/net/mlx4/reset.c b/sys/ofed/drivers/net/mlx4/reset.c
index 3951b88..d8d796a 100644
--- a/sys/ofed/drivers/net/mlx4/reset.c
+++ b/sys/ofed/drivers/net/mlx4/reset.c
@@ -121,7 +121,7 @@ int mlx4_reset(struct mlx4_dev *dev)
iounmap(reset);
/* Docs say to wait one second before accessing device */
- msleep(1000);
+ msleep(2000);
end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES;
do {
@@ -139,11 +139,12 @@ int mlx4_reset(struct mlx4_dev *dev)
goto out;
}
+
/* Now restore the PCI headers */
if (pcie_cap) {
devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4];
if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_DEVCTL,
- devctl)) {
+ devctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express "
"Device Control register, aborting.\n");
@@ -151,7 +152,7 @@ int mlx4_reset(struct mlx4_dev *dev)
}
linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4];
if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_LNKCTL,
- linkctl)) {
+ linkctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express "
"Link control register, aborting.\n");
diff --git a/sys/ofed/drivers/net/mlx4/resource_tracker.c b/sys/ofed/drivers/net/mlx4/resource_tracker.c
new file mode 100644
index 0000000..aa101cd
--- /dev/null
+++ b/sys/ofed/drivers/net/mlx4/resource_tracker.c
@@ -0,0 +1,4315 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
+#include <linux/if_ether.h>
+#include <linux/compat.h>
+
+#include "mlx4.h"
+#include "fw.h"
+
+#define MLX4_MAC_VALID (1ull << 63)
+
+struct mac_res {
+ struct list_head list;
+ u64 mac;
+ int ref_count;
+ u8 smac_index;
+ u8 port;
+};
+
+struct vlan_res {
+ struct list_head list;
+ u16 vlan;
+ int ref_count;
+ int vlan_index;
+ u8 port;
+};
+
+struct res_common {
+ struct list_head list;
+ struct rb_node node;
+ u64 res_id;
+ int owner;
+ int state;
+ int from_state;
+ int to_state;
+ int removing;
+};
+
+enum {
+ RES_ANY_BUSY = 1
+};
+
+struct res_gid {
+ struct list_head list;
+ u8 gid[16];
+ enum mlx4_protocol prot;
+ enum mlx4_steer_type steer;
+};
+
+enum res_qp_states {
+ RES_QP_BUSY = RES_ANY_BUSY,
+
+ /* QP number was allocated */
+ RES_QP_RESERVED,
+
+ /* ICM memory for QP context was mapped */
+ RES_QP_MAPPED,
+
+ /* QP is in hw ownership */
+ RES_QP_HW
+};
+
+struct res_qp {
+ struct res_common com;
+ struct res_mtt *mtt;
+ struct res_cq *rcq;
+ struct res_cq *scq;
+ struct res_srq *srq;
+ struct list_head mcg_list;
+ spinlock_t mcg_spl;
+ int local_qpn;
+};
+
+enum res_mtt_states {
+ RES_MTT_BUSY = RES_ANY_BUSY,
+ RES_MTT_ALLOCATED,
+};
+
+static inline const char *mtt_states_str(enum res_mtt_states state)
+{
+ switch (state) {
+ case RES_MTT_BUSY: return "RES_MTT_BUSY";
+ case RES_MTT_ALLOCATED: return "RES_MTT_ALLOCATED";
+ default: return "Unknown";
+ }
+}
+
+struct res_mtt {
+ struct res_common com;
+ int order;
+ atomic_t ref_count;
+};
+
+enum res_mpt_states {
+ RES_MPT_BUSY = RES_ANY_BUSY,
+ RES_MPT_RESERVED,
+ RES_MPT_MAPPED,
+ RES_MPT_HW,
+};
+
+struct res_mpt {
+ struct res_common com;
+ struct res_mtt *mtt;
+ int key;
+};
+
+enum res_eq_states {
+ RES_EQ_BUSY = RES_ANY_BUSY,
+ RES_EQ_RESERVED,
+ RES_EQ_HW,
+};
+
+struct res_eq {
+ struct res_common com;
+ struct res_mtt *mtt;
+};
+
+enum res_cq_states {
+ RES_CQ_BUSY = RES_ANY_BUSY,
+ RES_CQ_ALLOCATED,
+ RES_CQ_HW,
+};
+
+struct res_cq {
+ struct res_common com;
+ struct res_mtt *mtt;
+ atomic_t ref_count;
+};
+
+enum res_srq_states {
+ RES_SRQ_BUSY = RES_ANY_BUSY,
+ RES_SRQ_ALLOCATED,
+ RES_SRQ_HW,
+};
+
+struct res_srq {
+ struct res_common com;
+ struct res_mtt *mtt;
+ struct res_cq *cq;
+ atomic_t ref_count;
+};
+
+enum res_counter_states {
+ RES_COUNTER_BUSY = RES_ANY_BUSY,
+ RES_COUNTER_ALLOCATED,
+};
+
+struct res_counter {
+ struct res_common com;
+ int port;
+};
+
+enum res_xrcdn_states {
+ RES_XRCD_BUSY = RES_ANY_BUSY,
+ RES_XRCD_ALLOCATED,
+};
+
+struct res_xrcdn {
+ struct res_common com;
+ int port;
+};
+
+enum res_fs_rule_states {
+ RES_FS_RULE_BUSY = RES_ANY_BUSY,
+ RES_FS_RULE_ALLOCATED,
+};
+
+struct res_fs_rule {
+ struct res_common com;
+};
+
+static int mlx4_is_eth(struct mlx4_dev *dev, int port)
+{
+ return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1;
+}
+
+static void *res_tracker_lookup(struct rb_root *root, u64 res_id)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct res_common *res = container_of(node, struct res_common,
+ node);
+
+ if (res_id < res->res_id)
+ node = node->rb_left;
+ else if (res_id > res->res_id)
+ node = node->rb_right;
+ else
+ return res;
+ }
+ return NULL;
+}
+
+static int res_tracker_insert(struct rb_root *root, struct res_common *res)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct res_common *this = container_of(*new, struct res_common,
+ node);
+
+ parent = *new;
+ if (res->res_id < this->res_id)
+ new = &((*new)->rb_left);
+ else if (res->res_id > this->res_id)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&res->node, parent, new);
+ rb_insert_color(&res->node, root);
+
+ return 0;
+}
+
+enum qp_transition {
+ QP_TRANS_INIT2RTR,
+ QP_TRANS_RTR2RTS,
+ QP_TRANS_RTS2RTS,
+ QP_TRANS_SQERR2RTS,
+ QP_TRANS_SQD2SQD,
+ QP_TRANS_SQD2RTS
+};
+
+/* For Debug uses */
+static const char *ResourceType(enum mlx4_resource rt)
+{
+ switch (rt) {
+ case RES_QP: return "RES_QP";
+ case RES_CQ: return "RES_CQ";
+ case RES_SRQ: return "RES_SRQ";
+ case RES_MPT: return "RES_MPT";
+ case RES_MTT: return "RES_MTT";
+ case RES_MAC: return "RES_MAC";
+ case RES_VLAN: return "RES_VLAN";
+ case RES_EQ: return "RES_EQ";
+ case RES_COUNTER: return "RES_COUNTER";
+ case RES_FS_RULE: return "RES_FS_RULE";
+ case RES_XRCD: return "RES_XRCD";
+ default: return "Unknown resource type !!!";
+ };
+}
+
+static void rem_slave_vlans(struct mlx4_dev *dev, int slave);
+static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource res_type, int count,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[res_type];
+ int err = -EINVAL;
+ int allocated, free, reserved, guaranteed, from_free;
+
+ spin_lock(&res_alloc->alloc_lock);
+ allocated = (port > 0) ?
+ res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] :
+ res_alloc->allocated[slave];
+ free = (port > 0) ? res_alloc->res_port_free[port - 1] :
+ res_alloc->res_free;
+ reserved = (port > 0) ? res_alloc->res_port_rsvd[port - 1] :
+ res_alloc->res_reserved;
+ guaranteed = res_alloc->guaranteed[slave];
+
+ if (allocated + count > res_alloc->quota[slave])
+ goto out;
+
+ if (allocated + count <= guaranteed) {
+ err = 0;
+ } else {
+ /* portion may need to be obtained from free area */
+ if (guaranteed - allocated > 0)
+ from_free = count - (guaranteed - allocated);
+ else
+ from_free = count;
+
+ if (free - from_free > reserved)
+ err = 0;
+ }
+
+ if (!err) {
+ /* grant the request */
+ if (port > 0) {
+ res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count;
+ res_alloc->res_port_free[port - 1] -= count;
+ } else {
+ res_alloc->allocated[slave] += count;
+ res_alloc->res_free -= count;
+ }
+ }
+
+out:
+ spin_unlock(&res_alloc->alloc_lock);
+ return err;
+
+}
+
+static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource res_type, int count,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[res_type];
+
+ spin_lock(&res_alloc->alloc_lock);
+ if (port > 0) {
+ res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count;
+ res_alloc->res_port_free[port - 1] += count;
+ } else {
+ res_alloc->allocated[slave] -= count;
+ res_alloc->res_free += count;
+ }
+
+ spin_unlock(&res_alloc->alloc_lock);
+ return;
+}
+
+static inline void initialize_res_quotas(struct mlx4_dev *dev,
+ struct resource_allocator *res_alloc,
+ enum mlx4_resource res_type,
+ int vf, int num_instances)
+{
+ res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1));
+ res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf];
+ if (vf == mlx4_master_func_num(dev)) {
+ res_alloc->res_free = num_instances;
+ if (res_type == RES_MTT) {
+ /* reserved mtts will be taken out of the PF allocation */
+ res_alloc->res_free += dev->caps.reserved_mtts;
+ res_alloc->guaranteed[vf] += dev->caps.reserved_mtts;
+ res_alloc->quota[vf] += dev->caps.reserved_mtts;
+ }
+ }
+}
+
+void mlx4_init_quotas(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int pf;
+
+ /* quotas for VFs are initialized in mlx4_slave_cap */
+ if (mlx4_is_slave(dev))
+ return;
+
+ if (!mlx4_is_mfunc(dev)) {
+ dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps -
+ mlx4_num_reserved_sqps(dev);
+ dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs;
+ dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs;
+ dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts;
+ dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws;
+ return;
+ }
+
+ pf = mlx4_master_func_num(dev);
+ dev->quotas.qp =
+ priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf];
+ dev->quotas.cq =
+ priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf];
+ dev->quotas.srq =
+ priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf];
+ dev->quotas.mtt =
+ priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf];
+ dev->quotas.mpt =
+ priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
+}
+int mlx4_init_resource_tracker(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, j;
+ int t;
+
+ priv->mfunc.master.res_tracker.slave_list =
+ kzalloc(dev->num_slaves * sizeof(struct slave_list),
+ GFP_KERNEL);
+ if (!priv->mfunc.master.res_tracker.slave_list)
+ return -ENOMEM;
+
+ for (i = 0 ; i < dev->num_slaves; i++) {
+ for (t = 0; t < MLX4_NUM_OF_RESOURCE_TYPE; ++t)
+ INIT_LIST_HEAD(&priv->mfunc.master.res_tracker.
+ slave_list[i].res_list[t]);
+ mutex_init(&priv->mfunc.master.res_tracker.slave_list[i].mutex);
+ }
+
+ mlx4_dbg(dev, "Started init_resource_tracker: %ld slaves\n",
+ dev->num_slaves);
+ for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++)
+ priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT;
+
+ for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[i];
+ res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+ res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+ if (i == RES_MAC || i == RES_VLAN)
+ res_alloc->allocated = kzalloc(MLX4_MAX_PORTS *
+ (dev->num_vfs + 1) * sizeof(int),
+ GFP_KERNEL);
+ else
+ res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
+
+ if (!res_alloc->quota || !res_alloc->guaranteed ||
+ !res_alloc->allocated)
+ goto no_mem_err;
+
+ spin_lock_init(&res_alloc->alloc_lock);
+ for (t = 0; t < dev->num_vfs + 1; t++) {
+ switch (i) {
+ case RES_QP:
+ initialize_res_quotas(dev, res_alloc, RES_QP,
+ t, dev->caps.num_qps -
+ dev->caps.reserved_qps -
+ mlx4_num_reserved_sqps(dev));
+ break;
+ case RES_CQ:
+ initialize_res_quotas(dev, res_alloc, RES_CQ,
+ t, dev->caps.num_cqs -
+ dev->caps.reserved_cqs);
+ break;
+ case RES_SRQ:
+ initialize_res_quotas(dev, res_alloc, RES_SRQ,
+ t, dev->caps.num_srqs -
+ dev->caps.reserved_srqs);
+ break;
+ case RES_MPT:
+ initialize_res_quotas(dev, res_alloc, RES_MPT,
+ t, dev->caps.num_mpts -
+ dev->caps.reserved_mrws);
+ break;
+ case RES_MTT:
+ initialize_res_quotas(dev, res_alloc, RES_MTT,
+ t, dev->caps.num_mtts -
+ dev->caps.reserved_mtts);
+ break;
+ case RES_MAC:
+ if (t == mlx4_master_func_num(dev)) {
+ res_alloc->quota[t] =
+ MLX4_MAX_MAC_NUM - 2 * dev->num_vfs;
+ res_alloc->guaranteed[t] = res_alloc->quota[t];
+ for (j = 0; j < MLX4_MAX_PORTS; j++)
+ res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM;
+ } else {
+ res_alloc->quota[t] = 2;
+ res_alloc->guaranteed[t] = 2;
+ }
+ break;
+ case RES_VLAN:
+ if (t == mlx4_master_func_num(dev)) {
+ res_alloc->quota[t] = MLX4_MAX_VLAN_NUM;
+ res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2;
+ for (j = 0; j < MLX4_MAX_PORTS; j++)
+ res_alloc->res_port_free[j] =
+ res_alloc->quota[t];
+ } else {
+ res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2;
+ res_alloc->guaranteed[t] = 0;
+ }
+ break;
+ case RES_COUNTER:
+ res_alloc->quota[t] = dev->caps.max_counters;
+ res_alloc->guaranteed[t] = 0;
+ if (t == mlx4_master_func_num(dev))
+ res_alloc->res_free = res_alloc->quota[t];
+ break;
+ default:
+ break;
+ }
+ if (i == RES_MAC || i == RES_VLAN) {
+ for (j = 0; j < MLX4_MAX_PORTS; j++)
+ res_alloc->res_port_rsvd[j] +=
+ res_alloc->guaranteed[t];
+ } else {
+ res_alloc->res_reserved += res_alloc->guaranteed[t];
+ }
+ }
+ }
+ spin_lock_init(&priv->mfunc.master.res_tracker.lock);
+ return 0;
+
+no_mem_err:
+ for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated);
+ priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed);
+ priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota);
+ priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL;
+ }
+ return -ENOMEM;
+}
+
+void mlx4_free_resource_tracker(struct mlx4_dev *dev,
+ enum mlx4_res_tracker_free_type type)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i;
+
+ if (priv->mfunc.master.res_tracker.slave_list) {
+ if (type != RES_TR_FREE_STRUCTS_ONLY) {
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (type == RES_TR_FREE_ALL ||
+ dev->caps.function != i)
+ mlx4_delete_all_resources_for_slave(dev, i);
+ }
+ /* free master's vlans */
+ i = dev->caps.function;
+ mutex_lock(&priv->mfunc.master.res_tracker.slave_list[i].mutex);
+ rem_slave_vlans(dev, i);
+ mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[i].mutex);
+ }
+
+ if (type != RES_TR_FREE_SLAVES_ONLY) {
+ for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated);
+ priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed);
+ priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota);
+ priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL;
+ }
+ kfree(priv->mfunc.master.res_tracker.slave_list);
+ priv->mfunc.master.res_tracker.slave_list = NULL;
+ }
+ }
+}
+
+static void update_pkey_index(struct mlx4_dev *dev, int slave,
+ struct mlx4_cmd_mailbox *inbox)
+{
+ u8 sched = *(u8 *)(inbox->buf + 64);
+ u8 orig_index = *(u8 *)(inbox->buf + 35);
+ u8 new_index;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port;
+
+ port = (sched >> 6 & 1) + 1;
+
+ new_index = priv->virt2phys_pkey[slave][port - 1][orig_index];
+ *(u8 *)(inbox->buf + 35) = new_index;
+}
+
+static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox,
+ u8 slave)
+{
+ struct mlx4_qp_context *qp_ctx = inbox->buf + 8;
+ enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf);
+ u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
+ int port;
+
+ if (MLX4_QP_ST_UD == ts) {
+ port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port))
+ qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80;
+ else
+ qp_ctx->pri_path.mgid_index = 0x80 | slave;
+
+ } else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) {
+ if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
+ port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port)) {
+ qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave);
+ qp_ctx->pri_path.mgid_index &= 0x7f;
+ } else {
+ qp_ctx->pri_path.mgid_index = slave & 0x7F;
+ }
+ }
+ if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
+ port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port)) {
+ qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave);
+ qp_ctx->alt_path.mgid_index &= 0x7f;
+ } else {
+ qp_ctx->alt_path.mgid_index = slave & 0x7F;
+ }
+ }
+ }
+}
+
+static int update_vport_qp_param(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *inbox,
+ u8 slave)
+{
+ struct mlx4_qp_context *qpc = inbox->buf + 8;
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_priv *priv;
+ u32 qp_type;
+ int port;
+
+ port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1;
+ priv = mlx4_priv(dev);
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+
+ if (MLX4_VGT != vp_oper->state.default_vlan) {
+ qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+ if (MLX4_QP_ST_RC == qp_type)
+ return -EINVAL;
+
+ qpc->srqn |= cpu_to_be32(1 << 25); /*set cqe vlan mask */
+ qpc->pri_path.vlan_index = vp_oper->vlan_idx;
+ qpc->pri_path.fl = 1 << 6; /* set cv bit*/
+ qpc->pri_path.feup |= 1 << 3; /* set fvl bit */
+ qpc->pri_path.sched_queue &= 0xC7;
+ qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3;
+ mlx4_dbg(dev, "qp %d port %d Q 0x%x set vlan to %d vidx %d feup %x fl %x\n",
+ be32_to_cpu(qpc->local_qpn) & 0xffffff, port,
+ (int)(qpc->pri_path.sched_queue), vp_oper->state.default_vlan,
+ vp_oper->vlan_idx, (int)(qpc->pri_path.feup),
+ (int)(qpc->pri_path.fl));
+ }
+ if (vp_oper->state.spoofchk) {
+ qpc->pri_path.feup |= 1 << 5; /* set fsm bit */;
+ qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx;
+ mlx4_dbg(dev, "spoof qp %d port %d feup 0x%x, myLmc 0x%x mindx %d\n",
+ be32_to_cpu(qpc->local_qpn) & 0xffffff, port,
+ (int)qpc->pri_path.feup, (int)qpc->pri_path.grh_mylmc,
+ vp_oper->mac_idx);
+ }
+ return 0;
+}
+
+static int mpt_mask(struct mlx4_dev *dev)
+{
+ return dev->caps.num_mpts - 1;
+}
+
+static void *find_res(struct mlx4_dev *dev, int res_id,
+ enum mlx4_resource type)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return res_tracker_lookup(&priv->mfunc.master.res_tracker.res_tree[type],
+ res_id);
+}
+
+static int get_res(struct mlx4_dev *dev, int slave, u64 res_id,
+ enum mlx4_resource type,
+ void *res)
+{
+ struct res_common *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = find_res(dev, res_id, type);
+ if (!r) {
+ err = -ENOENT;
+ goto exit;
+ }
+
+ if (r->state == RES_ANY_BUSY) {
+ err = -EBUSY;
+ goto exit;
+ }
+
+ if (r->owner != slave) {
+ err = -EPERM;
+ goto exit;
+ }
+
+ r->from_state = r->state;
+ r->state = RES_ANY_BUSY;
+
+ if (res)
+ *((struct res_common **)res) = r;
+
+exit:
+ spin_unlock_irq(mlx4_tlock(dev));
+ return err;
+}
+
+int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
+ enum mlx4_resource type,
+ u64 res_id, int *slave)
+{
+
+ struct res_common *r;
+ int err = -ENOENT;
+ int id = res_id;
+
+ if (type == RES_QP)
+ id &= 0x7fffff;
+ spin_lock(mlx4_tlock(dev));
+
+ r = find_res(dev, id, type);
+ if (r) {
+ *slave = r->owner;
+ err = 0;
+ }
+ spin_unlock(mlx4_tlock(dev));
+
+ return err;
+}
+
+static void put_res(struct mlx4_dev *dev, int slave, u64 res_id,
+ enum mlx4_resource type)
+{
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = find_res(dev, res_id, type);
+ if (r)
+ r->state = r->from_state;
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static struct res_common *alloc_qp_tr(int id)
+{
+ struct res_qp *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_QP_RESERVED;
+ ret->local_qpn = id;
+ INIT_LIST_HEAD(&ret->mcg_list);
+ spin_lock_init(&ret->mcg_spl);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_mtt_tr(int id, int order)
+{
+ struct res_mtt *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->order = order;
+ ret->com.state = RES_MTT_ALLOCATED;
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_mpt_tr(int id, int key)
+{
+ struct res_mpt *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_MPT_RESERVED;
+ ret->key = key;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_eq_tr(int id)
+{
+ struct res_eq *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_EQ_RESERVED;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_cq_tr(int id)
+{
+ struct res_cq *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_CQ_ALLOCATED;
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_srq_tr(int id)
+{
+ struct res_srq *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_SRQ_ALLOCATED;
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_counter_tr(int id)
+{
+ struct res_counter *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_COUNTER_ALLOCATED;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_xrcdn_tr(int id)
+{
+ struct res_xrcdn *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_XRCD_ALLOCATED;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_fs_rule_tr(u64 id)
+{
+ struct res_fs_rule *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_FS_RULE_ALLOCATED;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave,
+ int extra)
+{
+ struct res_common *ret;
+
+ switch (type) {
+ case RES_QP:
+ ret = alloc_qp_tr(id);
+ break;
+ case RES_MPT:
+ ret = alloc_mpt_tr(id, extra);
+ break;
+ case RES_MTT:
+ ret = alloc_mtt_tr(id, extra);
+ break;
+ case RES_EQ:
+ ret = alloc_eq_tr(id);
+ break;
+ case RES_CQ:
+ ret = alloc_cq_tr(id);
+ break;
+ case RES_SRQ:
+ ret = alloc_srq_tr(id);
+ break;
+ case RES_MAC:
+ printk(KERN_ERR "implementation missing\n");
+ return NULL;
+ case RES_COUNTER:
+ ret = alloc_counter_tr(id);
+ break;
+ case RES_XRCD:
+ ret = alloc_xrcdn_tr(id);
+ break;
+ case RES_FS_RULE:
+ ret = alloc_fs_rule_tr(id);
+ break;
+ default:
+ return NULL;
+ }
+ if (ret)
+ ret->owner = slave;
+
+ return ret;
+}
+
+static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count,
+ enum mlx4_resource type, int extra)
+{
+ int i;
+ int err;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct res_common **res_arr;
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct rb_root *root = &tracker->res_tree[type];
+
+ res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL);
+ if (!res_arr)
+ return -ENOMEM;
+
+ for (i = 0; i < count; ++i) {
+ res_arr[i] = alloc_tr(base + i, type, slave, extra);
+ if (!res_arr[i]) {
+ for (--i; i >= 0; --i)
+ kfree(res_arr[i]);
+
+ kfree(res_arr);
+ return -ENOMEM;
+ }
+ }
+
+ spin_lock_irq(mlx4_tlock(dev));
+ for (i = 0; i < count; ++i) {
+ if (find_res(dev, base + i, type)) {
+ err = -EEXIST;
+ goto undo;
+ }
+ err = res_tracker_insert(root, res_arr[i]);
+ if (err)
+ goto undo;
+ list_add_tail(&res_arr[i]->list,
+ &tracker->slave_list[slave].res_list[type]);
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(res_arr);
+
+ return 0;
+
+undo:
+ for (--i; i >= base; --i)
+ rb_erase(&res_arr[i]->node, root);
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ for (i = 0; i < count; ++i)
+ kfree(res_arr[i]);
+
+ kfree(res_arr);
+
+ return err;
+}
+
+static int remove_qp_ok(struct res_qp *res)
+{
+ if (res->com.state == RES_QP_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_QP_RESERVED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_mtt_ok(struct res_mtt *res, int order)
+{
+ if (res->com.state == RES_MTT_BUSY ||
+ atomic_read(&res->ref_count)) {
+ printk(KERN_DEBUG "%s-%d: state %s, ref_count %d\n",
+ __func__, __LINE__,
+ mtt_states_str(res->com.state),
+ atomic_read(&res->ref_count));
+ return -EBUSY;
+ } else if (res->com.state != RES_MTT_ALLOCATED)
+ return -EPERM;
+ else if (res->order != order)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int remove_mpt_ok(struct res_mpt *res)
+{
+ if (res->com.state == RES_MPT_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_MPT_RESERVED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_eq_ok(struct res_eq *res)
+{
+ if (res->com.state == RES_MPT_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_MPT_RESERVED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_counter_ok(struct res_counter *res)
+{
+ if (res->com.state == RES_COUNTER_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_COUNTER_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_xrcdn_ok(struct res_xrcdn *res)
+{
+ if (res->com.state == RES_XRCD_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_XRCD_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_fs_rule_ok(struct res_fs_rule *res)
+{
+ if (res->com.state == RES_FS_RULE_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_FS_RULE_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_cq_ok(struct res_cq *res)
+{
+ if (res->com.state == RES_CQ_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_CQ_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_srq_ok(struct res_srq *res)
+{
+ if (res->com.state == RES_SRQ_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_SRQ_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra)
+{
+ switch (type) {
+ case RES_QP:
+ return remove_qp_ok((struct res_qp *)res);
+ case RES_CQ:
+ return remove_cq_ok((struct res_cq *)res);
+ case RES_SRQ:
+ return remove_srq_ok((struct res_srq *)res);
+ case RES_MPT:
+ return remove_mpt_ok((struct res_mpt *)res);
+ case RES_MTT:
+ return remove_mtt_ok((struct res_mtt *)res, extra);
+ case RES_MAC:
+ return -ENOSYS;
+ case RES_EQ:
+ return remove_eq_ok((struct res_eq *)res);
+ case RES_COUNTER:
+ return remove_counter_ok((struct res_counter *)res);
+ case RES_XRCD:
+ return remove_xrcdn_ok((struct res_xrcdn *)res);
+ case RES_FS_RULE:
+ return remove_fs_rule_ok((struct res_fs_rule *)res);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int rem_res_range(struct mlx4_dev *dev, int slave, u64 base, int count,
+ enum mlx4_resource type, int extra)
+{
+ u64 i;
+ int err;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ for (i = base; i < base + count; ++i) {
+ r = res_tracker_lookup(&tracker->res_tree[type], i);
+ if (!r) {
+ err = -ENOENT;
+ goto out;
+ }
+ if (r->owner != slave) {
+ err = -EPERM;
+ goto out;
+ }
+ err = remove_ok(r, type, extra);
+ if (err)
+ goto out;
+ }
+
+ for (i = base; i < base + count; ++i) {
+ r = res_tracker_lookup(&tracker->res_tree[type], i);
+ rb_erase(&r->node, &tracker->res_tree[type]);
+ list_del(&r->list);
+ kfree(r);
+ }
+ err = 0;
+
+out:
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn,
+ enum res_qp_states state, struct res_qp **qp,
+ int alloc)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_qp *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_QP], qpn);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_QP_BUSY:
+ mlx4_dbg(dev, "%s: failed RES_QP, 0x%llx\n",
+ __func__, r->com.res_id);
+ err = -EBUSY;
+ break;
+
+ case RES_QP_RESERVED:
+ if (r->com.state == RES_QP_MAPPED && !alloc)
+ break;
+
+ mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", r->com.res_id);
+ err = -EINVAL;
+ break;
+
+ case RES_QP_MAPPED:
+ if ((r->com.state == RES_QP_RESERVED && alloc) ||
+ r->com.state == RES_QP_HW)
+ break;
+ else {
+ mlx4_dbg(dev, "failed RES_QP, 0x%llx\n",
+ r->com.res_id);
+ err = -EINVAL;
+ }
+
+ break;
+
+ case RES_QP_HW:
+ if (r->com.state != RES_QP_MAPPED)
+ err = -EINVAL;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_QP_BUSY;
+ if (qp)
+ *qp = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int mr_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
+ enum res_mpt_states state, struct res_mpt **mpt)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_mpt *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_MPT], index);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_MPT_BUSY:
+ err = -EINVAL;
+ break;
+
+ case RES_MPT_RESERVED:
+ if (r->com.state != RES_MPT_MAPPED)
+ err = -EINVAL;
+ break;
+
+ case RES_MPT_MAPPED:
+ if (r->com.state != RES_MPT_RESERVED &&
+ r->com.state != RES_MPT_HW)
+ err = -EINVAL;
+ break;
+
+ case RES_MPT_HW:
+ if (r->com.state != RES_MPT_MAPPED)
+ err = -EINVAL;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_MPT_BUSY;
+ if (mpt)
+ *mpt = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
+ enum res_eq_states state, struct res_eq **eq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_eq *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_EQ], index);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_EQ_BUSY:
+ err = -EINVAL;
+ break;
+
+ case RES_EQ_RESERVED:
+ if (r->com.state != RES_EQ_HW)
+ err = -EINVAL;
+ break;
+
+ case RES_EQ_HW:
+ if (r->com.state != RES_EQ_RESERVED)
+ err = -EINVAL;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_EQ_BUSY;
+ if (eq)
+ *eq = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn,
+ enum res_cq_states state, struct res_cq **cq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_cq *r;
+ int err;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_CQ_BUSY:
+ err = -EBUSY;
+ break;
+
+ case RES_CQ_ALLOCATED:
+ if (r->com.state != RES_CQ_HW)
+ err = -EINVAL;
+ else if (atomic_read(&r->ref_count))
+ err = -EBUSY;
+ else
+ err = 0;
+ break;
+
+ case RES_CQ_HW:
+ if (r->com.state != RES_CQ_ALLOCATED)
+ err = -EINVAL;
+ else
+ err = 0;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_CQ_BUSY;
+ if (cq)
+ *cq = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
+ enum res_srq_states state, struct res_srq **srq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_srq *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_SRQ_BUSY:
+ err = -EINVAL;
+ break;
+
+ case RES_SRQ_ALLOCATED:
+ if (r->com.state != RES_SRQ_HW)
+ err = -EINVAL;
+ else if (atomic_read(&r->ref_count))
+ err = -EBUSY;
+ break;
+
+ case RES_SRQ_HW:
+ if (r->com.state != RES_SRQ_ALLOCATED)
+ err = -EINVAL;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_SRQ_BUSY;
+ if (srq)
+ *srq = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static void res_abort_move(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type, int id)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[type], id);
+ if (r && (r->owner == slave))
+ r->state = r->from_state;
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void res_end_move(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type, int id)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[type], id);
+ if (r && (r->owner == slave))
+ r->state = r->to_state;
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn)
+{
+ return mlx4_is_qp_reserved(dev, qpn) &&
+ (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn));
+}
+
+static int fw_reserved(struct mlx4_dev *dev, int qpn)
+{
+ return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
+}
+
+static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err;
+ int count;
+ int align;
+ int base;
+ int qpn;
+ u8 bf_qp;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ count = get_param_l(&in_param) & 0xffffff;
+ bf_qp = get_param_l(&in_param) >> 31;
+ align = get_param_h(&in_param);
+ err = mlx4_grant_resource(dev, slave, RES_QP, count, 0);
+ if (err)
+ return err;
+
+ err = __mlx4_qp_reserve_range(dev, count, align, &base, bf_qp);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
+ return err;
+ }
+
+ err = add_res_range(dev, slave, base, count, RES_QP, 0);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
+ __mlx4_qp_release_range(dev, base, count);
+ return err;
+ }
+ set_param_l(out_param, base);
+ break;
+ case RES_OP_MAP_ICM:
+ qpn = get_param_l(&in_param) & 0x7fffff;
+ if (valid_reserved(dev, slave, qpn)) {
+ err = add_res_range(dev, slave, qpn, 1, RES_QP, 0);
+ if (err)
+ return err;
+ }
+
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED,
+ NULL, 1);
+ if (err)
+ return err;
+
+ if (!fw_reserved(dev, qpn)) {
+ err = __mlx4_qp_alloc_icm(dev, qpn);
+ if (err) {
+ res_abort_move(dev, slave, RES_QP, qpn);
+ return err;
+ }
+ }
+
+ res_end_move(dev, slave, RES_QP, qpn);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+ return err;
+}
+
+static int mtt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err = -EINVAL;
+ int base;
+ int order;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ order = get_param_l(&in_param);
+
+ err = mlx4_grant_resource(dev, slave, RES_MTT, 1 << order, 0);
+ if (err)
+ return err;
+
+ base = __mlx4_alloc_mtt_range(dev, order);
+ if (base == -1) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
+ return -ENOMEM;
+ }
+
+ err = add_res_range(dev, slave, base, 1, RES_MTT, order);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
+ __mlx4_free_mtt_range(dev, base, order);
+ } else
+ set_param_l(out_param, base);
+
+ return err;
+}
+
+static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err = -EINVAL;
+ int index;
+ int id;
+ struct res_mpt *mpt;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ err = mlx4_grant_resource(dev, slave, RES_MPT, 1, 0);
+ if (err)
+ break;
+
+ index = __mlx4_mr_reserve(dev);
+ if (index == -1) {
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
+ break;
+ }
+ id = index & mpt_mask(dev);
+
+ err = add_res_range(dev, slave, id, 1, RES_MPT, index);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
+ __mlx4_mr_release(dev, index);
+ break;
+ }
+ set_param_l(out_param, index);
+ break;
+ case RES_OP_MAP_ICM:
+ index = get_param_l(&in_param);
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id,
+ RES_MPT_MAPPED, &mpt);
+ if (err)
+ return err;
+
+ err = __mlx4_mr_alloc_icm(dev, mpt->key);
+ if (err) {
+ res_abort_move(dev, slave, RES_MPT, id);
+ return err;
+ }
+
+ res_end_move(dev, slave, RES_MPT, id);
+ break;
+ }
+ return err;
+}
+
+static int cq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int cqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ err = mlx4_grant_resource(dev, slave, RES_CQ, 1, 0);
+ if (err)
+ break;
+
+ err = __mlx4_cq_alloc_icm(dev, &cqn);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
+ break;
+ }
+
+ err = add_res_range(dev, slave, cqn, 1, RES_CQ, 0);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
+ __mlx4_cq_free_icm(dev, cqn);
+ break;
+ }
+
+ set_param_l(out_param, cqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int srq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int srqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ err = mlx4_grant_resource(dev, slave, RES_SRQ, 1, 0);
+ if (err)
+ break;
+
+ err = __mlx4_srq_alloc_icm(dev, &srqn);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
+ break;
+ }
+
+ err = add_res_range(dev, slave, srqn, 1, RES_SRQ, 0);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
+ __mlx4_srq_free_icm(dev, srqn);
+ break;
+ }
+
+ set_param_l(out_param, srqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int mac_find_smac_ix_in_slave(struct mlx4_dev *dev, int slave, int port,
+ u8 smac_index, u64 *mac)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ if (res->smac_index == smac_index && res->port == (u8) port) {
+ *mac = res->mac;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, u8 smac_index)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ if (res->mac == mac && res->port == (u8) port) {
+ /* mac found. update ref count */
+ ++res->ref_count;
+ return 0;
+ }
+ }
+
+ if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port))
+ return -EINVAL;
+ res = kzalloc(sizeof *res, GFP_KERNEL);
+ if (!res) {
+ mlx4_release_resource(dev, slave, RES_MAC, 1, port);
+ return -ENOMEM;
+ }
+ res->mac = mac;
+ res->port = (u8) port;
+ res->smac_index = smac_index;
+ res->ref_count = 1;
+ list_add_tail(&res->list,
+ &tracker->slave_list[slave].res_list[RES_MAC]);
+ return 0;
+}
+
+
+static void mac_del_from_slave(struct mlx4_dev *dev, int slave, u64 mac,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ if (res->mac == mac && res->port == (u8) port) {
+ if (!--res->ref_count) {
+ list_del(&res->list);
+ mlx4_release_resource(dev, slave, RES_MAC, 1, port);
+ kfree(res);
+ }
+ break;
+ }
+ }
+}
+
+static void rem_slave_macs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+ int i;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ list_del(&res->list);
+ /* dereference the mac the num times the slave referenced it */
+ for (i = 0; i < res->ref_count; i++)
+ __mlx4_unregister_mac(dev, res->port, res->mac);
+ mlx4_release_resource(dev, slave, RES_MAC, 1, res->port);
+ kfree(res);
+ }
+}
+
+static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int in_port)
+{
+ int err = -EINVAL;
+ int port;
+ u64 mac;
+ u8 smac_index = 0;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ port = !in_port ? get_param_l(out_param) : in_port;
+ mac = in_param;
+
+ err = __mlx4_register_mac(dev, port, mac);
+ if (err >= 0) {
+ smac_index = err;
+ set_param_l(out_param, err);
+ err = 0;
+ }
+
+ if (!err) {
+ err = mac_add_to_slave(dev, slave, mac, port, smac_index);
+ if (err)
+ __mlx4_unregister_mac(dev, port, mac);
+ }
+ return err;
+}
+
+static int vlan_add_to_slave(struct mlx4_dev *dev, int slave, u16 vlan,
+ int port, int vlan_index)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *vlan_list =
+ &tracker->slave_list[slave].res_list[RES_VLAN];
+ struct vlan_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, vlan_list, list) {
+ if (res->vlan == vlan && res->port == (u8) port) {
+ /* vlan found. update ref count */
+ ++res->ref_count;
+ return 0;
+ }
+ }
+
+ if (mlx4_grant_resource(dev, slave, RES_VLAN, 1, port))
+ return -EINVAL;
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ mlx4_release_resource(dev, slave, RES_VLAN, 1, port);
+ return -ENOMEM;
+ }
+ res->vlan = vlan;
+ res->port = (u8) port;
+ res->vlan_index = vlan_index;
+ res->ref_count = 1;
+ list_add_tail(&res->list,
+ &tracker->slave_list[slave].res_list[RES_VLAN]);
+ return 0;
+}
+
+
+static void vlan_del_from_slave(struct mlx4_dev *dev, int slave, u16 vlan,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *vlan_list =
+ &tracker->slave_list[slave].res_list[RES_VLAN];
+ struct vlan_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, vlan_list, list) {
+ if (res->vlan == vlan && res->port == (u8) port) {
+ if (!--res->ref_count) {
+ list_del(&res->list);
+ mlx4_release_resource(dev, slave, RES_VLAN,
+ 1, port);
+ kfree(res);
+ }
+ break;
+ }
+ }
+}
+
+static void rem_slave_vlans(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *vlan_list =
+ &tracker->slave_list[slave].res_list[RES_VLAN];
+ struct vlan_res *res, *tmp;
+ int i;
+
+ list_for_each_entry_safe(res, tmp, vlan_list, list) {
+ list_del(&res->list);
+ /* dereference the vlan the num times the slave referenced it */
+ for (i = 0; i < res->ref_count; i++)
+ __mlx4_unregister_vlan(dev, res->port, res->vlan);
+ mlx4_release_resource(dev, slave, RES_VLAN, 1, res->port);
+ kfree(res);
+ }
+}
+
+static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int port)
+{
+ int err = -EINVAL;
+ u16 vlan;
+ int vlan_index;
+
+ if (!port)
+ return err;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ vlan = (u16) in_param;
+
+ err = __mlx4_register_vlan(dev, port, vlan, &vlan_index);
+ if (!err) {
+ set_param_l(out_param, (u32) vlan_index);
+ err = vlan_add_to_slave(dev, slave, vlan, port, vlan_index);
+ if (err)
+ __mlx4_unregister_vlan(dev, port, vlan);
+ }
+ return err;
+}
+
+static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ u32 index;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ err = mlx4_grant_resource(dev, slave, RES_COUNTER, 1, 0);
+ if (err)
+ return err;
+
+ err = __mlx4_counter_alloc(dev, &index);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
+ return err;
+ }
+
+ err = add_res_range(dev, slave, index, 1, RES_COUNTER, 0);
+ if (err) {
+ __mlx4_counter_free(dev, index);
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
+ } else {
+ set_param_l(out_param, index);
+ }
+
+ return err;
+}
+
+static int xrcdn_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ u32 xrcdn;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ err = __mlx4_xrcd_alloc(dev, &xrcdn);
+ if (err)
+ return err;
+
+ err = add_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0);
+ if (err)
+ __mlx4_xrcd_free(dev, xrcdn);
+ else
+ set_param_l(out_param, xrcdn);
+
+ return err;
+}
+
+int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int alop = vhcr->op_modifier;
+
+ switch (vhcr->in_modifier & 0xFF) {
+ case RES_QP:
+ err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MTT:
+ err = mtt_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MPT:
+ err = mpt_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_CQ:
+ err = cq_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_SRQ:
+ err = srq_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MAC:
+ err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_VLAN:
+ err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_COUNTER:
+ err = counter_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_XRCD:
+ err = xrcdn_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param)
+{
+ int err;
+ int count;
+ int base;
+ int qpn;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ base = get_param_l(&in_param) & 0x7fffff;
+ count = get_param_h(&in_param);
+ err = rem_res_range(dev, slave, base, count, RES_QP, 0);
+ if (err)
+ break;
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
+ __mlx4_qp_release_range(dev, base, count);
+ break;
+ case RES_OP_MAP_ICM:
+ qpn = get_param_l(&in_param) & 0x7fffff;
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_RESERVED,
+ NULL, 0);
+ if (err)
+ return err;
+
+ if (!fw_reserved(dev, qpn))
+ __mlx4_qp_free_icm(dev, qpn);
+
+ res_end_move(dev, slave, RES_QP, qpn);
+
+ if (valid_reserved(dev, slave, qpn))
+ err = rem_res_range(dev, slave, qpn, 1, RES_QP, 0);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ return err;
+}
+
+static int mtt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err = -EINVAL;
+ int base;
+ int order;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ base = get_param_l(&in_param);
+ order = get_param_h(&in_param);
+ err = rem_res_range(dev, slave, base, 1, RES_MTT, order);
+ if (!err) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
+ __mlx4_free_mtt_range(dev, base, order);
+ }
+ return err;
+}
+
+static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param)
+{
+ int err = -EINVAL;
+ int index;
+ int id;
+ struct res_mpt *mpt;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ index = get_param_l(&in_param);
+ id = index & mpt_mask(dev);
+ err = get_res(dev, slave, id, RES_MPT, &mpt);
+ if (err)
+ break;
+ index = mpt->key;
+ put_res(dev, slave, id, RES_MPT);
+
+ err = rem_res_range(dev, slave, id, 1, RES_MPT, 0);
+ if (err)
+ break;
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
+ __mlx4_mr_release(dev, index);
+ break;
+ case RES_OP_MAP_ICM:
+ index = get_param_l(&in_param);
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id,
+ RES_MPT_RESERVED, &mpt);
+ if (err)
+ return err;
+
+ __mlx4_mr_free_icm(dev, mpt->key);
+ res_end_move(dev, slave, RES_MPT, id);
+ return err;
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ return err;
+}
+
+static int cq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int cqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ cqn = get_param_l(&in_param);
+ err = rem_res_range(dev, slave, cqn, 1, RES_CQ, 0);
+ if (err)
+ break;
+
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
+ __mlx4_cq_free_icm(dev, cqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int srqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ srqn = get_param_l(&in_param);
+ err = rem_res_range(dev, slave, srqn, 1, RES_SRQ, 0);
+ if (err)
+ break;
+
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
+ __mlx4_srq_free_icm(dev, srqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int in_port)
+{
+ int port;
+ int err = 0;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ port = !in_port ? get_param_l(out_param) : in_port;
+ mac_del_from_slave(dev, slave, in_param, port);
+ __mlx4_unregister_mac(dev, port, in_param);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+
+}
+
+static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int port)
+{
+ int err = 0;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ if (!port)
+ return -EINVAL;
+ vlan_del_from_slave(dev, slave, in_param, port);
+ __mlx4_unregister_vlan(dev, port, in_param);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int counter_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int index;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ index = get_param_l(&in_param);
+ err = rem_res_range(dev, slave, index, 1, RES_COUNTER, 0);
+ if (err)
+ return err;
+
+ __mlx4_counter_free(dev, index);
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
+
+ return err;
+}
+
+static int xrcdn_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int xrcdn;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ xrcdn = get_param_l(&in_param);
+ err = rem_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0);
+ if (err)
+ return err;
+
+ __mlx4_xrcd_free(dev, xrcdn);
+
+ return err;
+}
+
+int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err = -EINVAL;
+ int alop = vhcr->op_modifier;
+
+ switch (vhcr->in_modifier & 0xFF) {
+ case RES_QP:
+ err = qp_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param);
+ break;
+
+ case RES_MTT:
+ err = mtt_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MPT:
+ err = mpt_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param);
+ break;
+
+ case RES_CQ:
+ err = cq_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_SRQ:
+ err = srq_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MAC:
+ err = mac_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_VLAN:
+ err = vlan_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_COUNTER:
+ err = counter_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_XRCD:
+ err = xrcdn_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+
+ default:
+ break;
+ }
+ return err;
+}
+
+/* ugly but other choices are uglier */
+static int mr_phys_mpt(struct mlx4_mpt_entry *mpt)
+{
+ return (be32_to_cpu(mpt->flags) >> 9) & 1;
+}
+
+static int mr_get_mtt_addr(struct mlx4_mpt_entry *mpt)
+{
+ return (int)be64_to_cpu(mpt->mtt_addr) & 0xfffffff8;
+}
+
+static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->mtt_sz);
+}
+
+static int qp_get_mtt_addr(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8;
+}
+
+static int srq_get_mtt_addr(struct mlx4_srq_context *srqc)
+{
+ return be32_to_cpu(srqc->mtt_base_addr_l) & 0xfffffff8;
+}
+
+static int qp_get_mtt_size(struct mlx4_qp_context *qpc)
+{
+ int page_shift = (qpc->log_page_size & 0x3f) + 12;
+ int log_sq_size = (qpc->sq_size_stride >> 3) & 0xf;
+ int log_sq_sride = qpc->sq_size_stride & 7;
+ int log_rq_size = (qpc->rq_size_stride >> 3) & 0xf;
+ int log_rq_stride = qpc->rq_size_stride & 7;
+ int srq = (be32_to_cpu(qpc->srqn) >> 24) & 1;
+ int rss = (be32_to_cpu(qpc->flags) >> 13) & 1;
+ int xrc = (be32_to_cpu(qpc->local_qpn) >> 23) & 1;
+ int sq_size;
+ int rq_size;
+ int total_pages;
+ int total_mem;
+ int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f;
+
+ sq_size = 1 << (log_sq_size + log_sq_sride + 4);
+ rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4));
+ total_mem = sq_size + rq_size;
+ total_pages =
+ roundup_pow_of_two((total_mem + (page_offset << 6)) >>
+ page_shift);
+
+ return total_pages;
+}
+
+static int check_mtt_range(struct mlx4_dev *dev, int slave, int start,
+ int size, struct res_mtt *mtt)
+{
+ int res_start = mtt->com.res_id;
+ int res_size = (1 << mtt->order);
+
+ if (start < res_start || start + size > res_start + res_size)
+ return -EPERM;
+ return 0;
+}
+
+int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier;
+ struct res_mtt *mtt;
+ struct res_mpt *mpt;
+ int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz;
+ int phys;
+ int id;
+
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id, RES_MPT_HW, &mpt);
+ if (err)
+ return err;
+
+ phys = mr_phys_mpt(inbox->buf);
+ if (!phys) {
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_abort;
+
+ err = check_mtt_range(dev, slave, mtt_base,
+ mr_get_mtt_size(inbox->buf), mtt);
+ if (err)
+ goto ex_put;
+
+ mpt->mtt = mtt;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put;
+
+ if (!phys) {
+ atomic_inc(&mtt->ref_count);
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ }
+
+ res_end_move(dev, slave, RES_MPT, id);
+ return 0;
+
+ex_put:
+ if (!phys)
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_MPT, id);
+
+ return err;
+}
+
+int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier;
+ struct res_mpt *mpt;
+ int id;
+
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id, RES_MPT_MAPPED, &mpt);
+ if (err)
+ return err;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_abort;
+
+ if (mpt->mtt)
+ atomic_dec(&mpt->mtt->ref_count);
+
+ res_end_move(dev, slave, RES_MPT, id);
+ return 0;
+
+ex_abort:
+ res_abort_move(dev, slave, RES_MPT, id);
+
+ return err;
+}
+
+int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier;
+ struct res_mpt *mpt;
+ int id;
+
+ id = index & mpt_mask(dev);
+ err = get_res(dev, slave, id, RES_MPT, &mpt);
+ if (err)
+ return err;
+
+ if (mpt->com.from_state != RES_MPT_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+
+out:
+ put_res(dev, slave, id, RES_MPT);
+ return err;
+}
+
+static int qp_get_rcqn(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->cqn_recv) & 0xffffff;
+}
+
+static int qp_get_scqn(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->cqn_send) & 0xffffff;
+}
+
+static u32 qp_get_srqn(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->srqn) & 0x1ffffff;
+}
+
+static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr,
+ struct mlx4_qp_context *context)
+{
+ u32 qpn = vhcr->in_modifier & 0xffffff;
+ u32 qkey = 0;
+
+ if (mlx4_get_parav_qkey(dev, qpn, &qkey))
+ return;
+
+ /* adjust qkey in qp context */
+ context->qkey = cpu_to_be32(qkey);
+}
+
+int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_mtt *mtt;
+ struct res_qp *qp;
+ struct mlx4_qp_context *qpc = inbox->buf + 8;
+ int mtt_base = qp_get_mtt_addr(qpc) / dev->caps.mtt_entry_sz;
+ int mtt_size = qp_get_mtt_size(qpc);
+ struct res_cq *rcq;
+ struct res_cq *scq;
+ int rcqn = qp_get_rcqn(qpc);
+ int scqn = qp_get_scqn(qpc);
+ u32 srqn = qp_get_srqn(qpc) & 0xffffff;
+ int use_srq = (qp_get_srqn(qpc) >> 24) & 1;
+ struct res_srq *srq;
+ int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff;
+
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_HW, &qp, 0);
+ if (err)
+ return err;
+ qp->local_qpn = local_qpn;
+
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_abort;
+
+ err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt);
+ if (err)
+ goto ex_put_mtt;
+
+ err = get_res(dev, slave, rcqn, RES_CQ, &rcq);
+ if (err)
+ goto ex_put_mtt;
+
+ if (scqn != rcqn) {
+ err = get_res(dev, slave, scqn, RES_CQ, &scq);
+ if (err)
+ goto ex_put_rcq;
+ } else
+ scq = rcq;
+
+ if (use_srq) {
+ err = get_res(dev, slave, srqn, RES_SRQ, &srq);
+ if (err)
+ goto ex_put_scq;
+ }
+
+ adjust_proxy_tun_qkey(dev, vhcr, qpc);
+ update_pkey_index(dev, slave, inbox);
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put_srq;
+ atomic_inc(&mtt->ref_count);
+ qp->mtt = mtt;
+ atomic_inc(&rcq->ref_count);
+ qp->rcq = rcq;
+ atomic_inc(&scq->ref_count);
+ qp->scq = scq;
+
+ if (scqn != rcqn)
+ put_res(dev, slave, scqn, RES_CQ);
+
+ if (use_srq) {
+ atomic_inc(&srq->ref_count);
+ put_res(dev, slave, srqn, RES_SRQ);
+ qp->srq = srq;
+ }
+ put_res(dev, slave, rcqn, RES_CQ);
+ put_res(dev, slave, mtt_base, RES_MTT);
+ res_end_move(dev, slave, RES_QP, qpn);
+
+ return 0;
+
+ex_put_srq:
+ if (use_srq)
+ put_res(dev, slave, srqn, RES_SRQ);
+ex_put_scq:
+ if (scqn != rcqn)
+ put_res(dev, slave, scqn, RES_CQ);
+ex_put_rcq:
+ put_res(dev, slave, rcqn, RES_CQ);
+ex_put_mtt:
+ put_res(dev, slave, mtt_base, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_QP, qpn);
+
+ return err;
+}
+
+static int eq_get_mtt_addr(struct mlx4_eq_context *eqc)
+{
+ return be32_to_cpu(eqc->mtt_base_addr_l) & 0xfffffff8;
+}
+
+static int eq_get_mtt_size(struct mlx4_eq_context *eqc)
+{
+ int log_eq_size = eqc->log_eq_size & 0x1f;
+ int page_shift = (eqc->log_page_size & 0x3f) + 12;
+
+ if (log_eq_size + 5 < page_shift)
+ return 1;
+
+ return 1 << (log_eq_size + 5 - page_shift);
+}
+
+static int cq_get_mtt_addr(struct mlx4_cq_context *cqc)
+{
+ return be32_to_cpu(cqc->mtt_base_addr_l) & 0xfffffff8;
+}
+
+static int cq_get_mtt_size(struct mlx4_cq_context *cqc)
+{
+ int log_cq_size = (be32_to_cpu(cqc->logsize_usrpage) >> 24) & 0x1f;
+ int page_shift = (cqc->log_page_size & 0x3f) + 12;
+
+ if (log_cq_size + 5 < page_shift)
+ return 1;
+
+ return 1 << (log_cq_size + 5 - page_shift);
+}
+
+int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int eqn = vhcr->in_modifier;
+ int res_id = (slave << 8) | eqn;
+ struct mlx4_eq_context *eqc = inbox->buf;
+ int mtt_base = eq_get_mtt_addr(eqc) / dev->caps.mtt_entry_sz;
+ int mtt_size = eq_get_mtt_size(eqc);
+ struct res_eq *eq;
+ struct res_mtt *mtt;
+
+ err = add_res_range(dev, slave, res_id, 1, RES_EQ, 0);
+ if (err)
+ return err;
+ err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_HW, &eq);
+ if (err)
+ goto out_add;
+
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto out_move;
+
+ err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt);
+ if (err)
+ goto out_put;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto out_put;
+
+ atomic_inc(&mtt->ref_count);
+ eq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_EQ, res_id);
+ return 0;
+
+out_put:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+out_move:
+ res_abort_move(dev, slave, RES_EQ, res_id);
+out_add:
+ rem_res_range(dev, slave, res_id, 1, RES_EQ, 0);
+ return err;
+}
+
+static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start,
+ int len, struct res_mtt **res)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_mtt *mtt;
+ int err = -EINVAL;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry(mtt, &tracker->slave_list[slave].res_list[RES_MTT],
+ com.list) {
+ if (!check_mtt_range(dev, slave, start, len, mtt)) {
+ *res = mtt;
+ mtt->com.from_state = mtt->com.state;
+ mtt->com.state = RES_MTT_BUSY;
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int verify_qp_parameters(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *inbox,
+ enum qp_transition transition, u8 slave)
+{
+ u32 qp_type;
+ struct mlx4_qp_context *qp_ctx;
+ enum mlx4_qp_optpar optpar;
+ int port;
+ int num_gids;
+
+ qp_ctx = inbox->buf + 8;
+ qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
+ optpar = be32_to_cpu(*(__be32 *) inbox->buf);
+
+ switch (qp_type) {
+ case MLX4_QP_ST_RC:
+ case MLX4_QP_ST_UC:
+ switch (transition) {
+ case QP_TRANS_INIT2RTR:
+ case QP_TRANS_RTR2RTS:
+ case QP_TRANS_RTS2RTS:
+ case QP_TRANS_SQD2SQD:
+ case QP_TRANS_SQD2RTS:
+ if (slave != mlx4_master_func_num(dev))
+ if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
+ port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
+ num_gids = mlx4_get_slave_num_gids(dev, slave);
+ else
+ num_gids = 1;
+ if (qp_ctx->pri_path.mgid_index >= num_gids)
+ return -EINVAL;
+ }
+ if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
+ port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1;
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
+ num_gids = mlx4_get_slave_num_gids(dev, slave);
+ else
+ num_gids = 1;
+ if (qp_ctx->alt_path.mgid_index >= num_gids)
+ return -EINVAL;
+ }
+ break;
+ default:
+ break;
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_mtt mtt;
+ __be64 *page_list = inbox->buf;
+ u64 *pg_list = (u64 *)page_list;
+ int i;
+ struct res_mtt *rmtt = NULL;
+ int start = be64_to_cpu(page_list[0]);
+ int npages = vhcr->in_modifier;
+ int err;
+
+ err = get_containing_mtt(dev, slave, start, npages, &rmtt);
+ if (err)
+ return err;
+
+ /* Call the SW implementation of write_mtt:
+ * - Prepare a dummy mtt struct
+ * - Translate inbox contents to simple addresses in host endianess */
+ mtt.offset = 0; /* TBD this is broken but I don't handle it since
+ we don't really use it */
+ mtt.order = 0;
+ mtt.page_shift = 0;
+ for (i = 0; i < npages; ++i)
+ pg_list[i + 2] = (be64_to_cpu(page_list[i + 2]) & ~1ULL);
+
+ err = __mlx4_write_mtt(dev, &mtt, be64_to_cpu(page_list[0]), npages,
+ ((u64 *)page_list + 2));
+
+ if (rmtt)
+ put_res(dev, slave, rmtt->com.res_id, RES_MTT);
+
+ return err;
+}
+
+int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int eqn = vhcr->in_modifier;
+ int res_id = eqn | (slave << 8);
+ struct res_eq *eq;
+ int err;
+
+ err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_RESERVED, &eq);
+ if (err)
+ return err;
+
+ err = get_res(dev, slave, eq->mtt->com.res_id, RES_MTT, NULL);
+ if (err)
+ goto ex_abort;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put;
+
+ atomic_dec(&eq->mtt->ref_count);
+ put_res(dev, slave, eq->mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_EQ, res_id);
+ rem_res_range(dev, slave, res_id, 1, RES_EQ, 0);
+
+ return 0;
+
+ex_put:
+ put_res(dev, slave, eq->mtt->com.res_id, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_EQ, res_id);
+
+ return err;
+}
+
+int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_event_eq_info *event_eq;
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 in_modifier = 0;
+ int err;
+ int res_id;
+ struct res_eq *req;
+
+ if (!priv->mfunc.master.slave_state)
+ return -EINVAL;
+
+ event_eq = &priv->mfunc.master.slave_state[slave].event_eq[eqe->type];
+
+ /* Create the event only if the slave is registered */
+ if (event_eq->eqn < 0)
+ return 0;
+
+ mutex_lock(&priv->mfunc.master.gen_eqe_mutex[slave]);
+ res_id = (slave << 8) | event_eq->eqn;
+ err = get_res(dev, slave, res_id, RES_EQ, &req);
+ if (err)
+ goto unlock;
+
+ if (req->com.from_state != RES_EQ_HW) {
+ err = -EINVAL;
+ goto put;
+ }
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto put;
+ }
+
+ if (eqe->type == MLX4_EVENT_TYPE_CMD) {
+ ++event_eq->token;
+ eqe->event.cmd.token = cpu_to_be16(event_eq->token);
+ }
+
+ memcpy(mailbox->buf, (u8 *) eqe, 28);
+
+ in_modifier = (slave & 0xff) | ((event_eq->eqn & 0xff) << 16);
+
+ err = mlx4_cmd(dev, mailbox->dma, in_modifier, 0,
+ MLX4_CMD_GEN_EQE, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ put_res(dev, slave, res_id, RES_EQ);
+ mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+
+put:
+ put_res(dev, slave, res_id, RES_EQ);
+
+unlock:
+ mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]);
+ return err;
+}
+
+int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int eqn = vhcr->in_modifier;
+ int res_id = eqn | (slave << 8);
+ struct res_eq *eq;
+ int err;
+
+ err = get_res(dev, slave, res_id, RES_EQ, &eq);
+ if (err)
+ return err;
+
+ if (eq->com.from_state != RES_EQ_HW) {
+ err = -EINVAL;
+ goto ex_put;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+
+ex_put:
+ put_res(dev, slave, res_id, RES_EQ);
+ return err;
+}
+
+int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int cqn = vhcr->in_modifier;
+ struct mlx4_cq_context *cqc = inbox->buf;
+ int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
+ struct res_cq *cq;
+ struct res_mtt *mtt;
+
+ err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_HW, &cq);
+ if (err)
+ return err;
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto out_move;
+ err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt);
+ if (err)
+ goto out_put;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto out_put;
+ atomic_inc(&mtt->ref_count);
+ cq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_CQ, cqn);
+ return 0;
+
+out_put:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+out_move:
+ res_abort_move(dev, slave, RES_CQ, cqn);
+ return err;
+}
+
+int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int cqn = vhcr->in_modifier;
+ struct res_cq *cq;
+
+ err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_ALLOCATED, &cq);
+ if (err)
+ return err;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto out_move;
+ atomic_dec(&cq->mtt->ref_count);
+ res_end_move(dev, slave, RES_CQ, cqn);
+ return 0;
+
+out_move:
+ res_abort_move(dev, slave, RES_CQ, cqn);
+ return err;
+}
+
+int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int cqn = vhcr->in_modifier;
+ struct res_cq *cq;
+ int err;
+
+ err = get_res(dev, slave, cqn, RES_CQ, &cq);
+ if (err)
+ return err;
+
+ if (cq->com.from_state != RES_CQ_HW)
+ goto ex_put;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ex_put:
+ put_res(dev, slave, cqn, RES_CQ);
+
+ return err;
+}
+
+static int handle_resize(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd,
+ struct res_cq *cq)
+{
+ int err;
+ struct res_mtt *orig_mtt;
+ struct res_mtt *mtt;
+ struct mlx4_cq_context *cqc = inbox->buf;
+ int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
+
+ err = get_res(dev, slave, cq->mtt->com.res_id, RES_MTT, &orig_mtt);
+ if (err)
+ return err;
+
+ if (orig_mtt != cq->mtt) {
+ err = -EINVAL;
+ goto ex_put;
+ }
+
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_put;
+
+ err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt);
+ if (err)
+ goto ex_put1;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put1;
+ atomic_dec(&orig_mtt->ref_count);
+ put_res(dev, slave, orig_mtt->com.res_id, RES_MTT);
+ atomic_inc(&mtt->ref_count);
+ cq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ return 0;
+
+ex_put1:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ex_put:
+ put_res(dev, slave, orig_mtt->com.res_id, RES_MTT);
+
+ return err;
+
+}
+
+int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int cqn = vhcr->in_modifier;
+ struct res_cq *cq;
+ int err;
+
+ err = get_res(dev, slave, cqn, RES_CQ, &cq);
+ if (err)
+ return err;
+
+ if (cq->com.from_state != RES_CQ_HW)
+ goto ex_put;
+
+ if (vhcr->op_modifier == 0) {
+ err = handle_resize(dev, slave, vhcr, inbox, outbox, cmd, cq);
+ goto ex_put;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ex_put:
+ put_res(dev, slave, cqn, RES_CQ);
+
+ return err;
+}
+
+static int srq_get_mtt_size(struct mlx4_srq_context *srqc)
+{
+ int log_srq_size = (be32_to_cpu(srqc->state_logsize_srqn) >> 24) & 0xf;
+ int log_rq_stride = srqc->logstride & 7;
+ int page_shift = (srqc->log_page_size & 0x3f) + 12;
+
+ if (log_srq_size + log_rq_stride + 4 < page_shift)
+ return 1;
+
+ return 1 << (log_srq_size + log_rq_stride + 4 - page_shift);
+}
+
+int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_mtt *mtt;
+ struct res_srq *srq;
+ struct mlx4_srq_context *srqc = inbox->buf;
+ int mtt_base = srq_get_mtt_addr(srqc) / dev->caps.mtt_entry_sz;
+
+ if (srqn != (be32_to_cpu(srqc->state_logsize_srqn) & 0xffffff))
+ return -EINVAL;
+
+ err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_HW, &srq);
+ if (err)
+ return err;
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_abort;
+ err = check_mtt_range(dev, slave, mtt_base, srq_get_mtt_size(srqc),
+ mtt);
+ if (err)
+ goto ex_put_mtt;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put_mtt;
+
+ atomic_inc(&mtt->ref_count);
+ srq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_SRQ, srqn);
+ return 0;
+
+ex_put_mtt:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_SRQ, srqn);
+
+ return err;
+}
+
+int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_srq *srq;
+
+ err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_ALLOCATED, &srq);
+ if (err)
+ return err;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_abort;
+ atomic_dec(&srq->mtt->ref_count);
+ if (srq->cq)
+ atomic_dec(&srq->cq->ref_count);
+ res_end_move(dev, slave, RES_SRQ, srqn);
+
+ return 0;
+
+ex_abort:
+ res_abort_move(dev, slave, RES_SRQ, srqn);
+
+ return err;
+}
+
+int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_srq *srq;
+
+ err = get_res(dev, slave, srqn, RES_SRQ, &srq);
+ if (err)
+ return err;
+ if (srq->com.from_state != RES_SRQ_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ put_res(dev, slave, srqn, RES_SRQ);
+ return err;
+}
+
+int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_srq *srq;
+
+ err = get_res(dev, slave, srqn, RES_SRQ, &srq);
+ if (err)
+ return err;
+
+ if (srq->com.from_state != RES_SRQ_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ put_res(dev, slave, srqn, RES_SRQ);
+ return err;
+}
+
+int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_qp *qp;
+
+ err = get_res(dev, slave, qpn, RES_QP, &qp);
+ if (err)
+ return err;
+ if (qp->com.from_state != RES_QP_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_qp_context *context = inbox->buf + 8;
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ update_pkey_index(dev, slave, inbox);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+static int roce_verify_mac(struct mlx4_dev *dev, int slave,
+ struct mlx4_qp_context *qpc,
+ struct mlx4_cmd_mailbox *inbox)
+{
+ u64 mac;
+ int port;
+ u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+ u8 sched = *(u8 *)(inbox->buf + 64);
+ u8 smac_ix;
+
+ port = (sched >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port) && (ts != MLX4_QP_ST_MLX)) {
+ smac_ix = qpc->pri_path.grh_mylmc & 0x7f;
+ if (mac_find_smac_ix_in_slave(dev, slave, port, smac_ix, &mac))
+ return -ENOENT;
+ }
+ return 0;
+}
+
+int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *qpc = inbox->buf + 8;
+
+ err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave);
+ if (err)
+ return err;
+
+ if (roce_verify_mac(dev, slave, qpc, inbox))
+ return -EINVAL;
+
+ update_pkey_index(dev, slave, inbox);
+ update_gid(dev, inbox, (u8)slave);
+ adjust_proxy_tun_qkey(dev, vhcr, qpc);
+ err = update_vport_qp_param(dev, inbox, slave);
+ if (err)
+ return err;
+
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave);
+ if (err)
+ return err;
+
+ update_pkey_index(dev, slave, inbox);
+ update_gid(dev, inbox, (u8)slave);
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave);
+ if (err)
+ return err;
+
+ update_pkey_index(dev, slave, inbox);
+ update_gid(dev, inbox, (u8)slave);
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+
+int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_qp_context *context = inbox->buf + 8;
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave);
+ if (err)
+ return err;
+
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ update_gid(dev, inbox, (u8)slave);
+ update_pkey_index(dev, slave, inbox);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave);
+ if (err)
+ return err;
+
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ update_gid(dev, inbox, (u8)slave);
+ update_pkey_index(dev, slave, inbox);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_qp *qp;
+
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED, &qp, 0);
+ if (err)
+ return err;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_abort;
+
+ atomic_dec(&qp->mtt->ref_count);
+ atomic_dec(&qp->rcq->ref_count);
+ atomic_dec(&qp->scq->ref_count);
+ if (qp->srq)
+ atomic_dec(&qp->srq->ref_count);
+ res_end_move(dev, slave, RES_QP, qpn);
+ return 0;
+
+ex_abort:
+ res_abort_move(dev, slave, RES_QP, qpn);
+
+ return err;
+}
+
+static struct res_gid *find_gid(struct mlx4_dev *dev, int slave,
+ struct res_qp *rqp, u8 *gid)
+{
+ struct res_gid *res;
+
+ list_for_each_entry(res, &rqp->mcg_list, list) {
+ if (!memcmp(res->gid, gid, 16))
+ return res;
+ }
+ return NULL;
+}
+
+static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp,
+ u8 *gid, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer)
+{
+ struct res_gid *res;
+ int err;
+
+ res = kzalloc(sizeof *res, GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ spin_lock_irq(&rqp->mcg_spl);
+ if (find_gid(dev, slave, rqp, gid)) {
+ kfree(res);
+ err = -EEXIST;
+ } else {
+ memcpy(res->gid, gid, 16);
+ res->prot = prot;
+ res->steer = steer;
+ list_add_tail(&res->list, &rqp->mcg_list);
+ err = 0;
+ }
+ spin_unlock_irq(&rqp->mcg_spl);
+
+ return err;
+}
+
+static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp,
+ u8 *gid, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer)
+{
+ struct res_gid *res;
+ int err;
+
+ spin_lock_irq(&rqp->mcg_spl);
+ res = find_gid(dev, slave, rqp, gid);
+ if (!res || res->prot != prot || res->steer != steer)
+ err = -EINVAL;
+ else {
+ list_del(&res->list);
+ kfree(res);
+ err = 0;
+ }
+ spin_unlock_irq(&rqp->mcg_spl);
+
+ return err;
+}
+
+int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_qp qp; /* dummy for calling attach/detach */
+ u8 *gid = inbox->buf;
+ enum mlx4_protocol prot = (vhcr->in_modifier >> 28) & 0x7;
+ int err;
+ int qpn;
+ struct res_qp *rqp;
+ int attach = vhcr->op_modifier;
+ int block_loopback = vhcr->in_modifier >> 31;
+ u8 steer_type_mask = 2;
+ enum mlx4_steer_type type = (gid[7] & steer_type_mask) >> 1;
+
+ qpn = vhcr->in_modifier & 0xffffff;
+ err = get_res(dev, slave, qpn, RES_QP, &rqp);
+ if (err)
+ return err;
+
+ qp.qpn = qpn;
+ if (attach) {
+ err = add_mcg_res(dev, slave, rqp, gid, prot, type);
+ if (err)
+ goto ex_put;
+
+ err = mlx4_qp_attach_common(dev, &qp, gid,
+ block_loopback, prot, type);
+ if (err)
+ goto ex_rem;
+ } else {
+ err = rem_mcg_res(dev, slave, rqp, gid, prot, type);
+ if (err)
+ goto ex_put;
+ err = mlx4_qp_detach_common(dev, &qp, gid, prot, type);
+ }
+
+ put_res(dev, slave, qpn, RES_QP);
+ return 0;
+
+ex_rem:
+ /* ignore error return below, already in error */
+ (void) rem_mcg_res(dev, slave, rqp, gid, prot, type);
+ex_put:
+ put_res(dev, slave, qpn, RES_QP);
+
+ return err;
+}
+
+/*
+ * MAC validation for Flow Steering rules.
+ * VF can attach rules only with a mac address which is assigned to it.
+ */
+
+static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header,
+ struct list_head *rlist)
+{
+ struct mac_res *res, *tmp;
+ __be64 be_mac;
+
+ /* make sure it isn't multicast or broadcast mac*/
+ if (!is_multicast_ether_addr(eth_header->eth.dst_mac) &&
+ !is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
+ list_for_each_entry_safe(res, tmp, rlist, list) {
+ be_mac = cpu_to_be64(res->mac << 16);
+ if (!memcmp(&be_mac, eth_header->eth.dst_mac, ETH_ALEN))
+ return 0;
+ }
+ pr_err("MAC %pM doesn't belong to VF %d, Steering rule rejected\n",
+ eth_header->eth.dst_mac, slave);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * In case of missing eth header, append eth header with a MAC address
+ * assigned to the VF.
+ */
+static int add_eth_header(struct mlx4_dev *dev, int slave,
+ struct mlx4_cmd_mailbox *inbox,
+ struct list_head *rlist, int header_id)
+{
+ struct mac_res *res, *tmp;
+ u8 port;
+ struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ struct mlx4_net_trans_rule_hw_eth *eth_header;
+ struct mlx4_net_trans_rule_hw_ipv4 *ip_header;
+ struct mlx4_net_trans_rule_hw_tcp_udp *l4_header;
+ __be64 be_mac = 0;
+ __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf;
+ port = ctrl->port;
+ eth_header = (struct mlx4_net_trans_rule_hw_eth *)(ctrl + 1);
+
+ /* Clear a space in the inbox for eth header */
+ switch (header_id) {
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ ip_header =
+ (struct mlx4_net_trans_rule_hw_ipv4 *)(eth_header + 1);
+ memmove(ip_header, eth_header,
+ sizeof(*ip_header) + sizeof(*l4_header));
+ break;
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ l4_header = (struct mlx4_net_trans_rule_hw_tcp_udp *)
+ (eth_header + 1);
+ memmove(l4_header, eth_header, sizeof(*l4_header));
+ break;
+ default:
+ return -EINVAL;
+ }
+ list_for_each_entry_safe(res, tmp, rlist, list) {
+ if (port == res->port) {
+ be_mac = cpu_to_be64(res->mac << 16);
+ break;
+ }
+ }
+ if (!be_mac) {
+ pr_err("Failed adding eth header to FS rule, Can't find matching MAC for port %d .\n",
+ port);
+ return -EINVAL;
+ }
+
+ memset(eth_header, 0, sizeof(*eth_header));
+ eth_header->size = sizeof(*eth_header) >> 2;
+ eth_header->id = cpu_to_be16(__sw_id_hw[MLX4_NET_TRANS_RULE_ID_ETH]);
+ memcpy(eth_header->dst_mac, &be_mac, ETH_ALEN);
+ memcpy(eth_header->dst_mac_msk, &mac_msk, ETH_ALEN);
+
+ return 0;
+
+}
+
+int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC];
+ int err;
+ struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ struct _rule_hw *rule_header;
+ int header_id;
+
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return -EOPNOTSUPP;
+
+ ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf;
+ rule_header = (struct _rule_hw *)(ctrl + 1);
+ header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id));
+
+ switch (header_id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ if (validate_eth_header_mac(slave, rule_header, rlist))
+ return -EINVAL;
+ break;
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ break;
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n");
+ if (add_eth_header(dev, slave, inbox, rlist, header_id))
+ return -EINVAL;
+ vhcr->in_modifier +=
+ sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2;
+ break;
+ default:
+ pr_err("Corrupted mailbox.\n");
+ return -EINVAL;
+ }
+
+ err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param,
+ vhcr->in_modifier, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0);
+ if (err) {
+ mlx4_err(dev, "Fail to add flow steering resources.\n ");
+ /* detach rule*/
+ mlx4_cmd(dev, vhcr->out_param, 0, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ }
+ return err;
+}
+
+int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return -EOPNOTSUPP;
+
+ err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0);
+ if (err) {
+ mlx4_err(dev, "Fail to remove flow steering resources.\n ");
+ return err;
+ }
+
+ err = mlx4_cmd(dev, vhcr->in_param, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ return err;
+}
+
+enum {
+ BUSY_MAX_RETRIES = 10
+};
+
+int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier & 0xffff;
+
+ err = get_res(dev, slave, index, RES_COUNTER, NULL);
+ if (err)
+ return err;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ put_res(dev, slave, index, RES_COUNTER);
+ return err;
+}
+
+static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp)
+{
+ struct res_gid *rgid;
+ struct res_gid *tmp;
+ struct mlx4_qp qp; /* dummy for calling attach/detach */
+
+ list_for_each_entry_safe(rgid, tmp, &rqp->mcg_list, list) {
+ qp.qpn = rqp->local_qpn;
+ (void) mlx4_qp_detach_common(dev, &qp, rgid->gid, rgid->prot,
+ rgid->steer);
+ list_del(&rgid->list);
+ kfree(rgid);
+ }
+}
+
+static int _move_all_busy(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type, int print)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct list_head *rlist = &tracker->slave_list[slave].res_list[type];
+ struct res_common *r;
+ struct res_common *tmp;
+ int busy;
+
+ busy = 0;
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(r, tmp, rlist, list) {
+ if (r->owner == slave) {
+ if (!r->removing) {
+ if (r->state == RES_ANY_BUSY) {
+ if (print)
+ mlx4_dbg(dev,
+ "%s id 0x%llx is busy\n",
+ ResourceType(type),
+ r->res_id);
+ ++busy;
+ } else {
+ r->from_state = r->state;
+ r->state = RES_ANY_BUSY;
+ r->removing = 1;
+ }
+ }
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return busy;
+}
+
+static int move_all_busy(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type)
+{
+ unsigned long begin;
+ int busy;
+
+ begin = jiffies;
+ do {
+ busy = _move_all_busy(dev, slave, type, 0);
+ if (time_after(jiffies, begin + 5 * HZ))
+ break;
+ if (busy)
+ cond_resched();
+ } while (busy);
+
+ if (busy)
+ busy = _move_all_busy(dev, slave, type, 1);
+
+ return busy;
+}
+static void rem_slave_qps(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *qp_list =
+ &tracker->slave_list[slave].res_list[RES_QP];
+ struct res_qp *qp;
+ struct res_qp *tmp;
+ int state;
+ u64 in_param;
+ int qpn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_QP);
+ if (err)
+ mlx4_warn(dev, "rem_slave_qps: Could not move all qps to busy"
+ "for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(qp, tmp, qp_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (qp->com.owner == slave) {
+ qpn = qp->com.res_id;
+ detach_qp(dev, slave, qp);
+ state = qp->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_QP_RESERVED:
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&qp->com.node,
+ &tracker->res_tree[RES_QP]);
+ list_del(&qp->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(qp);
+ state = 0;
+ break;
+ case RES_QP_MAPPED:
+ if (!valid_reserved(dev, slave, qpn))
+ __mlx4_qp_free_icm(dev, qpn);
+ state = RES_QP_RESERVED;
+ break;
+ case RES_QP_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param,
+ qp->local_qpn, 2,
+ MLX4_CMD_2RST_QP,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_qps: failed"
+ " to move slave %d qpn %d to"
+ " reset\n", slave,
+ qp->local_qpn);
+ atomic_dec(&qp->rcq->ref_count);
+ atomic_dec(&qp->scq->ref_count);
+ atomic_dec(&qp->mtt->ref_count);
+ if (qp->srq)
+ atomic_dec(&qp->srq->ref_count);
+ state = RES_QP_MAPPED;
+ break;
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_srqs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *srq_list =
+ &tracker->slave_list[slave].res_list[RES_SRQ];
+ struct res_srq *srq;
+ struct res_srq *tmp;
+ int state;
+ u64 in_param;
+ LIST_HEAD(tlist);
+ int srqn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_SRQ);
+ if (err)
+ mlx4_warn(dev, "rem_slave_srqs: Could not move all srqs to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(srq, tmp, srq_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (srq->com.owner == slave) {
+ srqn = srq->com.res_id;
+ state = srq->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_SRQ_ALLOCATED:
+ __mlx4_srq_free_icm(dev, srqn);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&srq->com.node,
+ &tracker->res_tree[RES_SRQ]);
+ list_del(&srq->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(srq);
+ state = 0;
+ break;
+
+ case RES_SRQ_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param, srqn, 1,
+ MLX4_CMD_HW2SW_SRQ,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_srqs: failed"
+ " to move slave %d srq %d to"
+ " SW ownership\n",
+ slave, srqn);
+
+ atomic_dec(&srq->mtt->ref_count);
+ if (srq->cq)
+ atomic_dec(&srq->cq->ref_count);
+ state = RES_SRQ_ALLOCATED;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_cqs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *cq_list =
+ &tracker->slave_list[slave].res_list[RES_CQ];
+ struct res_cq *cq;
+ struct res_cq *tmp;
+ int state;
+ u64 in_param;
+ LIST_HEAD(tlist);
+ int cqn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_CQ);
+ if (err)
+ mlx4_warn(dev, "rem_slave_cqs: Could not move all cqs to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(cq, tmp, cq_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (cq->com.owner == slave && !atomic_read(&cq->ref_count)) {
+ cqn = cq->com.res_id;
+ state = cq->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_CQ_ALLOCATED:
+ __mlx4_cq_free_icm(dev, cqn);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&cq->com.node,
+ &tracker->res_tree[RES_CQ]);
+ list_del(&cq->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(cq);
+ state = 0;
+ break;
+
+ case RES_CQ_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param, cqn, 1,
+ MLX4_CMD_HW2SW_CQ,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_cqs: failed"
+ " to move slave %d cq %d to"
+ " SW ownership\n",
+ slave, cqn);
+ atomic_dec(&cq->mtt->ref_count);
+ state = RES_CQ_ALLOCATED;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_mrs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mpt_list =
+ &tracker->slave_list[slave].res_list[RES_MPT];
+ struct res_mpt *mpt;
+ struct res_mpt *tmp;
+ int state;
+ u64 in_param;
+ LIST_HEAD(tlist);
+ int mptn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_MPT);
+ if (err)
+ mlx4_warn(dev, "rem_slave_mrs: Could not move all mpts to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(mpt, tmp, mpt_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (mpt->com.owner == slave) {
+ mptn = mpt->com.res_id;
+ state = mpt->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_MPT_RESERVED:
+ __mlx4_mr_release(dev, mpt->key);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&mpt->com.node,
+ &tracker->res_tree[RES_MPT]);
+ list_del(&mpt->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(mpt);
+ state = 0;
+ break;
+
+ case RES_MPT_MAPPED:
+ __mlx4_mr_free_icm(dev, mpt->key);
+ state = RES_MPT_RESERVED;
+ break;
+
+ case RES_MPT_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param, mptn, 0,
+ MLX4_CMD_HW2SW_MPT,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_mrs: failed"
+ " to move slave %d mpt %d to"
+ " SW ownership\n",
+ slave, mptn);
+ if (mpt->mtt)
+ atomic_dec(&mpt->mtt->ref_count);
+ state = RES_MPT_MAPPED;
+ break;
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_mtts(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct list_head *mtt_list =
+ &tracker->slave_list[slave].res_list[RES_MTT];
+ struct res_mtt *mtt;
+ struct res_mtt *tmp;
+ int state;
+ LIST_HEAD(tlist);
+ int base;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_MTT);
+ if (err)
+ mlx4_warn(dev, "rem_slave_mtts: Could not move all mtts to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(mtt, tmp, mtt_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (mtt->com.owner == slave) {
+ base = mtt->com.res_id;
+ state = mtt->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_MTT_ALLOCATED:
+ __mlx4_free_mtt_range(dev, base,
+ mtt->order);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&mtt->com.node,
+ &tracker->res_tree[RES_MTT]);
+ list_del(&mtt->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(mtt);
+ state = 0;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct list_head *fs_rule_list =
+ &tracker->slave_list[slave].res_list[RES_FS_RULE];
+ struct res_fs_rule *fs_rule;
+ struct res_fs_rule *tmp;
+ int state;
+ u64 base;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_FS_RULE);
+ if (err)
+ mlx4_warn(dev, "rem_slave_fs_rule: Could not move all mtts to busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(fs_rule, tmp, fs_rule_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (fs_rule->com.owner == slave) {
+ base = fs_rule->com.res_id;
+ state = fs_rule->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_FS_RULE_ALLOCATED:
+ /* detach rule */
+ err = mlx4_cmd(dev, base, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&fs_rule->com.node,
+ &tracker->res_tree[RES_FS_RULE]);
+ list_del(&fs_rule->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(fs_rule);
+ state = 0;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_eqs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *eq_list =
+ &tracker->slave_list[slave].res_list[RES_EQ];
+ struct res_eq *eq;
+ struct res_eq *tmp;
+ int err;
+ int state;
+ LIST_HEAD(tlist);
+ int eqn;
+ struct mlx4_cmd_mailbox *mailbox;
+
+ err = move_all_busy(dev, slave, RES_EQ);
+ if (err)
+ mlx4_warn(dev, "rem_slave_eqs: Could not move all eqs to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(eq, tmp, eq_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (eq->com.owner == slave) {
+ eqn = eq->com.res_id;
+ state = eq->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_EQ_RESERVED:
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&eq->com.node,
+ &tracker->res_tree[RES_EQ]);
+ list_del(&eq->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(eq);
+ state = 0;
+ break;
+
+ case RES_EQ_HW:
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ cond_resched();
+ continue;
+ }
+ err = mlx4_cmd_box(dev, slave, 0,
+ eqn & 0xff, 0,
+ MLX4_CMD_HW2SW_EQ,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_eqs: failed"
+ " to move slave %d eqs %d to"
+ " SW ownership\n", slave, eqn);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ atomic_dec(&eq->mtt->ref_count);
+ state = RES_EQ_RESERVED;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_counters(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *counter_list =
+ &tracker->slave_list[slave].res_list[RES_COUNTER];
+ struct res_counter *counter;
+ struct res_counter *tmp;
+ int err;
+ int index;
+
+ err = move_all_busy(dev, slave, RES_COUNTER);
+ if (err)
+ mlx4_warn(dev, "rem_slave_counters: Could not move all counters to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
+ if (counter->com.owner == slave) {
+ index = counter->com.res_id;
+ rb_erase(&counter->com.node,
+ &tracker->res_tree[RES_COUNTER]);
+ list_del(&counter->com.list);
+ kfree(counter);
+ __mlx4_counter_free(dev, index);
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *xrcdn_list =
+ &tracker->slave_list[slave].res_list[RES_XRCD];
+ struct res_xrcdn *xrcd;
+ struct res_xrcdn *tmp;
+ int err;
+ int xrcdn;
+
+ err = move_all_busy(dev, slave, RES_XRCD);
+ if (err)
+ mlx4_warn(dev, "rem_slave_xrcdns: Could not move all xrcdns to "
+ "busy for slave %d\n", slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(xrcd, tmp, xrcdn_list, com.list) {
+ if (xrcd->com.owner == slave) {
+ xrcdn = xrcd->com.res_id;
+ rb_erase(&xrcd->com.node, &tracker->res_tree[RES_XRCD]);
+ list_del(&xrcd->com.list);
+ kfree(xrcd);
+ __mlx4_xrcd_free(dev, xrcdn);
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mutex_lock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex);
+ rem_slave_macs(dev, slave);
+ rem_slave_vlans(dev, slave);
+ rem_slave_qps(dev, slave);
+ rem_slave_srqs(dev, slave);
+ rem_slave_cqs(dev, slave);
+ rem_slave_mrs(dev, slave);
+ rem_slave_eqs(dev, slave);
+ rem_slave_mtts(dev, slave);
+ rem_slave_counters(dev, slave);
+ rem_slave_xrcdns(dev, slave);
+ rem_slave_fs_rule(dev, slave);
+ mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex);
+}
diff --git a/sys/ofed/drivers/net/mlx4/sense.c b/sys/ofed/drivers/net/mlx4/sense.c
index 0fcf025..ba1fb43 100644
--- a/sys/ofed/drivers/net/mlx4/sense.c
+++ b/sys/ofed/drivers/net/mlx4/sense.c
@@ -38,14 +38,15 @@
#include "mlx4.h"
-static int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
- enum mlx4_port_type *type)
+int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
+ enum mlx4_port_type *type)
{
u64 out_param;
int err = 0;
err = mlx4_cmd_imm(dev, 0, &out_param, port, 0,
- MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
if (err) {
mlx4_err(dev, "Sense command failed for port: %d\n", port);
return err;
@@ -53,7 +54,7 @@ static int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
if (out_param > 2) {
mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", out_param);
- return EINVAL;
+ return -EINVAL;
}
*type = out_param;
@@ -80,20 +81,6 @@ void mlx4_do_sense_ports(struct mlx4_dev *dev,
}
/*
- * Adjust port configuration:
- * If port 1 sensed nothing and port 2 is IB, set both as IB
- * If port 2 sensed nothing and port 1 is Eth, set both as Eth
- */
- if (stype[0] == MLX4_PORT_TYPE_ETH) {
- for (i = 1; i < dev->caps.num_ports; i++)
- stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_ETH;
- }
- if (stype[dev->caps.num_ports - 1] == MLX4_PORT_TYPE_IB) {
- for (i = 0; i < dev->caps.num_ports - 1; i++)
- stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_IB;
- }
-
- /*
* If sensed nothing, remain in current configuration.
*/
for (i = 0; i < dev->caps.num_ports; i++)
@@ -139,18 +126,26 @@ void mlx4_start_sense(struct mlx4_dev *dev)
round_jiffies(MLX4_SENSE_RANGE));
}
-
void mlx4_stop_sense(struct mlx4_dev *dev)
{
mlx4_priv(dev)->sense.resched = 0;
}
-int mlx4_sense_init(struct mlx4_dev *dev)
+void mlx4_sense_cleanup(struct mlx4_dev *dev)
+{
+ mlx4_stop_sense(dev);
+ cancel_delayed_work(&mlx4_priv(dev)->sense.sense_poll);
+ destroy_workqueue(mlx4_priv(dev)->sense.sense_wq);
+}
+
+
+int mlx4_sense_init(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_sense *sense = &priv->sense;
int port;
+
sense->dev = dev;
sense->sense_wq = create_singlethread_workqueue("mlx4_sense");
if (!sense->sense_wq)
@@ -159,14 +154,7 @@ int mlx4_sense_init(struct mlx4_dev *dev)
for (port = 1; port <= dev->caps.num_ports; port++)
sense->do_sense_port[port] = 1;
- INIT_DELAYED_WORK_DEFERRABLE(&sense->sense_poll, mlx4_sense_port);
- return 0;
-}
+ INIT_DEFERRABLE_WORK(&sense->sense_poll, mlx4_sense_port);
-void mlx4_sense_cleanup(struct mlx4_dev *dev)
-{
- mlx4_stop_sense(dev);
- cancel_delayed_work(&mlx4_priv(dev)->sense.sense_poll);
- destroy_workqueue(mlx4_priv(dev)->sense.sense_wq);
+ return 0;
}
-
diff --git a/sys/ofed/drivers/net/mlx4/srq.c b/sys/ofed/drivers/net/mlx4/srq.c
index f856b8d..321c238 100644
--- a/sys/ofed/drivers/net/mlx4/srq.c
+++ b/sys/ofed/drivers/net/mlx4/srq.c
@@ -34,31 +34,11 @@
#include <linux/init.h>
#include <linux/mlx4/cmd.h>
-#include <linux/mlx4/srq.h>
+#include <linux/gfp.h>
#include "mlx4.h"
#include "icm.h"
-struct mlx4_srq_context {
- __be32 state_logsize_srqn;
- u8 logstride;
- u8 reserved1;
- __be16 xrc_domain;
- __be32 pg_offset_cqn;
- u32 reserved2;
- u8 log_page_size;
- u8 reserved3[2];
- u8 mtt_base_addr_h;
- __be32 mtt_base_addr_l;
- __be32 pd;
- __be16 limit_watermark;
- __be16 wqe_cnt;
- u16 reserved4;
- __be16 wqe_counter;
- u32 reserved5;
- __be64 db_rec_addr;
-};
-
void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
@@ -66,8 +46,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
spin_lock(&srq_table->lock);
- srq = radix_tree_lookup(&dev->srq_table_tree,
- srqn & (dev->caps.num_srqs - 1));
+ srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
if (srq)
atomic_inc(&srq->refcount);
@@ -87,8 +66,9 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int srq_num)
{
- return mlx4_cmd(dev, mailbox->dma, srq_num, 0, MLX4_CMD_SW2HW_SRQ,
- MLX4_CMD_TIME_CLASS_A);
+ return mlx4_cmd(dev, mailbox->dma, srq_num, 0,
+ MLX4_CMD_SW2HW_SRQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -96,48 +76,109 @@ static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
{
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
mailbox ? 0 : 1, MLX4_CMD_HW2SW_SRQ,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark)
{
return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
}
static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int srq_num)
{
return mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0, MLX4_CMD_QUERY_SRQ,
- MLX4_CMD_TIME_CLASS_A);
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
-int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
- struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq)
+int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
- struct mlx4_cmd_mailbox *mailbox;
- struct mlx4_srq_context *srq_context;
- u64 mtt_addr;
int err;
- srq->srqn = mlx4_bitmap_alloc(&srq_table->bitmap);
- if (srq->srqn == -1)
+
+ *srqn = mlx4_bitmap_alloc(&srq_table->bitmap);
+ if (*srqn == -1)
return -ENOMEM;
- err = mlx4_table_get(dev, &srq_table->table, srq->srqn);
+ err = mlx4_table_get(dev, &srq_table->table, *srqn);
if (err)
goto err_out;
- err = mlx4_table_get(dev, &srq_table->cmpt_table, srq->srqn);
+ err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn);
if (err)
goto err_put;
+ return 0;
+
+err_put:
+ mlx4_table_put(dev, &srq_table->table, *srqn);
+
+err_out:
+ mlx4_bitmap_free(&srq_table->bitmap, *srqn);
+ return err;
+}
+
+static int mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
+{
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param, RES_SRQ,
+ RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ *srqn = get_param_l(&out_param);
+
+ return err;
+ }
+ return __mlx4_srq_alloc_icm(dev, srqn);
+}
+
+void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+
+ mlx4_table_put(dev, &srq_table->cmpt_table, srqn);
+ mlx4_table_put(dev, &srq_table->table, srqn);
+ mlx4_bitmap_free(&srq_table->bitmap, srqn);
+}
+
+static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, srqn);
+ if (mlx4_cmd(dev, in_param, RES_SRQ, RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed freeing cq:%d\n", srqn);
+ return;
+ }
+ __mlx4_srq_free_icm(dev, srqn);
+}
+
+int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
+ struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_srq_context *srq_context;
+ u64 mtt_addr;
+ int err;
+
+ err = mlx4_srq_alloc_icm(dev, &srq->srqn);
+ if (err)
+ return err;
spin_lock_irq(&srq_table->lock);
- err = radix_tree_insert(&dev->srq_table_tree, srq->srqn, srq);
+ err = radix_tree_insert(&srq_table->tree, srq->srqn, srq);
spin_unlock_irq(&srq_table->lock);
if (err)
- goto err_cmpt_put;
+ goto err_icm;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
@@ -151,7 +192,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) |
srq->srqn);
srq_context->logstride = srq->wqe_shift - 4;
- srq_context->xrc_domain = cpu_to_be16(xrcd);
+ srq_context->xrcd = cpu_to_be16(xrcd);
srq_context->pg_offset_cqn = cpu_to_be32(cqn & 0xffffff);
srq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
@@ -173,52 +214,33 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
err_radix:
spin_lock_irq(&srq_table->lock);
- radix_tree_delete(&dev->srq_table_tree, srq->srqn);
+ radix_tree_delete(&srq_table->tree, srq->srqn);
spin_unlock_irq(&srq_table->lock);
-err_cmpt_put:
- mlx4_table_put(dev, &srq_table->cmpt_table, srq->srqn);
-
-err_put:
- mlx4_table_put(dev, &srq_table->table, srq->srqn);
-
-err_out:
- mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
-
+err_icm:
+ mlx4_srq_free_icm(dev, srq->srqn);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_srq_alloc);
-void mlx4_srq_invalidate(struct mlx4_dev *dev, struct mlx4_srq *srq)
+void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
int err;
err = mlx4_HW2SW_SRQ(dev, NULL, srq->srqn);
if (err)
mlx4_warn(dev, "HW2SW_SRQ failed (%d) for SRQN %06x\n", err, srq->srqn);
-}
-EXPORT_SYMBOL_GPL(mlx4_srq_invalidate);
-
-void mlx4_srq_remove(struct mlx4_dev *dev, struct mlx4_srq *srq)
-{
- struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
spin_lock_irq(&srq_table->lock);
- radix_tree_delete(&dev->srq_table_tree, srq->srqn);
+ radix_tree_delete(&srq_table->tree, srq->srqn);
spin_unlock_irq(&srq_table->lock);
-}
-EXPORT_SYMBOL_GPL(mlx4_srq_remove);
-
-void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
-{
- struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
if (atomic_dec_and_test(&srq->refcount))
complete(&srq->free);
wait_for_completion(&srq->free);
- mlx4_table_put(dev, &srq_table->table, srq->srqn);
- mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
+ mlx4_srq_free_icm(dev, srq->srqn);
}
EXPORT_SYMBOL_GPL(mlx4_srq_free);
@@ -257,7 +279,9 @@ int mlx4_init_srq_table(struct mlx4_dev *dev)
int err;
spin_lock_init(&srq_table->lock);
- INIT_RADIX_TREE(&dev->srq_table_tree, GFP_ATOMIC);
+ INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC);
+ if (mlx4_is_slave(dev))
+ return 0;
err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs,
dev->caps.num_srqs - 1, dev->caps.reserved_srqs, 0);
@@ -269,5 +293,7 @@ int mlx4_init_srq_table(struct mlx4_dev *dev)
void mlx4_cleanup_srq_table(struct mlx4_dev *dev)
{
+ if (mlx4_is_slave(dev))
+ return;
mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap);
}
diff --git a/sys/ofed/drivers/net/mlx4/sys_tune.c b/sys/ofed/drivers/net/mlx4/sys_tune.c
new file mode 100644
index 0000000..0675e90
--- /dev/null
+++ b/sys/ofed/drivers/net/mlx4/sys_tune.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2010 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <asm/atomic.h>
+
+#include "mlx4.h"
+
+#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
+
+
+
+/* Each CPU is put into a group. In most cases, the group number is
+ * equal to the CPU number of one of the CPUs in the group. The
+ * exception is group NR_CPUS which is the default group. This is
+ * protected by sys_tune_startup_mutex. */
+DEFINE_PER_CPU(int, idle_cpu_group) = NR_CPUS;
+
+/* For each group, a count of the number of CPUs in the group which
+ * are known to be busy. A busy CPU might be running the busy loop
+ * below or general kernel code. The count is decremented on entry to
+ * the old pm_idle handler and incremented on exit. The aim is to
+ * avoid the count going to zero or negative. This situation can
+ * occur temporarily during module unload or CPU hot-plug but
+ * normality will be restored when the affected CPUs next exit the
+ * idle loop. */
+static atomic_t busy_cpu_count[NR_CPUS+1];
+
+/* A workqueue item to be executed to cause the CPU to exit from the
+ * idle loop. */
+DEFINE_PER_CPU(struct work_struct, sys_tune_cpu_work);
+
+#define sys_tune_set_state(CPU,STATE) \
+ do { } while(0)
+
+
+/* A mutex to protect most of the module datastructures. */
+static DEFINE_MUTEX(sys_tune_startup_mutex);
+
+/* The old pm_idle handler. */
+static void (*old_pm_idle)(void) = NULL;
+
+static void sys_tune_pm_idle(void)
+{
+ atomic_t *busy_cpus_ptr;
+ int busy_cpus;
+ int cpu = smp_processor_id();
+
+ busy_cpus_ptr = &(busy_cpu_count[per_cpu(idle_cpu_group, cpu)]);
+
+ sys_tune_set_state(cpu, 2);
+
+ local_irq_enable();
+ while (!need_resched()) {
+ busy_cpus = atomic_read(busy_cpus_ptr);
+
+ /* If other CPUs in this group are busy then let this
+ * CPU go idle. We mustn't let the number of busy
+ * CPUs drop below 1. */
+ if ( busy_cpus > 1 &&
+ old_pm_idle != NULL &&
+ ( atomic_cmpxchg(busy_cpus_ptr, busy_cpus,
+ busy_cpus-1) == busy_cpus ) ) {
+ local_irq_disable();
+ sys_tune_set_state(cpu, 3);
+ /* This check might not be necessary, but it
+ * seems safest to include it because there
+ * might be a kernel version which requires
+ * it. */
+ if (need_resched())
+ local_irq_enable();
+ else
+ old_pm_idle();
+ /* This CPU is busy again. */
+ sys_tune_set_state(cpu, 1);
+ atomic_add(1, busy_cpus_ptr);
+ return;
+ }
+
+ cpu_relax();
+ }
+ sys_tune_set_state(cpu, 0);
+}
+
+
+void sys_tune_work_func(struct work_struct *work)
+{
+ /* Do nothing. Since this function is running in process
+ * context, the idle thread isn't running on this CPU. */
+}
+
+
+#ifdef CONFIG_SMP
+static void sys_tune_smp_call(void *info)
+{
+ schedule_work(&get_cpu_var(sys_tune_cpu_work));
+ put_cpu_var(sys_tune_cpu_work);
+}
+#endif
+
+
+#ifdef CONFIG_SMP
+static void sys_tune_refresh(void)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
+ on_each_cpu(&sys_tune_smp_call, NULL, 0, 1);
+#else
+ on_each_cpu(&sys_tune_smp_call, NULL, 1);
+#endif
+}
+#else
+static void sys_tune_refresh(void)
+{
+ /* The current thread is executing on the one and only CPU so
+ * the idle thread isn't running. */
+}
+#endif
+
+
+
+static int sys_tune_cpu_group(int cpu)
+{
+#ifdef CONFIG_SMP
+ const cpumask_t *mask;
+ int other_cpu;
+ int group;
+
+#if defined(topology_thread_cpumask) && defined(ST_HAVE_EXPORTED_CPU_SIBLING_MAP)
+ /* Keep one hyperthread busy per core. */
+ mask = topology_thread_cpumask(cpu);
+#else
+ return cpu;
+#endif
+ for_each_cpu_mask(cpu, *(mask)) {
+ group = per_cpu(idle_cpu_group, other_cpu);
+ if (group != NR_CPUS)
+ return group;
+ }
+#endif
+
+ return cpu;
+}
+
+
+static void sys_tune_add_cpu(int cpu)
+{
+ int group;
+
+ /* Do nothing if this CPU has already been added. */
+ if (per_cpu(idle_cpu_group, cpu) != NR_CPUS)
+ return;
+
+ group = sys_tune_cpu_group(cpu);
+ per_cpu(idle_cpu_group, cpu) = group;
+ atomic_inc(&(busy_cpu_count[group]));
+
+}
+
+static void sys_tune_del_cpu(int cpu)
+{
+
+ int group;
+
+ if (per_cpu(idle_cpu_group, cpu) == NR_CPUS)
+ return;
+
+ group = per_cpu(idle_cpu_group, cpu);
+ /* If the CPU was busy, this can cause the count to drop to
+ * zero. To rectify this, we need to cause one of the other
+ * CPUs in the group to exit the idle loop. If the CPU was
+ * not busy then this causes the contribution for this CPU to
+ * go to -1 which can cause the overall count to drop to zero
+ * or go negative. To rectify this situation we need to cause
+ * this CPU to exit the idle loop. */
+ atomic_dec(&(busy_cpu_count[group]));
+ per_cpu(idle_cpu_group, cpu) = NR_CPUS;
+
+}
+
+
+static int sys_tune_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (long)hcpu;
+
+ switch(action) {
+#ifdef CPU_ONLINE_FROZEN
+ case CPU_ONLINE_FROZEN:
+#endif
+ case CPU_ONLINE:
+ mutex_lock(&sys_tune_startup_mutex);
+ sys_tune_add_cpu(cpu);
+ mutex_unlock(&sys_tune_startup_mutex);
+ /* The CPU might have already entered the idle loop in
+ * the wrong group. Make sure it exits the idle loop
+ * so that it picks up the correct group. */
+ sys_tune_refresh();
+ break;
+
+#ifdef CPU_DEAD_FROZEN
+ case CPU_DEAD_FROZEN:
+#endif
+ case CPU_DEAD:
+ mutex_lock(&sys_tune_startup_mutex);
+ sys_tune_del_cpu(cpu);
+ mutex_unlock(&sys_tune_startup_mutex);
+ /* The deleted CPU may have been the only busy CPU in
+ * the group. Make sure one of the other CPUs in the
+ * group exits the idle loop. */
+ sys_tune_refresh();
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+
+static struct notifier_block sys_tune_cpu_nb = {
+ .notifier_call = sys_tune_cpu_notify,
+};
+
+
+static void sys_tune_ensure_init(void)
+{
+ BUG_ON (old_pm_idle != NULL);
+
+ /* Atomically update pm_idle to &sys_tune_pm_idle. The old value
+ * is stored in old_pm_idle before installing the new
+ * handler. */
+ do {
+ old_pm_idle = pm_idle;
+ } while (cmpxchg(&pm_idle, old_pm_idle, &sys_tune_pm_idle) !=
+ old_pm_idle);
+}
+#endif
+
+void sys_tune_fini(void)
+{
+#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
+ void (*old)(void);
+ int cpu;
+
+ unregister_cpu_notifier(&sys_tune_cpu_nb);
+
+ mutex_lock(&sys_tune_startup_mutex);
+
+
+ old = cmpxchg(&pm_idle, &sys_tune_pm_idle, old_pm_idle);
+
+ for_each_online_cpu(cpu)
+ sys_tune_del_cpu(cpu);
+
+ mutex_unlock(&sys_tune_startup_mutex);
+
+ /* Our handler may still be executing on other CPUs.
+ * Schedule this thread on all CPUs to make sure all
+ * idle threads get interrupted. */
+ sys_tune_refresh();
+
+ /* Make sure the work item has finished executing on all CPUs.
+ * This in turn ensures that all idle threads have been
+ * interrupted. */
+ flush_scheduled_work();
+#endif /* CONFIG_X86 */
+}
+
+void sys_tune_init(void)
+{
+#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ INIT_WORK(&per_cpu(sys_tune_cpu_work, cpu),
+ sys_tune_work_func);
+ }
+
+ /* Start by registering the handler to ensure we don't miss
+ * any updates. */
+ register_cpu_notifier(&sys_tune_cpu_nb);
+
+ mutex_lock(&sys_tune_startup_mutex);
+
+ for_each_online_cpu(cpu)
+ sys_tune_add_cpu(cpu);
+
+ sys_tune_ensure_init();
+
+
+ mutex_unlock(&sys_tune_startup_mutex);
+
+ /* Ensure our idle handler starts to run. */
+ sys_tune_refresh();
+#endif
+}
+
diff --git a/sys/ofed/include/asm/atomic.h b/sys/ofed/include/asm/atomic.h
index 5c5caa0..46e0370 100644
--- a/sys/ofed/include/asm/atomic.h
+++ b/sys/ofed/include/asm/atomic.h
@@ -45,6 +45,7 @@ typedef struct {
#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
+#define atomic_dec_return(v) atomic_sub_return(1, (v))
static inline int
atomic_add_return(int i, atomic_t *v)
@@ -82,4 +83,25 @@ atomic_dec(atomic_t *v)
return atomic_fetchadd_int(&v->counter, -1) - 1;
}
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int c, old;
+ c = atomic_read(v);
+ for (;;) {
+ if (unlikely(c == (u)))
+ break;
+ // old = atomic_cmpxchg((v), c, c + (a)); /*Linux*/
+ old = atomic_cmpset_int(&v->counter, c, c + (a));
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return c != (u);
+}
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+
+
+
#endif /* _ASM_ATOMIC_H_ */
diff --git a/sys/ofed/include/asm/byteorder.h b/sys/ofed/include/asm/byteorder.h
index 341c548..b59e973 100644
--- a/sys/ofed/include/asm/byteorder.h
+++ b/sys/ofed/include/asm/byteorder.h
@@ -30,6 +30,7 @@
#include <sys/types.h>
#include <sys/endian.h>
+#include <asm/types.h>
#if BYTE_ORDER == LITTLE_ENDIAN
#define __LITTLE_ENDIAN
diff --git a/sys/ofed/include/linux/atomic.h b/sys/ofed/include/linux/atomic.h
new file mode 100644
index 0000000..0d689c1
--- /dev/null
+++ b/sys/ofed/include/linux/atomic.h
@@ -0,0 +1,53 @@
+#ifndef _COMPAT_LINUX_ATOMIC_H
+#define _COMPAT_LINUX_ATOMIC_H 1
+
+/*
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36))
+#include_next <linux/atomic.h>
+#else
+*/
+
+#include <asm/atomic.h>
+
+/* Shahar Klein: atomic_inc_not_zero_hint do we need it? */
+#if 0
+
+/**
+ * atomic_inc_not_zero_hint - increment if not null
+ * @v: pointer of type atomic_t
+ * @hint: probable value of the atomic before the increment
+ *
+ * This version of atomic_inc_not_zero() gives a hint of probable
+ * value of the atomic. This helps processor to not read the memory
+ * before doing the atomic read/modify/write cycle, lowering
+ * number of bus transactions on some arches.
+ *
+ * Returns: 0 if increment was not done, 1 otherwise.
+ */
+
+#ifndef atomic_inc_not_zero_hint
+static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint)
+{
+ int val, c = hint;
+
+ /* sanity test, should be removed by compiler if hint is a constant */
+ if (!hint)
+ return atomic_inc_not_zero(v);
+
+ do {
+ val = atomic_cmpxchg(v, c, c + 1);
+ if (val == c)
+ return 1;
+ c = val;
+ } while (c);
+
+ return 0;
+}
+#endif
+#endif
+
+//#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36)) */
+
+#endif /* _COMPAT_LINUX_ATOMIC_H */
diff --git a/sys/ofed/include/linux/bitops.h b/sys/ofed/include/linux/bitops.h
index 658c32e..4ada708 100644
--- a/sys/ofed/include/linux/bitops.h
+++ b/sys/ofed/include/linux/bitops.h
@@ -35,6 +35,7 @@
#endif
#define BIT_MASK(n) (~0UL >> (BITS_PER_LONG - (n)))
#define BITS_TO_LONGS(n) howmany((n), BITS_PER_LONG)
+#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
static inline int
__ffs(int mask)
@@ -63,6 +64,16 @@ __flsl(long mask)
#define ffz(mask) __ffs(~(mask))
+static inline int get_count_order(unsigned int count)
+{
+ int order;
+
+ order = fls(count) - 1;
+ if (count & (count - 1))
+ order++;
+ return order;
+}
+
static inline unsigned long
find_first_bit(unsigned long *addr, unsigned long size)
{
@@ -314,4 +325,159 @@ test_and_set_bit(long bit, long *var)
return !!(val & bit);
}
+
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
+#define BITMAP_LAST_WORD_MASK(nbits) \
+( \
+ ((nbits) % BITS_PER_LONG) ? \
+ (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
+)
+
+
+static inline void
+bitmap_set(unsigned long *map, int start, int nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const int size = start + nr;
+ int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+ while (nr - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ nr -= bits_to_set;
+ bits_to_set = BITS_PER_LONG;
+ mask_to_set = ~0UL;
+ p++;
+ }
+ if (nr) {
+ mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
+static inline void
+bitmap_clear(unsigned long *map, int start, int nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const int size = start + nr;
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+ while (nr - bits_to_clear >= 0) {
+ *p &= ~mask_to_clear;
+ nr -= bits_to_clear;
+ bits_to_clear = BITS_PER_LONG;
+ mask_to_clear = ~0UL;
+ p++;
+ }
+ if (nr) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
+
+enum {
+ REG_OP_ISFREE, /* true if region is all zero bits */
+ REG_OP_ALLOC, /* set all bits in region */
+ REG_OP_RELEASE, /* clear all bits in region */
+};
+
+static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op)
+{
+ int nbits_reg; /* number of bits in region */
+ int index; /* index first long of region in bitmap */
+ int offset; /* bit offset region in bitmap[index] */
+ int nlongs_reg; /* num longs spanned by region in bitmap */
+ int nbitsinlong; /* num bits of region in each spanned long */
+ unsigned long mask; /* bitmask for one long of region */
+ int i; /* scans bitmap by longs */
+ int ret = 0; /* return value */
+
+ /*
+ * Either nlongs_reg == 1 (for small orders that fit in one long)
+ * or (offset == 0 && mask == ~0UL) (for larger multiword orders.)
+ */
+ nbits_reg = 1 << order;
+ index = pos / BITS_PER_LONG;
+ offset = pos - (index * BITS_PER_LONG);
+ nlongs_reg = BITS_TO_LONGS(nbits_reg);
+ nbitsinlong = min(nbits_reg, BITS_PER_LONG);
+
+ /*
+ * Can't do "mask = (1UL << nbitsinlong) - 1", as that
+ * overflows if nbitsinlong == BITS_PER_LONG.
+ */
+ mask = (1UL << (nbitsinlong - 1));
+ mask += mask - 1;
+ mask <<= offset;
+
+ switch (reg_op) {
+ case REG_OP_ISFREE:
+ for (i = 0; i < nlongs_reg; i++) {
+ if (bitmap[index + i] & mask)
+ goto done;
+ }
+ ret = 1; /* all bits in region free (zero) */
+ break;
+
+ case REG_OP_ALLOC:
+ for (i = 0; i < nlongs_reg; i++)
+ bitmap[index + i] |= mask;
+ break;
+
+ case REG_OP_RELEASE:
+ for (i = 0; i < nlongs_reg; i++)
+ bitmap[index + i] &= ~mask;
+ break;
+ }
+done:
+ return ret;
+}
+
+/**
+ * bitmap_find_free_region - find a contiguous aligned mem region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @bits: number of bits in the bitmap
+ * @order: region size (log base 2 of number of bits) to find
+ *
+ * Find a region of free (zero) bits in a @bitmap of @bits bits and
+ * allocate them (set them to one). Only consider regions of length
+ * a power (@order) of two, aligned to that power of two, which
+ * makes the search algorithm much faster.
+ *
+ * Return the bit offset in bitmap of the allocated region,
+ * or -errno on failure.
+ */
+static inline int
+bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
+{
+ int pos, end; /* scans bitmap by regions of size order */
+
+ for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) {
+ if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
+ continue;
+ __reg_op(bitmap, pos, order, REG_OP_ALLOC);
+ return pos;
+ }
+ return -ENOMEM;
+}
+
+/**
+ * bitmap_release_region - release allocated bitmap region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @pos: beginning of bit region to release
+ * @order: region size (log base 2 of number of bits) to release
+ *
+ * This is the complement to __bitmap_find_free_region() and releases
+ * the found region (by clearing it in the bitmap).
+ *
+ * No return value.
+ */
+static inline void
+bitmap_release_region(unsigned long *bitmap, int pos, int order)
+{
+ __reg_op(bitmap, pos, order, REG_OP_RELEASE);
+}
+
+
#endif /* _LINUX_BITOPS_H_ */
diff --git a/sys/ofed/include/linux/clocksource.h b/sys/ofed/include/linux/clocksource.h
new file mode 100644
index 0000000..e74cc62
--- /dev/null
+++ b/sys/ofed/include/linux/clocksource.h
@@ -0,0 +1,17 @@
+/* linux/include/linux/clocksource.h
+ *
+ * MLX4_CORE_PORT
+ *
+ * This file contains the structure definitions for clocksources.
+ *
+ * If you are not a clocksource, or timekeeping code, you should
+ * not be including this file!
+ */
+#ifndef _LINUX_CLOCKSOURCE_H
+#define _LINUX_CLOCKSOURCE_H
+
+/* clocksource cycle base type */
+typedef u64 cycle_t;
+
+
+#endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/sys/ofed/include/linux/compat.h b/sys/ofed/include/linux/compat.h
index cfb1671..7af826c 100644
--- a/sys/ofed/include/linux/compat.h
+++ b/sys/ofed/include/linux/compat.h
@@ -29,5 +29,8 @@
#ifndef _LINUX_COMPAT_H_
#define _LINUX_COMPAT_H_
+#define is_multicast_ether_addr(x) 0
+#define is_broadcast_ether_addr(x) 0
+
#endif /* _LINUX_COMPAT_H_ */
diff --git a/sys/ofed/include/linux/device.h b/sys/ofed/include/linux/device.h
index cce46ca..37a7720 100644
--- a/sys/ofed/include/linux/device.h
+++ b/sys/ofed/include/linux/device.h
@@ -385,4 +385,10 @@ class_remove_file(struct class *class, const struct class_attribute *attr)
sysfs_remove_file(&class->kobj, &attr->attr);
}
+static inline int dev_to_node(struct device *dev)
+{
+ return -1;
+}
+
+
#endif /* _LINUX_DEVICE_H_ */
diff --git a/sys/ofed/include/linux/dma-mapping.h b/sys/ofed/include/linux/dma-mapping.h
index 0f0ad9d..065745c 100644
--- a/sys/ofed/include/linux/dma-mapping.h
+++ b/sys/ofed/include/linux/dma-mapping.h
@@ -245,6 +245,13 @@ dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
return (0);
}
+static inline unsigned int dma_set_max_seg_size(struct device *dev,
+ unsigned int size)
+{
+ return (0);
+}
+
+
#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL)
#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL)
#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL)
diff --git a/sys/ofed/include/linux/gfp.h b/sys/ofed/include/linux/gfp.h
index 8d2b228..e88df78 100644
--- a/sys/ofed/include/linux/gfp.h
+++ b/sys/ofed/include/linux/gfp.h
@@ -121,4 +121,8 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
return (virt_to_page(page));
}
+#define alloc_pages_node(node, mask, order) alloc_pages(mask, order)
+
+#define kmalloc_node(chunk, mask, node) kmalloc(chunk, mask)
+
#endif /* _LINUX_GFP_H_ */
diff --git a/sys/ofed/include/linux/idr.h b/sys/ofed/include/linux/idr.h
index 40b25b6..b778e64 100644
--- a/sys/ofed/include/linux/idr.h
+++ b/sys/ofed/include/linux/idr.h
@@ -40,6 +40,10 @@
#define MAX_ID_MASK (MAX_ID_BIT - 1)
#define MAX_LEVEL (MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS
+#define MAX_IDR_SHIFT (sizeof(int)*8 - 1)
+#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
+#define MAX_IDR_MASK (MAX_IDR_BIT - 1)
+
struct idr_layer {
unsigned long bitmap;
struct idr_layer *ary[IDR_SIZE];
diff --git a/sys/ofed/include/linux/if_ether.h b/sys/ofed/include/linux/if_ether.h
index 9608657..f10df2e 100644
--- a/sys/ofed/include/linux/if_ether.h
+++ b/sys/ofed/include/linux/if_ether.h
@@ -34,4 +34,9 @@
#define ETH_P_8021Q ETHERTYPE_VLAN
+/*
+ * defined Ethernet Protocol ID's.
+ */
+#define ETH_P_IP 0x0800 /* Internet Protocol packet */
+
#endif /* _LINUX_IF_ETHER_H_ */
diff --git a/sys/ofed/include/linux/in6.h b/sys/ofed/include/linux/in6.h
index 925c7ed..2032b61 100644
--- a/sys/ofed/include/linux/in6.h
+++ b/sys/ofed/include/linux/in6.h
@@ -29,6 +29,8 @@
#ifndef _LINUX_IN6_H_
#define _LINUX_IN6_H_
+#ifndef KLD_MODULE
#include "opt_inet6.h"
+#endif
#endif /* _LINUX_IN6_H_ */
diff --git a/sys/ofed/include/linux/kernel.h b/sys/ofed/include/linux/kernel.h
index f49036e..55b71f6 100644
--- a/sys/ofed/include/linux/kernel.h
+++ b/sys/ofed/include/linux/kernel.h
@@ -47,6 +47,7 @@
#include <linux/log2.h>
#include <asm/byteorder.h>
+#define KERN_CONT ""
#define KERN_EMERG "<0>"
#define KERN_ALERT "<1>"
#define KERN_CRIT "<2>"
@@ -68,6 +69,60 @@
#define pr_debug(fmt, ...) printk(KERN_DEBUG # fmt, ##__VA_ARGS__)
#define udelay(t) DELAY(t)
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+/*
+ * Print a one-time message (analogous to WARN_ONCE() et al):
+ */
+#define printk_once(x...) ({ \
+ static bool __print_once; \
+ \
+ if (!__print_once) { \
+ __print_once = true; \
+ printk(x); \
+ } \
+})
+
+
+
+#define pr_emerg(fmt, ...) \
+ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert(fmt, ...) \
+ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit(fmt, ...) \
+ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn pr_warning
+#define pr_notice(fmt, ...) \
+ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_cont(fmt, ...) \
+ printk(KERN_CONT fmt, ##__VA_ARGS__)
+
+/* pr_devel() should produce zero code unless DEBUG is defined */
+#ifdef DEBUG
+#define pr_devel(fmt, ...) \
+ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_devel(fmt, ...) \
+ ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
+#endif
+
+#ifndef WARN
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ pr_warning(format); \
+ unlikely(__ret_warn_on); \
+})
+#endif
+
#define container_of(ptr, type, member) \
({ \
__typeof(((type *)0)->member) *_p = (ptr); \
@@ -77,12 +132,27 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define simple_strtoul strtoul
+#define simple_strtol strtol
#define min(x, y) (x < y ? x : y)
#define max(x, y) (x > y ? x : y)
#define min_t(type, _x, _y) (type)(_x) < (type)(_y) ? (type)(_x) : (_y)
#define max_t(type, _x, _y) (type)(_x) > (type)(_y) ? (type)(_x) : (_y)
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
#define num_possible_cpus() mp_ncpus
+typedef struct pm_message {
+ int event;
+} pm_message_t;
+
#endif /* _LINUX_KERNEL_H_ */
diff --git a/sys/ofed/include/linux/linux_compat.c b/sys/ofed/include/linux/linux_compat.c
index 4dbdad9..80d1e1e 100644
--- a/sys/ofed/include/linux/linux_compat.c
+++ b/sys/ofed/include/linux/linux_compat.c
@@ -267,6 +267,8 @@ linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
return (error);
filp->f_flags = file->f_flag;
+ devfs_clear_cdevpriv();
+
return (0);
}
diff --git a/sys/ofed/include/linux/list.h b/sys/ofed/include/linux/list.h
index 61b42d2..4b5454a 100644
--- a/sys/ofed/include/linux/list.h
+++ b/sys/ofed/include/linux/list.h
@@ -111,6 +111,9 @@ list_del_init(struct list_head *entry)
#define list_entry(ptr, type, field) container_of(ptr, type, field)
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
#define list_for_each(p, head) \
for (p = (head)->next; p != (head); p = p->next)
diff --git a/sys/ofed/include/linux/log2.h b/sys/ofed/include/linux/log2.h
index 0a8315a..8c2a05b 100644
--- a/sys/ofed/include/linux/log2.h
+++ b/sys/ofed/include/linux/log2.h
@@ -51,10 +51,119 @@ rounddown_pow_of_two(unsigned long x)
return (1UL << (flsl(x) - 1));
}
-static inline unsigned long
-ilog2(unsigned long x)
+
+/*
+ * deal with unrepresentable constant logarithms
+ */
+extern __attribute__((const, noreturn))
+int ____ilog2_NaN(void);
+
+/*
+ * non-constant log of base 2 calculators
+ * - the arch may override these in asm/bitops.h if they can be implemented
+ * more efficiently than using fls() and fls64()
+ * - the arch is not required to handle n==0 if implementing the fallback
+ */
+#ifndef CONFIG_ARCH_HAS_ILOG2_U32
+static inline __attribute__((const))
+int __ilog2_u32(u32 n)
{
- return (flsl(x) - 1);
+ return flsl(n) - 1;
}
+#endif
+
+#ifndef CONFIG_ARCH_HAS_ILOG2_U64
+static inline __attribute__((const))
+int __ilog2_u64(u64 n)
+{
+ return flsl(n) - 1;
+}
+#endif
+
+
+/**
+ * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
+ * @n - parameter
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ * the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n) < 1 ? ____ilog2_NaN() : \
+ (n) & (1ULL << 63) ? 63 : \
+ (n) & (1ULL << 62) ? 62 : \
+ (n) & (1ULL << 61) ? 61 : \
+ (n) & (1ULL << 60) ? 60 : \
+ (n) & (1ULL << 59) ? 59 : \
+ (n) & (1ULL << 58) ? 58 : \
+ (n) & (1ULL << 57) ? 57 : \
+ (n) & (1ULL << 56) ? 56 : \
+ (n) & (1ULL << 55) ? 55 : \
+ (n) & (1ULL << 54) ? 54 : \
+ (n) & (1ULL << 53) ? 53 : \
+ (n) & (1ULL << 52) ? 52 : \
+ (n) & (1ULL << 51) ? 51 : \
+ (n) & (1ULL << 50) ? 50 : \
+ (n) & (1ULL << 49) ? 49 : \
+ (n) & (1ULL << 48) ? 48 : \
+ (n) & (1ULL << 47) ? 47 : \
+ (n) & (1ULL << 46) ? 46 : \
+ (n) & (1ULL << 45) ? 45 : \
+ (n) & (1ULL << 44) ? 44 : \
+ (n) & (1ULL << 43) ? 43 : \
+ (n) & (1ULL << 42) ? 42 : \
+ (n) & (1ULL << 41) ? 41 : \
+ (n) & (1ULL << 40) ? 40 : \
+ (n) & (1ULL << 39) ? 39 : \
+ (n) & (1ULL << 38) ? 38 : \
+ (n) & (1ULL << 37) ? 37 : \
+ (n) & (1ULL << 36) ? 36 : \
+ (n) & (1ULL << 35) ? 35 : \
+ (n) & (1ULL << 34) ? 34 : \
+ (n) & (1ULL << 33) ? 33 : \
+ (n) & (1ULL << 32) ? 32 : \
+ (n) & (1ULL << 31) ? 31 : \
+ (n) & (1ULL << 30) ? 30 : \
+ (n) & (1ULL << 29) ? 29 : \
+ (n) & (1ULL << 28) ? 28 : \
+ (n) & (1ULL << 27) ? 27 : \
+ (n) & (1ULL << 26) ? 26 : \
+ (n) & (1ULL << 25) ? 25 : \
+ (n) & (1ULL << 24) ? 24 : \
+ (n) & (1ULL << 23) ? 23 : \
+ (n) & (1ULL << 22) ? 22 : \
+ (n) & (1ULL << 21) ? 21 : \
+ (n) & (1ULL << 20) ? 20 : \
+ (n) & (1ULL << 19) ? 19 : \
+ (n) & (1ULL << 18) ? 18 : \
+ (n) & (1ULL << 17) ? 17 : \
+ (n) & (1ULL << 16) ? 16 : \
+ (n) & (1ULL << 15) ? 15 : \
+ (n) & (1ULL << 14) ? 14 : \
+ (n) & (1ULL << 13) ? 13 : \
+ (n) & (1ULL << 12) ? 12 : \
+ (n) & (1ULL << 11) ? 11 : \
+ (n) & (1ULL << 10) ? 10 : \
+ (n) & (1ULL << 9) ? 9 : \
+ (n) & (1ULL << 8) ? 8 : \
+ (n) & (1ULL << 7) ? 7 : \
+ (n) & (1ULL << 6) ? 6 : \
+ (n) & (1ULL << 5) ? 5 : \
+ (n) & (1ULL << 4) ? 4 : \
+ (n) & (1ULL << 3) ? 3 : \
+ (n) & (1ULL << 2) ? 2 : \
+ (n) & (1ULL << 1) ? 1 : \
+ (n) & (1ULL << 0) ? 0 : \
+ ____ilog2_NaN() \
+ ) : \
+ (sizeof(n) <= 4) ? \
+ __ilog2_u32(n) : \
+ __ilog2_u64(n) \
+ )
#endif /* _LINUX_LOG2_H_ */
diff --git a/sys/ofed/include/linux/mlx4/cmd.h b/sys/ofed/include/linux/mlx4/cmd.h
index 60d3036..d83ee3a 100644
--- a/sys/ofed/include/linux/mlx4/cmd.h
+++ b/sys/ofed/include/linux/mlx4/cmd.h
@@ -59,12 +59,16 @@ enum {
MLX4_CMD_HW_HEALTH_CHECK = 0x50,
MLX4_CMD_SET_PORT = 0xc,
MLX4_CMD_SET_NODE = 0x5a,
+ MLX4_CMD_QUERY_FUNC = 0x56,
MLX4_CMD_ACCESS_DDR = 0x2e,
MLX4_CMD_MAP_ICM = 0xffa,
MLX4_CMD_UNMAP_ICM = 0xff9,
MLX4_CMD_MAP_ICM_AUX = 0xffc,
MLX4_CMD_UNMAP_ICM_AUX = 0xffb,
MLX4_CMD_SET_ICM_SIZE = 0xffd,
+ /*master notify fw on finish for slave's flr*/
+ MLX4_CMD_INFORM_FLR_DONE = 0x5b,
+ MLX4_CMD_GET_OP_REQ = 0x59,
/* TPT commands */
MLX4_CMD_SW2HW_MPT = 0xd,
@@ -119,6 +123,26 @@ enum {
/* miscellaneous commands */
MLX4_CMD_DIAG_RPRT = 0x30,
MLX4_CMD_NOP = 0x31,
+ MLX4_CMD_ACCESS_MEM = 0x2e,
+ MLX4_CMD_SET_VEP = 0x52,
+
+ /* Ethernet specific commands */
+ MLX4_CMD_SET_VLAN_FLTR = 0x47,
+ MLX4_CMD_SET_MCAST_FLTR = 0x48,
+ MLX4_CMD_DUMP_ETH_STATS = 0x49,
+
+ /* Communication channel commands */
+ MLX4_CMD_ARM_COMM_CHANNEL = 0x57,
+ MLX4_CMD_GEN_EQE = 0x58,
+
+ /* virtual commands */
+ MLX4_CMD_ALLOC_RES = 0xf00,
+ MLX4_CMD_FREE_RES = 0xf01,
+ MLX4_CMD_MCAST_ATTACH = 0xf05,
+ MLX4_CMD_UCAST_ATTACH = 0xf06,
+ MLX4_CMD_PROMISC = 0xf08,
+ MLX4_CMD_QUERY_FUNC_CAP = 0xf0a,
+ MLX4_CMD_QP_ATTACH = 0xf0b,
/* debug commands */
MLX4_CMD_QUERY_DEBUG_MSG = 0x2a,
@@ -127,16 +151,26 @@ enum {
/* statistics commands */
MLX4_CMD_QUERY_IF_STAT = 0X54,
MLX4_CMD_SET_IF_STAT = 0X55,
+
+ /* set port opcode modifiers */
+ MLX4_SET_PORT_PRIO2TC = 0x8,
+ MLX4_SET_PORT_SCHEDULER = 0x9,
+
+ /* register/delete flow steering network rules */
+ MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
+ MLX4_QP_FLOW_STEERING_DETACH = 0x66,
+ MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64,
};
enum {
- MLX4_CMD_TIME_CLASS_A = 10000,
- MLX4_CMD_TIME_CLASS_B = 10000,
- MLX4_CMD_TIME_CLASS_C = 10000,
+ MLX4_CMD_TIME_CLASS_A = 60000,
+ MLX4_CMD_TIME_CLASS_B = 60000,
+ MLX4_CMD_TIME_CLASS_C = 60000,
};
enum {
- MLX4_MAILBOX_SIZE = 4096
+ MLX4_MAILBOX_SIZE = 4096,
+ MLX4_ACCESS_MEM_ALIGN = 256,
};
enum {
@@ -149,6 +183,11 @@ enum {
MLX4_SET_PORT_GID_TABLE = 0x5,
};
+enum {
+ MLX4_CMD_WRAPPED,
+ MLX4_CMD_NATIVE
+};
+
struct mlx4_dev;
struct mlx4_cmd_mailbox {
@@ -158,23 +197,24 @@ struct mlx4_cmd_mailbox {
int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
- u16 op, unsigned long timeout);
+ u16 op, unsigned long timeout, int native);
/* Invoke a command with no output parameter */
static inline int mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u32 in_modifier,
- u8 op_modifier, u16 op, unsigned long timeout)
+ u8 op_modifier, u16 op, unsigned long timeout,
+ int native)
{
return __mlx4_cmd(dev, in_param, NULL, 0, in_modifier,
- op_modifier, op, timeout);
+ op_modifier, op, timeout, native);
}
/* Invoke a command with an output mailbox */
static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param,
u32 in_modifier, u8 op_modifier, u16 op,
- unsigned long timeout)
+ unsigned long timeout, int native)
{
return __mlx4_cmd(dev, in_param, &out_param, 0, in_modifier,
- op_modifier, op, timeout);
+ op_modifier, op, timeout, native);
}
/*
@@ -184,13 +224,21 @@ static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param
*/
static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
u32 in_modifier, u8 op_modifier, u16 op,
- unsigned long timeout)
+ unsigned long timeout, int native)
{
return __mlx4_cmd(dev, in_param, out_param, 1, in_modifier,
- op_modifier, op, timeout);
+ op_modifier, op, timeout, native);
}
struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev);
void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox);
+u32 mlx4_comm_get_version(void);
+int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac);
+int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos);
+int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting);
+
+
+#define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8)
+
#endif /* MLX4_CMD_H */
diff --git a/sys/ofed/include/linux/mlx4/cq.h b/sys/ofed/include/linux/mlx4/cq.h
index 6f65b2c..0821669 100644
--- a/sys/ofed/include/linux/mlx4/cq.h
+++ b/sys/ofed/include/linux/mlx4/cq.h
@@ -64,6 +64,22 @@ struct mlx4_err_cqe {
u8 owner_sr_opcode;
};
+struct mlx4_ts_cqe {
+ __be32 vlan_my_qpn;
+ __be32 immed_rss_invalid;
+ __be32 g_mlpath_rqpn;
+ __be32 timestamp_hi;
+ __be16 status;
+ u8 ipv6_ext_mask;
+ u8 badfcs_enc;
+ __be32 byte_cnt;
+ __be16 wqe_index;
+ __be16 checksum;
+ u8 reserved;
+ __be16 timestamp_lo;
+ u8 owner_sr_opcode;
+} __packed;
+
enum {
MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29,
MLX4_CQE_QPN_MASK = 0xffffff,
@@ -146,5 +162,5 @@ int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
u16 count, u16 period);
int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
int entries, struct mlx4_mtt *mtt);
-
+int mlx4_cq_ignore_overrun(struct mlx4_dev *dev, struct mlx4_cq *cq);
#endif /* MLX4_CQ_H */
diff --git a/sys/ofed/include/linux/mlx4/device.h b/sys/ofed/include/linux/mlx4/device.h
index 167c7e5..2828ef0 100644
--- a/sys/ofed/include/linux/mlx4/device.h
+++ b/sys/ofed/include/linux/mlx4/device.h
@@ -36,47 +36,176 @@
#include <linux/pci.h>
#include <linux/completion.h>
#include <linux/radix-tree.h>
+//#include <linux/cpu_rmap.h> /* XXX SK Probably not needed in freeBSD XXX */
#include <asm/atomic.h>
-#include <linux/mlx4/driver.h>
+#include <linux/clocksource.h> /* XXX SK ported to freeBSD */
+
+#define MAX_MSIX_P_PORT 17
+#define MAX_MSIX 64
+#define MSIX_LEGACY_SZ 4
+#define MIN_MSIX_P_PORT 5
+
+#define MLX4_ROCE_MAX_GIDS 128
+#define MLX4_ROCE_PF_GIDS 16
+
+#define MLX4_NUM_UP 8
+#define MLX4_NUM_TC 8
+#define MLX4_MAX_100M_UNITS_VAL 255 /*
+ * work around: can't set values
+ * greater then this value when
+ * using 100 Mbps units.
+ */
+#define MLX4_RATELIMIT_100M_UNITS 3 /* 100 Mbps */
+#define MLX4_RATELIMIT_1G_UNITS 4 /* 1 Gbps */
+#define MLX4_RATELIMIT_DEFAULT 0x00ff
+
+
+
+#define MLX4_LEAST_ATTACHED_VECTOR 0xffffffff
enum {
MLX4_FLAG_MSI_X = 1 << 0,
MLX4_FLAG_OLD_PORT_CMDS = 1 << 1,
+ MLX4_FLAG_MASTER = 1 << 2,
+ MLX4_FLAG_SLAVE = 1 << 3,
+ MLX4_FLAG_SRIOV = 1 << 4,
};
enum {
- MLX4_MAX_PORTS = 2
+ MLX4_PORT_CAP_IS_SM = 1 << 1,
+ MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19,
};
enum {
+ MLX4_MAX_PORTS = 2,
+ MLX4_MAX_PORT_PKEYS = 128
+};
+
+/* base qkey for use in sriov tunnel-qp/proxy-qp communication.
+ * These qkeys must not be allowed for general use. This is a 64k range,
+ * and to test for violation, we use the mask (protect against future chg).
+ */
+#define MLX4_RESERVED_QKEY_BASE (0xFFFF0000)
+#define MLX4_RESERVED_QKEY_MASK (0xFFFF0000)
+
+enum {
MLX4_BOARD_ID_LEN = 64
};
enum {
- MLX4_DEV_CAP_FLAG_RC = 1 << 0,
- MLX4_DEV_CAP_FLAG_UC = 1 << 1,
- MLX4_DEV_CAP_FLAG_UD = 1 << 2,
- MLX4_DEV_CAP_FLAG_XRC = 1 << 3,
- MLX4_DEV_CAP_FLAG_SRQ = 1 << 6,
- MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1 << 7,
- MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8,
- MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9,
- MLX4_DEV_CAP_FLAG_DPDP = 1 << 12,
- MLX4_DEV_CAP_FLAG_RAW_ETY = 1 << 13,
- MLX4_DEV_CAP_FLAG_BLH = 1 << 15,
- MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16,
- MLX4_DEV_CAP_FLAG_APM = 1 << 17,
- MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18,
- MLX4_DEV_CAP_FLAG_RAW_MCAST = 1 << 19,
- MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1 << 20,
- MLX4_DEV_CAP_FLAG_UD_MCAST = 1 << 21,
- MLX4_DEV_CAP_FLAG_IBOE = 1 << 30,
- MLX4_DEV_CAP_FLAG_FC_T11 = 1 << 31
+ MLX4_MAX_NUM_PF = 16,
+ MLX4_MAX_NUM_VF = 64,
+ MLX4_MFUNC_MAX = 80,
+ MLX4_MAX_EQ_NUM = 1024,
+ MLX4_MFUNC_EQ_NUM = 4,
+ MLX4_MFUNC_MAX_EQES = 8,
+ MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1)
+};
+
+/* Driver supports 3 diffrent device methods to manage traffic steering:
+ * -device managed - High level API for ib and eth flow steering. FW is
+ * managing flow steering tables.
+ * - B0 steering mode - Common low level API for ib and (if supported) eth.
+ * - A0 steering mode - Limited low level API for eth. In case of IB,
+ * B0 mode is in use.
+ */
+enum {
+ MLX4_STEERING_MODE_A0,
+ MLX4_STEERING_MODE_B0,
+ MLX4_STEERING_MODE_DEVICE_MANAGED
+};
+
+static inline const char *mlx4_steering_mode_str(int steering_mode)
+{
+ switch (steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ return "A0 steering";
+
+ case MLX4_STEERING_MODE_B0:
+ return "B0 steering";
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return "Device managed flow steering";
+
+ default:
+ return "Unrecognize steering mode";
+ }
+}
+
+enum {
+ MLX4_DEV_CAP_FLAG_RC = 1LL << 0,
+ MLX4_DEV_CAP_FLAG_UC = 1LL << 1,
+ MLX4_DEV_CAP_FLAG_UD = 1LL << 2,
+ MLX4_DEV_CAP_FLAG_XRC = 1LL << 3,
+ MLX4_DEV_CAP_FLAG_SRQ = 1LL << 6,
+ MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1LL << 7,
+ MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8,
+ MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9,
+ MLX4_DEV_CAP_FLAG_DPDP = 1LL << 12,
+ MLX4_DEV_CAP_FLAG_BLH = 1LL << 15,
+ MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1LL << 16,
+ MLX4_DEV_CAP_FLAG_APM = 1LL << 17,
+ MLX4_DEV_CAP_FLAG_ATOMIC = 1LL << 18,
+ MLX4_DEV_CAP_FLAG_RAW_MCAST = 1LL << 19,
+ MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1LL << 20,
+ MLX4_DEV_CAP_FLAG_UD_MCAST = 1LL << 21,
+ MLX4_DEV_CAP_FLAG_IBOE = 1LL << 30,
+ MLX4_DEV_CAP_FLAG_UC_LOOPBACK = 1LL << 32,
+ MLX4_DEV_CAP_FLAG_FCS_KEEP = 1LL << 34,
+ MLX4_DEV_CAP_FLAG_WOL_PORT1 = 1LL << 37,
+ MLX4_DEV_CAP_FLAG_WOL_PORT2 = 1LL << 38,
+ MLX4_DEV_CAP_FLAG_UDP_RSS = 1LL << 40,
+ MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41,
+ MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42,
+ MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48,
+ MLX4_DEV_CAP_FLAG_COUNTERS_EXT = 1LL << 49,
+ MLX4_DEV_CAP_FLAG_SET_PORT_ETH_SCHED = 1LL << 53,
+ MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55,
+ MLX4_DEV_CAP_FLAG_FAST_DROP = 1LL << 57,
+ MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
+ MLX4_DEV_CAP_FLAG_ESWITCH_SUPPORT = 1LL << 60,
+ MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61,
+ MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62
+};
+
+enum {
+ MLX4_DEV_CAP_FLAG2_RSS = 1LL << 0,
+ MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1,
+ MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2,
+ MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3
};
enum {
+ MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0,
+ MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1
+};
+
+enum {
+ MLX4_USER_DEV_CAP_64B_CQE = 1L << 0
+};
+
+enum {
+ MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0
+};
+
+/* bit enums for an 8-bit flags field indicating special use
+ * QPs which require special handling in qp_reserve_range.
+ * Currently, this only includes QPs used by the ETH interface,
+ * where we expect to use blueflame. These QPs must not have
+ * bits 6 and 7 set in their qp number.
+ *
+ * This enum may use only bits 0..7.
+ */
+enum {
+ MLX4_RESERVE_BF_QP = 1 << 7,
+};
+
+
+#define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90)
+
+enum {
MLX4_BMME_FLAG_LOCAL_INV = 1 << 6,
MLX4_BMME_FLAG_REMOTE_INV = 1 << 7,
MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9,
@@ -102,7 +231,14 @@ enum mlx4_event {
MLX4_EVENT_TYPE_PORT_CHANGE = 0x09,
MLX4_EVENT_TYPE_EQ_OVERFLOW = 0x0f,
MLX4_EVENT_TYPE_ECC_DETECT = 0x0e,
- MLX4_EVENT_TYPE_CMD = 0x0a
+ MLX4_EVENT_TYPE_CMD = 0x0a,
+ MLX4_EVENT_TYPE_VEP_UPDATE = 0x19,
+ MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18,
+ MLX4_EVENT_TYPE_OP_REQUIRED = 0x1a,
+ MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b,
+ MLX4_EVENT_TYPE_FLR_EVENT = 0x1c,
+ MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT = 0x1d,
+ MLX4_EVENT_TYPE_NONE = 0xff,
};
enum {
@@ -111,6 +247,29 @@ enum {
};
enum {
+ MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0,
+};
+
+enum slave_port_state {
+ SLAVE_PORT_DOWN = 0,
+ SLAVE_PENDING_UP,
+ SLAVE_PORT_UP,
+};
+
+enum slave_port_gen_event {
+ SLAVE_PORT_GEN_EVENT_DOWN = 0,
+ SLAVE_PORT_GEN_EVENT_UP,
+ SLAVE_PORT_GEN_EVENT_NONE,
+};
+
+enum slave_port_state_event {
+ MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
+ MLX4_PORT_STATE_DEV_EVENT_PORT_UP,
+ MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
+ MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
+};
+
+enum {
MLX4_PERM_LOCAL_READ = 1 << 10,
MLX4_PERM_LOCAL_WRITE = 1 << 11,
MLX4_PERM_REMOTE_READ = 1 << 12,
@@ -126,7 +285,6 @@ enum {
MLX4_OPCODE_SEND = 0x0a,
MLX4_OPCODE_SEND_IMM = 0x0b,
MLX4_OPCODE_LSO = 0x0e,
- MLX4_OPCODE_BIG_LSO = 0x2e,
MLX4_OPCODE_RDMA_READ = 0x10,
MLX4_OPCODE_ATOMIC_CS = 0x11,
MLX4_OPCODE_ATOMIC_FA = 0x12,
@@ -150,14 +308,26 @@ enum {
MLX4_STAT_RATE_OFFSET = 5
};
+enum mlx4_protocol {
+ MLX4_PROT_IB_IPV6 = 0,
+ MLX4_PROT_ETH,
+ MLX4_PROT_IB_IPV4,
+ MLX4_PROT_FCOE
+};
+
enum {
MLX4_MTT_FLAG_PRESENT = 1
};
+enum {
+ MLX4_MAX_MTT_SHIFT = 31
+};
+
enum mlx4_qp_region {
MLX4_QP_REGION_FW = 0,
MLX4_QP_REGION_ETH_ADDR,
MLX4_QP_REGION_FC_ADDR,
+ MLX4_QP_REGION_FC_EXCH,
MLX4_NUM_QP_REGION
};
@@ -173,25 +343,56 @@ enum mlx4_special_vlan_idx {
MLX4_VLAN_MISS_IDX,
MLX4_VLAN_REGULAR
};
-#define MLX4_LEAST_ATTACHED_VECTOR 0xffffffff
+
+enum mlx4_steer_type {
+ MLX4_MC_STEER = 0,
+ MLX4_UC_STEER,
+ MLX4_NUM_STEERS
+};
+
+enum {
+ MLX4_NUM_FEXCH = 64 * 1024,
+};
enum {
- MLX4_CUNTERS_DISABLED,
- MLX4_CUNTERS_BASIC,
- MLX4_CUNTERS_EXT
+ MLX4_MAX_FAST_REG_PAGES = 511,
};
enum {
- MAX_FAST_REG_PAGES = 511,
+ MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14,
+ MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15,
+ MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16,
};
+/* Port mgmt change event handling */
+enum {
+ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK = 1 << 0,
+ MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK = 1 << 1,
+ MLX4_EQ_PORT_INFO_LID_CHANGE_MASK = 1 << 2,
+ MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK = 1 << 3,
+ MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4,
+};
+
+#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
+ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK)
+
static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
{
return (major << 32) | (minor << 16) | subminor;
}
+struct mlx4_phys_caps {
+ u32 gid_phys_table_len[MLX4_MAX_PORTS + 1];
+ u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1];
+ u32 num_phys_eqs;
+ u32 base_sqpn;
+ u32 base_proxy_sqpn;
+ u32 base_tunnel_sqpn;
+};
+
struct mlx4_caps {
u64 fw_ver;
+ u32 function;
int num_ports;
int vl_cap[MLX4_MAX_PORTS + 1];
int ib_mtu_cap[MLX4_MAX_PORTS + 1];
@@ -206,6 +407,7 @@ struct mlx4_caps {
u64 trans_code[MLX4_MAX_PORTS + 1];
int local_ca_ack_delay;
int num_uars;
+ u32 uar_page_size;
int bf_reg_size;
int bf_regs_per_page;
int max_sq_sg;
@@ -216,7 +418,10 @@ struct mlx4_caps {
int max_rq_desc_sz;
int max_qp_init_rdma;
int max_qp_dest_rdma;
- int sqp_start;
+ u32 *qp0_proxy;
+ u32 *qp1_proxy;
+ u32 *qp0_tunnel;
+ u32 *qp1_tunnel;
int num_srqs;
int max_srq_wqes;
int max_srq_sge;
@@ -227,9 +432,10 @@ struct mlx4_caps {
int num_eqs;
int reserved_eqs;
int num_comp_vectors;
+ int comp_pool;
int num_mpts;
- int num_mtt_segs;
- int mtts_per_seg;
+ int max_fmr_maps;
+ int num_mtts;
int fmr_reserved_mtts;
int reserved_mtts;
int reserved_mrws;
@@ -238,36 +444,47 @@ struct mlx4_caps {
int num_amgms;
int reserved_mcgs;
int num_qp_per_mgm;
+ int steering_mode;
int num_pds;
int reserved_pds;
- int mtt_entry_sz;
- int reserved_xrcds;
int max_xrcds;
+ int reserved_xrcds;
+ int mtt_entry_sz;
u32 max_msg_sz;
u32 page_size_cap;
u64 flags;
+ u64 flags2;
u32 bmme_flags;
u32 reserved_lkey;
u16 stat_rate_support;
- int udp_rss;
- int loopback_support;
- int wol;
+ u8 cq_timestamp;
u8 port_width_cap[MLX4_MAX_PORTS + 1];
int max_gso_sz;
+ int max_rss_tbl_sz;
int reserved_qps_cnt[MLX4_NUM_QP_REGION];
int reserved_qps;
int reserved_qps_base[MLX4_NUM_QP_REGION];
int log_num_macs;
int log_num_vlans;
- int log_num_prios;
enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
u8 supported_type[MLX4_MAX_PORTS + 1];
- enum mlx4_port_type port_mask[MLX4_MAX_PORTS + 1];
+ u8 suggested_type[MLX4_MAX_PORTS + 1];
+ u8 default_sense[MLX4_MAX_PORTS + 1];
+ u32 port_mask[MLX4_MAX_PORTS + 1];
enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1];
- u8 counters_mode;
+ u32 max_counters;
+ u8 port_ib_mtu[MLX4_MAX_PORTS + 1];
+ u16 sqp_demux;
+ u32 sync_qp;
+ u32 eqe_size;
+ u32 cqe_size;
+ u8 eqe_factor;
+ u32 userspace_caps; /* userspace must be aware to */
+ u32 function_caps; /* functions must be aware to */
+ u8 fast_drop;
+ u16 hca_core_clock;
u32 max_basic_counters;
- u32 max_ext_counters;
- u32 mc_promisc_mode;
+ u32 max_extended_counters;
};
struct mlx4_buf_list {
@@ -284,7 +501,7 @@ struct mlx4_buf {
};
struct mlx4_mtt {
- u32 first_seg;
+ u32 offset;
int order;
int page_shift;
};
@@ -375,6 +592,8 @@ struct mlx4_cq {
atomic_t refcount;
struct completion free;
+ int eqn;
+ u16 irq;
};
struct mlx4_qp {
@@ -432,52 +651,168 @@ union mlx4_ext_av {
struct mlx4_eth_av eth;
};
-struct mlx4_counters {
- __be32 counter_mode;
- __be32 num_ifc;
- u32 reserved[2];
- __be64 rx_frames;
- __be64 rx_bytes;
- __be64 tx_frames;
- __be64 tx_bytes;
-};
-
-struct mlx4_counters_ext {
- __be32 counter_mode;
- __be32 num_ifc;
- u32 reserved[2];
- __be64 rx_uni_frames;
- __be64 rx_uni_bytes;
- __be64 rx_mcast_frames;
- __be64 rx_mcast_bytes;
- __be64 rx_bcast_frames;
- __be64 rx_bcast_bytes;
- __be64 rx_nobuf_frames;
- __be64 rx_nobuf_bytes;
- __be64 rx_err_frames;
- __be64 rx_err_bytes;
- __be64 tx_uni_frames;
- __be64 tx_uni_bytes;
- __be64 tx_mcast_frames;
- __be64 tx_mcast_bytes;
- __be64 tx_bcast_frames;
- __be64 tx_bcast_bytes;
- __be64 tx_nobuf_frames;
- __be64 tx_nobuf_bytes;
- __be64 tx_err_frames;
- __be64 tx_err_bytes;
+struct mlx4_if_stat_control {
+ u8 reserved1[3];
+ /* Extended counters enabled */
+ u8 cnt_mode;
+ /* Number of interfaces */
+ __be32 num_of_if;
+ __be32 reserved[2];
+};
+
+struct mlx4_if_stat_basic {
+ struct mlx4_if_stat_control control;
+ struct {
+ __be64 IfRxFrames;
+ __be64 IfRxOctets;
+ __be64 IfTxFrames;
+ __be64 IfTxOctets;
+ } counters[];
+};
+#define MLX4_IF_STAT_BSC_SZ(ports)(sizeof(struct mlx4_if_stat_extended) +\
+ sizeof(((struct mlx4_if_stat_extended *)0)->\
+ counters[0]) * ports)
+
+struct mlx4_if_stat_extended {
+ struct mlx4_if_stat_control control;
+ struct {
+ __be64 IfRxUnicastFrames;
+ __be64 IfRxUnicastOctets;
+ __be64 IfRxMulticastFrames;
+ __be64 IfRxMulticastOctets;
+ __be64 IfRxBroadcastFrames;
+ __be64 IfRxBroadcastOctets;
+ __be64 IfRxNoBufferFrames;
+ __be64 IfRxNoBufferOctets;
+ __be64 IfRxErrorFrames;
+ __be64 IfRxErrorOctets;
+ __be32 reserved[39];
+ __be64 IfTxUnicastFrames;
+ __be64 IfTxUnicastOctets;
+ __be64 IfTxMulticastFrames;
+ __be64 IfTxMulticastOctets;
+ __be64 IfTxBroadcastFrames;
+ __be64 IfTxBroadcastOctets;
+ __be64 IfTxDroppedFrames;
+ __be64 IfTxDroppedOctets;
+ __be64 IfTxRequestedFramesSent;
+ __be64 IfTxGeneratedFramesSent;
+ __be64 IfTxTsoOctets;
+ } __packed counters[];
+};
+#define MLX4_IF_STAT_EXT_SZ(ports) (sizeof(struct mlx4_if_stat_extended) +\
+ sizeof(((struct mlx4_if_stat_extended *)\
+ 0)->counters[0]) * ports)
+
+union mlx4_counter {
+ struct mlx4_if_stat_control control;
+ struct mlx4_if_stat_basic basic;
+ struct mlx4_if_stat_extended ext;
+};
+#define MLX4_IF_STAT_SZ(ports) MLX4_IF_STAT_EXT_SZ(ports)
+
+struct mlx4_quotas {
+ int qp;
+ int cq;
+ int srq;
+ int mpt;
+ int mtt;
+ int counter;
+ int xrcd;
};
struct mlx4_dev {
struct pci_dev *pdev;
unsigned long flags;
+ unsigned long num_slaves;
struct mlx4_caps caps;
+ struct mlx4_phys_caps phys_caps;
+ struct mlx4_quotas quotas;
struct radix_tree_root qp_table_tree;
- struct radix_tree_root srq_table_tree;
- u32 rev_id;
+ u8 rev_id;
char board_id[MLX4_BOARD_ID_LEN];
+ int num_vfs;
+ int numa_node;
+ int oper_log_mgm_entry_size;
+ u64 regid_promisc_array[MLX4_MAX_PORTS + 1];
+ u64 regid_allmulti_array[MLX4_MAX_PORTS + 1];
};
+struct mlx4_eqe {
+ u8 reserved1;
+ u8 type;
+ u8 reserved2;
+ u8 subtype;
+ union {
+ u32 raw[6];
+ struct {
+ __be32 cqn;
+ } __packed comp;
+ struct {
+ u16 reserved1;
+ __be16 token;
+ u32 reserved2;
+ u8 reserved3[3];
+ u8 status;
+ __be64 out_param;
+ } __packed cmd;
+ struct {
+ __be32 qpn;
+ } __packed qp;
+ struct {
+ __be32 srqn;
+ } __packed srq;
+ struct {
+ __be32 cqn;
+ u32 reserved1;
+ u8 reserved2[3];
+ u8 syndrome;
+ } __packed cq_err;
+ struct {
+ u32 reserved1[2];
+ __be32 port;
+ } __packed port_change;
+ struct {
+ #define COMM_CHANNEL_BIT_ARRAY_SIZE 4
+ u32 reserved;
+ u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
+ } __packed comm_channel_arm;
+ struct {
+ u8 port;
+ u8 reserved[3];
+ __be64 mac;
+ } __packed mac_update;
+ struct {
+ __be32 slave_id;
+ } __packed flr_event;
+ struct {
+ __be16 current_temperature;
+ __be16 warning_threshold;
+ } __packed warming;
+ struct {
+ u8 reserved[3];
+ u8 port;
+ union {
+ struct {
+ __be16 mstr_sm_lid;
+ __be16 port_lid;
+ __be32 changed_attr;
+ u8 reserved[3];
+ u8 mstr_sm_sl;
+ __be64 gid_prefix;
+ } __packed port_info;
+ struct {
+ __be32 block_ptr;
+ __be32 tbl_entries_mask;
+ } __packed tbl_change_info;
+ } params;
+ } __packed port_mgmt_change;
+ } event;
+ u8 slave_id;
+ u8 reserved3[2];
+ u8 owner;
+} __packed;
+
struct mlx4_init_port_param {
int set_guid0;
int set_node_guid;
@@ -492,29 +827,71 @@ struct mlx4_init_port_param {
u64 si_guid;
};
-static inline void mlx4_query_steer_cap(struct mlx4_dev *dev, int *log_mac,
- int *log_vlan, int *log_prio)
-{
- *log_mac = dev->caps.log_num_macs;
- *log_vlan = dev->caps.log_num_vlans;
- *log_prio = dev->caps.log_num_prios;
-}
-
#define mlx4_foreach_port(port, dev, type) \
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if ((type) == (dev)->caps.port_mask[(port)])
+#define mlx4_foreach_non_ib_transport_port(port, dev) \
+ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
+ if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
+
#define mlx4_foreach_ib_transport_port(port, dev) \
- for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
+ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+#define MLX4_INVALID_SLAVE_ID 0xFF
+
+void handle_port_mgmt_change_event(struct work_struct *work);
+
+static inline int mlx4_master_func_num(struct mlx4_dev *dev)
+{
+ return dev->caps.function;
+}
+
+static inline int mlx4_is_master(struct mlx4_dev *dev)
+{
+ return dev->flags & MLX4_FLAG_MASTER;
+}
+
+static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev)
+{
+ return dev->phys_caps.base_sqpn + 8 +
+ 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev);
+}
+
+static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
+{
+ return (qpn < dev->phys_caps.base_sqpn + 8 +
+ 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev));
+}
+
+static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn)
+{
+ int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8;
+
+ if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8)
+ return 1;
+
+ return 0;
+}
+
+static inline int mlx4_is_mfunc(struct mlx4_dev *dev)
+{
+ return dev->flags & (MLX4_FLAG_SLAVE | MLX4_FLAG_MASTER);
+}
+
+static inline int mlx4_is_slave(struct mlx4_dev *dev)
+{
+ return dev->flags & MLX4_FLAG_SLAVE;
+}
+
int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
struct mlx4_buf *buf);
void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
{
- if (buf->direct.buf != NULL)
+ if (BITS_PER_LONG == 64 || buf->nbufs == 1)
return buf->direct.buf + offset;
else
return buf->page_list[offset >> PAGE_SHIFT].buf +
@@ -523,31 +900,21 @@ static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
-
int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
-int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf);
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node);
void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf);
int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
struct mlx4_mtt *mtt);
void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
-int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port);
-int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
-
-int mlx4_mr_reserve_range(struct mlx4_dev *dev, int cnt, int align, u32 *base_mridx);
-void mlx4_mr_release_range(struct mlx4_dev *dev, u32 base_mridx, int cnt);
-int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
- u64 iova, u64 size, u32 access, int npages,
- int page_shift, struct mlx4_mr *mr);
int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
int npages, int page_shift, struct mlx4_mr *mr);
-void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr);
void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr);
int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
@@ -565,16 +932,17 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
- unsigned vector, int collapsed);
+ unsigned vector, int collapsed, int timestamp_en);
void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
-int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
+int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 bf_qp);
void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
-int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
+int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq);
void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq);
int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark);
@@ -583,41 +951,185 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm
int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
+int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot);
+int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot);
int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- int block_mcast_loopback, enum mlx4_mcast_prot prot);
+ u8 port, int block_mcast_loopback,
+ enum mlx4_protocol protocol, u64 *reg_id);
int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- enum mlx4_mcast_prot prot);
+ enum mlx4_protocol protocol, u64 reg_id);
+
+enum {
+ MLX4_DOMAIN_UVERBS = 0x1000,
+ MLX4_DOMAIN_ETHTOOL = 0x2000,
+ MLX4_DOMAIN_RFS = 0x3000,
+ MLX4_DOMAIN_NIC = 0x5000,
+};
-int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index);
-void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index);
+enum mlx4_net_trans_rule_id {
+ MLX4_NET_TRANS_RULE_ID_ETH = 0,
+ MLX4_NET_TRANS_RULE_ID_IB,
+ MLX4_NET_TRANS_RULE_ID_IPV6,
+ MLX4_NET_TRANS_RULE_ID_IPV4,
+ MLX4_NET_TRANS_RULE_ID_TCP,
+ MLX4_NET_TRANS_RULE_ID_UDP,
+ MLX4_NET_TRANS_RULE_NUM, /* should be last */
+};
+
+extern const u16 __sw_id_hw[];
+static inline int map_hw_to_sw_id(u16 header_id)
+{
+
+ int i;
+ for (i = 0; i < MLX4_NET_TRANS_RULE_NUM; i++) {
+ if (header_id == __sw_id_hw[i])
+ return i;
+ }
+ return -EINVAL;
+}
+enum mlx4_net_trans_promisc_mode {
+ MLX4_FS_REGULAR = 0,
+ MLX4_FS_ALL_DEFAULT = 1,
+ MLX4_FS_MC_DEFAULT = 3,
+ MLX4_FS_UC_SNIFFER = 4,
+ MLX4_FS_MC_SNIFFER = 5,
+};
+
+struct mlx4_spec_eth {
+ u8 dst_mac[6];
+ u8 dst_mac_msk[6];
+ u8 src_mac[6];
+ u8 src_mac_msk[6];
+ u8 ether_type_enable;
+ __be16 ether_type;
+ __be16 vlan_id_msk;
+ __be16 vlan_id;
+};
+
+struct mlx4_spec_tcp_udp {
+ __be16 dst_port;
+ __be16 dst_port_msk;
+ __be16 src_port;
+ __be16 src_port_msk;
+};
+
+struct mlx4_spec_ipv4 {
+ __be32 dst_ip;
+ __be32 dst_ip_msk;
+ __be32 src_ip;
+ __be32 src_ip_msk;
+};
+
+struct mlx4_spec_ib {
+ __be32 r_u_qpn;
+ __be32 qpn_msk;
+ u8 dst_gid[16];
+ u8 dst_gid_msk[16];
+};
+
+struct mlx4_spec_list {
+ struct list_head list;
+ enum mlx4_net_trans_rule_id id;
+ union {
+ struct mlx4_spec_eth eth;
+ struct mlx4_spec_ib ib;
+ struct mlx4_spec_ipv4 ipv4;
+ struct mlx4_spec_tcp_udp tcp_udp;
+ };
+};
+
+enum mlx4_net_trans_hw_rule_queue {
+ MLX4_NET_TRANS_Q_FIFO,
+ MLX4_NET_TRANS_Q_LIFO,
+};
+
+struct mlx4_net_trans_rule {
+ struct list_head list;
+ enum mlx4_net_trans_hw_rule_queue queue_mode;
+ bool exclusive;
+ bool allow_loopback;
+ enum mlx4_net_trans_promisc_mode promisc_mode;
+ u8 port;
+ u16 priority;
+ u32 qpn;
+};
+
+int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
+ enum mlx4_net_trans_promisc_mode mode);
+int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
+ enum mlx4_net_trans_promisc_mode mode);
+int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
+
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port);
+int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
+void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap);
+int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
+ u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
+int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
+ u8 promisc);
+int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
+int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
+ u8 *pg, u16 *ratelimit);
int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
-int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
- u64 *page_list, int npages, u64 iova, u32 fbo,
- u32 len, u32 *lkey, u32 *rkey, int same_key);
int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
int npages, u64 iova, u32 *lkey, u32 *rkey);
-int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
- u32 access, int max_pages, int max_maps,
- u8 page_shift, struct mlx4_fmr *fmr);
int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
int max_maps, u8 page_shift, struct mlx4_fmr *fmr);
int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
u32 *lkey, u32 *rkey);
-int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
int mlx4_SYNC_TPT(struct mlx4_dev *dev);
int mlx4_query_diag_counters(struct mlx4_dev *mlx4_dev, int array_length,
- u8 op_modifier, u32 in_offset[], u32 counter_out[]);
+ u8 op_modifier, u32 in_offset[],
+ u32 counter_out[]);
+
int mlx4_test_interrupts(struct mlx4_dev *dev);
+int mlx4_assign_eq(struct mlx4_dev *dev, char *name, int *vector);
+void mlx4_release_eq(struct mlx4_dev *dev, int vec);
-void mlx4_get_fc_t11_settings(struct mlx4_dev *dev, int *enable_pre_t11, int *t11_supported);
+int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port);
+int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
+int mlx4_flow_attach(struct mlx4_dev *dev,
+ struct mlx4_net_trans_rule *rule, u64 *reg_id);
+int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
+
+void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port,
+ int i, int val);
+
+int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey);
+
+int mlx4_is_slave_active(struct mlx4_dev *dev, int slave);
+int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port);
+int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port);
+int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr);
+int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change);
+enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port);
+int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event);
+
+void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
+__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
+int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id);
+int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid);
+
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn);
+
+cycle_t mlx4_read_clock(struct mlx4_dev *dev);
+
#endif /* MLX4_DEVICE_H */
diff --git a/sys/ofed/include/linux/mlx4/driver.h b/sys/ofed/include/linux/mlx4/driver.h
index 15c8319..8235a97 100644
--- a/sys/ofed/include/linux/mlx4/driver.h
+++ b/sys/ofed/include/linux/mlx4/driver.h
@@ -33,15 +33,22 @@
#ifndef MLX4_DRIVER_H
#define MLX4_DRIVER_H
-#include <linux/device.h>
+#include <linux/mlx4/device.h>
struct mlx4_dev;
+#define MLX4_MAC_MASK 0xffffffffffffULL
+#define MLX4_BE_SHORT_MASK cpu_to_be16(0xffff)
+#define MLX4_BE_WORD_MASK cpu_to_be32(0xffffffff)
+
enum mlx4_dev_event {
MLX4_DEV_EVENT_CATASTROPHIC_ERROR,
MLX4_DEV_EVENT_PORT_UP,
MLX4_DEV_EVENT_PORT_DOWN,
MLX4_DEV_EVENT_PORT_REINIT,
+ MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
+ MLX4_DEV_EVENT_SLAVE_INIT,
+ MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
};
enum mlx4_query_reply {
@@ -49,11 +56,6 @@ enum mlx4_query_reply {
MLX4_QUERY_MINE_NOPORT = 0
};
-enum mlx4_prot {
- MLX4_PROT_IB,
- MLX4_PROT_EN,
-};
-
enum mlx4_mcast_prot {
MLX4_MCAST_PROT_IB = 0,
MLX4_MCAST_PROT_EN = 1,
@@ -63,20 +65,32 @@ struct mlx4_interface {
void * (*add) (struct mlx4_dev *dev);
void (*remove)(struct mlx4_dev *dev, void *context);
void (*event) (struct mlx4_dev *dev, void *context,
- enum mlx4_dev_event event, int port);
- void * (*get_prot_dev) (struct mlx4_dev *dev, void *context, u8 port);
- enum mlx4_prot protocol;
+ enum mlx4_dev_event event, unsigned long param);
+ void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port);
enum mlx4_query_reply (*query) (void *context, void *);
struct list_head list;
+ enum mlx4_protocol protocol;
};
int mlx4_register_interface(struct mlx4_interface *intf);
void mlx4_unregister_interface(struct mlx4_interface *intf);
-void *mlx4_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port);
-struct mlx4_dev *mlx4_query_interface(void *, int *port);
-void mlx4_set_iboe_counter(struct mlx4_dev *dev, int index, u8 port);
-int mlx4_get_iboe_counter(struct mlx4_dev *dev, u8 port);
+void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port);
+
+#ifndef ETH_ALEN
+#define ETH_ALEN 6
+#endif
+static inline u64 mlx4_mac_to_u64(u8 *addr)
+{
+ u64 mac = 0;
+ int i;
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac <<= 8;
+ mac |= addr[i];
+ }
+ return mac;
+}
#endif /* MLX4_DRIVER_H */
diff --git a/sys/ofed/include/linux/mlx4/qp.h b/sys/ofed/include/linux/mlx4/qp.h
index 3fe2bc5..2d45a9d 100644
--- a/sys/ofed/include/linux/mlx4/qp.h
+++ b/sys/ofed/include/linux/mlx4/qp.h
@@ -39,6 +39,15 @@
#define MLX4_INVALID_LKEY 0x100
+enum ib_m_qp_attr_mask {
+ IB_M_EXT_CLASS_1 = 1 << 28,
+ IB_M_EXT_CLASS_2 = 1 << 29,
+ IB_M_EXT_CLASS_3 = 1 << 30,
+
+ IB_M_QP_MOD_VEND_MASK = (IB_M_EXT_CLASS_1 | IB_M_EXT_CLASS_2 |
+ IB_M_EXT_CLASS_3)
+};
+
enum mlx4_qp_optpar {
MLX4_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
MLX4_QP_OPTPAR_RRE = 1 << 1,
@@ -95,11 +104,42 @@ enum {
MLX4_QP_BIT_RWE = 1 << 14,
MLX4_QP_BIT_RAE = 1 << 13,
MLX4_QP_BIT_RIC = 1 << 4,
+ MLX4_QP_BIT_COLL_SYNC_RQ = 1 << 2,
+ MLX4_QP_BIT_COLL_SYNC_SQ = 1 << 1,
+ MLX4_QP_BIT_COLL_MASTER = 1 << 0
+};
+
+enum {
+ MLX4_RSS_HASH_XOR = 0,
+ MLX4_RSS_HASH_TOP = 1,
+
+ MLX4_RSS_UDP_IPV6 = 1 << 0,
+ MLX4_RSS_UDP_IPV4 = 1 << 1,
+ MLX4_RSS_TCP_IPV6 = 1 << 2,
+ MLX4_RSS_IPV6 = 1 << 3,
+ MLX4_RSS_TCP_IPV4 = 1 << 4,
+ MLX4_RSS_IPV4 = 1 << 5,
+
+ /* offset of mlx4_rss_context within mlx4_qp_context.pri_path */
+ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH = 0x24,
+ /* offset of being RSS indirection QP within mlx4_qp_context.flags */
+ MLX4_RSS_QPC_FLAG_OFFSET = 13,
+};
+
+struct mlx4_rss_context {
+ __be32 base_qpn;
+ __be32 default_qpn;
+ u16 reserved;
+ u8 hash_fn;
+ u8 flags;
+ __be32 rss_key[10];
+ __be32 base_qpn_udp;
};
struct mlx4_qp_path {
u8 fl;
- u8 reserved1[2];
+ u8 reserved1[1];
+ u8 disable_pkey_check;
u8 pkey_index;
u8 counter_index;
u8 grh_mylmc;
@@ -112,7 +152,8 @@ struct mlx4_qp_path {
u8 rgid[16];
u8 sched_queue;
u8 vlan_index;
- u8 reserved3[2];
+ u8 feup;
+ u8 reserved3;
u8 reserved4[2];
u8 dmac[6];
};
@@ -153,16 +194,7 @@ struct mlx4_qp_context {
u8 reserved4[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
- u8 VE;
- u8 reserved5;
- __be16 VFT_id_prio;
- u8 reserved6;
- u8 exch_size;
- __be16 exch_base;
- u8 VFT_hop_cnt;
- u8 my_fc_id_idx;
- __be16 reserved7;
- u32 reserved8[7];
+ u32 reserved5[10];
};
/* Which firmware version adds support for NEC (NoErrorCompletion) bit */
@@ -192,8 +224,12 @@ struct mlx4_wqe_ctrl_seg {
* [4] IP checksum
* [3:2] C (generate completion queue entry)
* [1] SE (solicited event)
+ * [0] FL (force loopback)
*/
- __be32 srcrb_flags;
+ union {
+ __be32 srcrb_flags;
+ __be16 srcrb_flags16[2];
+ };
/*
* imm is immediate data for send/RDMA write w/ immediate;
* also invalidation key for send with invalidate; input
@@ -204,15 +240,15 @@ struct mlx4_wqe_ctrl_seg {
enum {
MLX4_WQE_MLX_VL15 = 1 << 17,
- MLX4_WQE_MLX_SLR = 1 << 16,
- MLX4_WQE_MLX_ICRC = 1 << 4
+ MLX4_WQE_MLX_SLR = 1 << 16
};
struct mlx4_wqe_mlx_seg {
u8 owner;
u8 reserved1[2];
u8 opcode;
- u8 reserved2[3];
+ __be16 sched_prio;
+ u8 reserved2;
u8 size;
/*
* [17] VL15
@@ -338,9 +374,6 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
}
-struct mlx4_qp *mlx4_qp_lookup_lock(struct mlx4_dev *dev, u32 qpn);
void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
-int mlx4_qp_get_region(struct mlx4_dev *dev, enum mlx4_qp_region region,
- int *base_qpn, int *cnt);
#endif /* MLX4_QP_H */
diff --git a/sys/ofed/include/linux/mlx4/srq.h b/sys/ofed/include/linux/mlx4/srq.h
index 5e041e5..799a069 100644
--- a/sys/ofed/include/linux/mlx4/srq.h
+++ b/sys/ofed/include/linux/mlx4/srq.h
@@ -33,22 +33,10 @@
#ifndef MLX4_SRQ_H
#define MLX4_SRQ_H
-#include <linux/types.h>
-#include <linux/mlx4/device.h>
-
struct mlx4_wqe_srq_next_seg {
u16 reserved1;
__be16 next_wqe_index;
u32 reserved2[3];
};
-void mlx4_srq_invalidate(struct mlx4_dev *dev, struct mlx4_srq *srq);
-void mlx4_srq_remove(struct mlx4_dev *dev, struct mlx4_srq *srq);
-
-static inline struct mlx4_srq *__mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn)
-{
- return radix_tree_lookup(&dev->srq_table_tree,
- srqn & (dev->caps.num_srqs - 1));
-}
-
#endif /* MLX4_SRQ_H */
diff --git a/sys/ofed/include/linux/moduleparam.h b/sys/ofed/include/linux/moduleparam.h
index 2c541a6..e8534c7 100644
--- a/sys/ofed/include/linux/moduleparam.h
+++ b/sys/ofed/include/linux/moduleparam.h
@@ -87,6 +87,9 @@ param_sysinit(struct kernel_param *param)
#define module_param(var, type, mode) \
module_param_named(var, var, type, mode)
+#define module_param_array(var, type, addr_argc, mode) \
+ module_param_named(var, var, type, mode)
+
#define MODULE_PARM_DESC(name, desc)
static inline int
diff --git a/sys/ofed/include/linux/pci.h b/sys/ofed/include/linux/pci.h
index 5d91e2d..5c9cfde 100644
--- a/sys/ofed/include/linux/pci.h
+++ b/sys/ofed/include/linux/pci.h
@@ -72,6 +72,9 @@ struct pci_device_id {
#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274
+#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
+#define PCI_FUNC(devfn) ((devfn) & 0x07)
#define PCI_VDEVICE(_vendor, _device) \
.vendor = PCI_VENDOR_ID_##_vendor, .device = (_device), \
@@ -93,14 +96,18 @@ struct pci_device_id {
struct pci_dev;
+
struct pci_driver {
struct list_head links;
char *name;
struct pci_device_id *id_table;
int (*probe)(struct pci_dev *dev, const struct pci_device_id *id);
void (*remove)(struct pci_dev *dev);
+ int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */
+ int (*resume) (struct pci_dev *dev); /* Device woken up */
driver_t driver;
devclass_t bsdclass;
+ struct pci_error_handlers *err_handler;
};
extern struct list_head pci_drivers;
@@ -117,6 +124,9 @@ struct pci_dev {
uint16_t device;
uint16_t vendor;
unsigned int irq;
+ unsigned int devfn;
+ u8 revision;
+ struct pci_devinfo *bus; /* bus this device is on, equivalent to linux struct pci_bus */
};
static inline struct resource_list_entry *
@@ -296,6 +306,7 @@ pci_disable_msix(struct pci_dev *pdev)
#define PCI_CAP_ID_EXP PCIY_EXPRESS
#define PCI_CAP_ID_PCIX PCIY_PCIX
+
static inline int
pci_find_capability(struct pci_dev *pdev, int capid)
{
@@ -306,6 +317,26 @@ pci_find_capability(struct pci_dev *pdev, int capid)
return (reg);
}
+
+
+
+/**
+ * pci_pcie_cap - get the saved PCIe capability offset
+ * @dev: PCI device
+ *
+ * PCIe capability offset is calculated at PCI device initialization
+ * time and saved in the data structure. This function returns saved
+ * PCIe capability offset. Using this instead of pci_find_capability()
+ * reduces unnecessary search in the PCI configuration space. If you
+ * need to calculate PCIe capability offset from raw device for some
+ * reasons, please use pci_find_capability() instead.
+ */
+static inline int pci_pcie_cap(struct pci_dev *dev)
+{
+ return pci_find_capability(dev, PCI_CAP_ID_EXP);
+}
+
+
static inline int
pci_read_config_byte(struct pci_dev *pdev, int where, u8 *val)
{
@@ -529,6 +560,30 @@ pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries, int nreq)
return (0);
}
+static inline int pci_channel_offline(struct pci_dev *pdev)
+{
+ return false;
+}
+
+static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
+{
+ return -ENODEV;
+}
+static inline void pci_disable_sriov(struct pci_dev *dev)
+{
+}
+
+/**
+ * DEFINE_PCI_DEVICE_TABLE - macro used to describe a pci device table
+ * @_table: device table name
+ *
+ * This macro is used to create a struct pci_device_id array (a device table)
+ * in a generic manner.
+ */
+#define DEFINE_PCI_DEVICE_TABLE(_table) \
+ const struct pci_device_id _table[] __devinitdata
+
+
/* XXX This should not be necessary. */
#define pcix_set_mmrbc(d, v) 0
#define pcix_get_max_mmrbc(d) 0
@@ -578,5 +633,57 @@ pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries, int nreq)
#define pci_unmap_len dma_unmap_len
#define pci_unmap_len_set dma_unmap_len_set
+typedef unsigned int __bitwise pci_channel_state_t;
+typedef unsigned int __bitwise pci_ers_result_t;
+
+enum pci_channel_state {
+ /* I/O channel is in normal state */
+ pci_channel_io_normal = (__force pci_channel_state_t) 1,
+
+ /* I/O to channel is blocked */
+ pci_channel_io_frozen = (__force pci_channel_state_t) 2,
+
+ /* PCI card is dead */
+ pci_channel_io_perm_failure = (__force pci_channel_state_t) 3,
+};
+
+enum pci_ers_result {
+ /* no result/none/not supported in device driver */
+ PCI_ERS_RESULT_NONE = (__force pci_ers_result_t) 1,
+
+ /* Device driver can recover without slot reset */
+ PCI_ERS_RESULT_CAN_RECOVER = (__force pci_ers_result_t) 2,
+
+ /* Device driver wants slot to be reset. */
+ PCI_ERS_RESULT_NEED_RESET = (__force pci_ers_result_t) 3,
+
+ /* Device has completely failed, is unrecoverable */
+ PCI_ERS_RESULT_DISCONNECT = (__force pci_ers_result_t) 4,
+
+ /* Device driver is fully recovered and operational */
+ PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5,
+};
+
+
+/* PCI bus error event callbacks */
+struct pci_error_handlers {
+ /* PCI bus error detected on this device */
+ pci_ers_result_t (*error_detected)(struct pci_dev *dev,
+ enum pci_channel_state error);
+
+ /* MMIO has been re-enabled, but not DMA */
+ pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev);
+
+ /* PCI Express link has been reset */
+ pci_ers_result_t (*link_reset)(struct pci_dev *dev);
+
+ /* PCI slot has been reset */
+ pci_ers_result_t (*slot_reset)(struct pci_dev *dev);
+
+ /* Device driver may resume normal operations */
+ void (*resume)(struct pci_dev *dev);
+};
+
+
#endif /* _LINUX_PCI_H_ */
diff --git a/sys/ofed/include/linux/sysfs.h b/sys/ofed/include/linux/sysfs.h
index c60a2b9..cb1f7b2 100644
--- a/sys/ofed/include/linux/sysfs.h
+++ b/sys/ofed/include/linux/sysfs.h
@@ -105,6 +105,10 @@ sysctl_handle_attr(SYSCTL_HANDLER_ARGS)
/* Trim trailing newline. */
buf[len] = '\0';
}
+
+ /* Trim trailing newline. */
+ len--;
+ ((char*)buf)[len] = '\0';
}
/* Leave one trailing byte to append a newline. */
@@ -185,4 +189,6 @@ sysfs_remove_dir(struct kobject *kobj)
sysctl_remove_oid(kobj->oidp, 1, 1);
}
+#define sysfs_attr_init(attr) do {} while(0)
+
#endif /* _LINUX_SYSFS_H_ */
diff --git a/sys/ofed/include/linux/types.h b/sys/ofed/include/linux/types.h
index 331c8b6..65568ca 100644
--- a/sys/ofed/include/linux/types.h
+++ b/sys/ofed/include/linux/types.h
@@ -45,6 +45,8 @@ typedef _Bool bool;
#define false FALSE
#endif
+typedef u64 phys_addr_t;
+
typedef unsigned long kernel_ulong_t;
typedef unsigned int uint;
typedef unsigned gfp_t;
diff --git a/sys/ofed/include/linux/workqueue.h b/sys/ofed/include/linux/workqueue.h
index ce5759b..b895bd3 100644
--- a/sys/ofed/include/linux/workqueue.h
+++ b/sys/ofed/include/linux/workqueue.h
@@ -80,7 +80,7 @@ do { \
callout_init(&(_work)->timer, CALLOUT_MPSAFE); \
} while (0)
-#define INIT_DELAYED_WORK_DEFERRABLE INIT_DELAYED_WORK
+#define INIT_DEFERRABLE_WORK INIT_DELAYED_WORK
#define schedule_work(work) \
do { \
@@ -121,6 +121,14 @@ queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work,
return (!pending);
}
+static inline bool schedule_delayed_work(struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct workqueue_struct wq;
+ wq.taskqueue = taskqueue_thread;
+ return queue_delayed_work(&wq, dwork, delay);
+}
+
static inline struct workqueue_struct *
_create_workqueue_common(char *name, int cpus)
{
@@ -190,4 +198,15 @@ cancel_delayed_work(struct delayed_work *work)
return 0;
}
+static inline int
+cancel_delayed_work_sync(struct delayed_work *work)
+{
+
+ callout_drain(&work->timer);
+ if (work->work.taskqueue &&
+ taskqueue_cancel(work->work.taskqueue, &work->work.work_task, NULL))
+ taskqueue_drain(work->work.taskqueue, &work->work.work_task);
+ return 0;
+}
+
#endif /* _LINUX_WORKQUEUE_H_ */
diff --git a/sys/ofed/include/rdma/ib_cm.h b/sys/ofed/include/rdma/ib_cm.h
index 9388583..40c24b6 100644
--- a/sys/ofed/include/rdma/ib_cm.h
+++ b/sys/ofed/include/rdma/ib_cm.h
@@ -38,6 +38,9 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_sa.h>
+/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
+extern struct class cm_class;
+
enum ib_cm_state {
IB_CM_IDLE,
IB_CM_LISTEN,
@@ -259,6 +262,18 @@ struct ib_cm_event {
void *private_data;
};
+#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
+#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
+#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
+#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
+#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
+#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
+#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
+#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
+#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
+#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
+#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
+
/**
* ib_cm_handler - User-defined callback to process communication events.
* @cm_id: Communication identifier associated with the reported event.
diff --git a/sys/ofed/include/rdma/ib_mad.h b/sys/ofed/include/rdma/ib_mad.h
index d3b9401..32f8114 100644
--- a/sys/ofed/include/rdma/ib_mad.h
+++ b/sys/ofed/include/rdma/ib_mad.h
@@ -151,7 +151,7 @@ struct ib_rmpp_hdr {
typedef u64 __bitwise ib_sa_comp_mask;
-#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n))
+#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n)))
/*
* ib_sa_hdr and ib_sa_mad structures must be packed because they have
diff --git a/sys/ofed/include/rdma/ib_pma.h b/sys/ofed/include/rdma/ib_pma.h
new file mode 100644
index 0000000..a5889f1
--- /dev/null
+++ b/sys/ofed/include/rdma/ib_pma.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_PMA_H)
+#define IB_PMA_H
+
+#include <rdma/ib_mad.h>
+
+/*
+ * PMA class portinfo capability mask bits
+ */
+#define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8)
+#define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9)
+#define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12)
+
+#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001)
+#define IB_PMA_PORT_SAMPLES_CONTROL cpu_to_be16(0x0010)
+#define IB_PMA_PORT_SAMPLES_RESULT cpu_to_be16(0x0011)
+#define IB_PMA_PORT_COUNTERS cpu_to_be16(0x0012)
+#define IB_PMA_PORT_COUNTERS_EXT cpu_to_be16(0x001D)
+#define IB_PMA_PORT_SAMPLES_RESULT_EXT cpu_to_be16(0x001E)
+
+struct ib_pma_mad {
+ struct ib_mad_hdr mad_hdr;
+ u8 reserved[40];
+ u8 data[192];
+} __packed;
+
+struct ib_pma_portsamplescontrol {
+ u8 opcode;
+ u8 port_select;
+ u8 tick;
+ u8 counter_width; /* resv: 7:3, counter width: 2:0 */
+ __be32 counter_mask0_9; /* 2, 10 3-bit fields */
+ __be16 counter_mask10_14; /* 1, 5 3-bit fields */
+ u8 sample_mechanisms;
+ u8 sample_status; /* only lower 2 bits */
+ __be64 option_mask;
+ __be64 vendor_mask;
+ __be32 sample_start;
+ __be32 sample_interval;
+ __be16 tag;
+ __be16 counter_select[15];
+ __be32 reserved1;
+ __be64 samples_only_option_mask;
+ __be32 reserved2[28];
+};
+
+struct ib_pma_portsamplesresult {
+ __be16 tag;
+ __be16 sample_status; /* only lower 2 bits */
+ __be32 counter[15];
+};
+
+struct ib_pma_portsamplesresult_ext {
+ __be16 tag;
+ __be16 sample_status; /* only lower 2 bits */
+ __be32 extended_width; /* only upper 2 bits */
+ __be64 counter[15];
+};
+
+struct ib_pma_portcounters {
+ u8 reserved;
+ u8 port_select;
+ __be16 counter_select;
+ __be16 symbol_error_counter;
+ u8 link_error_recovery_counter;
+ u8 link_downed_counter;
+ __be16 port_rcv_errors;
+ __be16 port_rcv_remphys_errors;
+ __be16 port_rcv_switch_relay_errors;
+ __be16 port_xmit_discards;
+ u8 port_xmit_constraint_errors;
+ u8 port_rcv_constraint_errors;
+ u8 reserved1;
+ u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */
+ __be16 reserved2;
+ __be16 vl15_dropped;
+ __be32 port_xmit_data;
+ __be32 port_rcv_data;
+ __be32 port_xmit_packets;
+ __be32 port_rcv_packets;
+ __be32 port_xmit_wait;
+} __packed;
+
+
+#define IB_PMA_SEL_SYMBOL_ERROR cpu_to_be16(0x0001)
+#define IB_PMA_SEL_LINK_ERROR_RECOVERY cpu_to_be16(0x0002)
+#define IB_PMA_SEL_LINK_DOWNED cpu_to_be16(0x0004)
+#define IB_PMA_SEL_PORT_RCV_ERRORS cpu_to_be16(0x0008)
+#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS cpu_to_be16(0x0010)
+#define IB_PMA_SEL_PORT_XMIT_DISCARDS cpu_to_be16(0x0040)
+#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS cpu_to_be16(0x0200)
+#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS cpu_to_be16(0x0400)
+#define IB_PMA_SEL_PORT_VL15_DROPPED cpu_to_be16(0x0800)
+#define IB_PMA_SEL_PORT_XMIT_DATA cpu_to_be16(0x1000)
+#define IB_PMA_SEL_PORT_RCV_DATA cpu_to_be16(0x2000)
+#define IB_PMA_SEL_PORT_XMIT_PACKETS cpu_to_be16(0x4000)
+#define IB_PMA_SEL_PORT_RCV_PACKETS cpu_to_be16(0x8000)
+
+struct ib_pma_portcounters_ext {
+ u8 reserved;
+ u8 port_select;
+ __be16 counter_select;
+ __be32 reserved1;
+ __be64 port_xmit_data;
+ __be64 port_rcv_data;
+ __be64 port_xmit_packets;
+ __be64 port_rcv_packets;
+ __be64 port_unicast_xmit_packets;
+ __be64 port_unicast_rcv_packets;
+ __be64 port_multicast_xmit_packets;
+ __be64 port_multicast_rcv_packets;
+} __packed;
+
+#define IB_PMA_SELX_PORT_XMIT_DATA cpu_to_be16(0x0001)
+#define IB_PMA_SELX_PORT_RCV_DATA cpu_to_be16(0x0002)
+#define IB_PMA_SELX_PORT_XMIT_PACKETS cpu_to_be16(0x0004)
+#define IB_PMA_SELX_PORT_RCV_PACKETS cpu_to_be16(0x0008)
+#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS cpu_to_be16(0x0010)
+#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS cpu_to_be16(0x0020)
+#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS cpu_to_be16(0x0040)
+#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS cpu_to_be16(0x0080)
+
+#endif /* IB_PMA_H */
diff --git a/sys/ofed/include/rdma/ib_sa.h b/sys/ofed/include/rdma/ib_sa.h
index 5a8f2ce..61588d9 100644
--- a/sys/ofed/include/rdma/ib_sa.h
+++ b/sys/ofed/include/rdma/ib_sa.h
@@ -372,6 +372,28 @@ struct ib_sa_notice_data_port_error {
u8 padding[49];
};
+#define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0)
+#define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1)
+#define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2)
+#define IB_SA_GUIDINFO_REC_RES2 IB_SA_COMP_MASK(3)
+#define IB_SA_GUIDINFO_REC_GID0 IB_SA_COMP_MASK(4)
+#define IB_SA_GUIDINFO_REC_GID1 IB_SA_COMP_MASK(5)
+#define IB_SA_GUIDINFO_REC_GID2 IB_SA_COMP_MASK(6)
+#define IB_SA_GUIDINFO_REC_GID3 IB_SA_COMP_MASK(7)
+#define IB_SA_GUIDINFO_REC_GID4 IB_SA_COMP_MASK(8)
+#define IB_SA_GUIDINFO_REC_GID5 IB_SA_COMP_MASK(9)
+#define IB_SA_GUIDINFO_REC_GID6 IB_SA_COMP_MASK(10)
+#define IB_SA_GUIDINFO_REC_GID7 IB_SA_COMP_MASK(11)
+
+struct ib_sa_guidinfo_rec {
+ __be16 lid;
+ u8 block_num;
+ /* reserved */
+ u8 res1;
+ __be32 res2;
+ u8 guid_info_list[64];
+};
+
struct ib_sa_client {
atomic_t users;
struct completion comp;
@@ -556,4 +578,16 @@ ib_sa_register_inform_info(struct ib_sa_client *client,
*/
void ib_sa_unregister_inform_info(struct ib_inform_info *info);
+int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_guidinfo_rec *rec,
+ ib_sa_comp_mask comp_mask, u8 method,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_guidinfo_rec *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query);
+
+
#endif /* IB_SA_H */
diff --git a/sys/ofed/include/rdma/ib_user_verbs.h b/sys/ofed/include/rdma/ib_user_verbs.h
index b2721c7..670d6e8 100644
--- a/sys/ofed/include/rdma/ib_user_verbs.h
+++ b/sys/ofed/include/rdma/ib_user_verbs.h
@@ -82,9 +82,13 @@ enum {
IB_USER_VERBS_CMD_QUERY_SRQ,
IB_USER_VERBS_CMD_DESTROY_SRQ,
IB_USER_VERBS_CMD_POST_SRQ_RECV,
+ IB_USER_VERBS_CMD_OPEN_XRCD,
+ IB_USER_VERBS_CMD_CLOSE_XRCD,
+ IB_USER_VERBS_CMD_CREATE_XSRQ,
+ IB_USER_VERBS_CMD_OPEN_QP,
+ IB_USER_VERBS_CMD_ATTACH_FLOW,
+ IB_USER_VERBS_CMD_DETACH_FLOW,
IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
- IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN,
- IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN,
IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP,
IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP,
IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP,
@@ -230,6 +234,21 @@ struct ib_uverbs_dealloc_pd {
__u32 pd_handle;
};
+struct ib_uverbs_open_xrcd {
+ __u64 response;
+ __u32 fd;
+ __u32 oflags;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_open_xrcd_resp {
+ __u32 xrcd_handle;
+};
+
+struct ib_uverbs_close_xrcd {
+ __u32 xrcd_handle;
+};
+
struct ib_uverbs_reg_mr {
__u64 response;
__u64 start;
@@ -412,6 +431,17 @@ struct ib_uverbs_create_qp {
__u64 driver_data[0];
};
+struct ib_uverbs_open_qp {
+ __u64 response;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 qpn;
+ __u8 qp_type;
+ __u8 reserved[7];
+ __u64 driver_data[0];
+};
+
+/* also used for open response */
struct ib_uverbs_create_qp_resp {
__u32 qp_handle;
__u32 qpn;
@@ -569,6 +599,16 @@ struct ib_uverbs_send_wr {
} wr;
};
+struct ibv_uverbs_flow_spec {
+ __u32 type;
+ __be32 src_ip;
+ __be32 dst_ip;
+ __be16 src_port;
+ __be16 dst_port;
+ __u8 l4_protocol;
+ __u8 block_mc_loopback;
+};
+
struct ib_uverbs_post_send {
__u64 response;
__u32 qp_handle;
@@ -646,6 +686,45 @@ struct ib_uverbs_detach_mcast {
__u64 driver_data[0];
};
+struct ibv_kern_flow_spec {
+ __u32 type;
+ __u32 reserved1;
+ union {
+ struct {
+ __be16 ethertype;
+ __be16 vlan;
+ __u8 vlan_present;
+ __u8 mac[6];
+ __u8 port;
+ } eth;
+ struct {
+ __be32 qpn;
+ } ib_uc;
+ struct {
+ __u8 mgid[16];
+ } ib_mc;
+ } l2_id;
+ __be32 src_ip;
+ __be32 dst_ip;
+ __be16 src_port;
+ __be16 dst_port;
+ __u8 l4_protocol;
+ __u8 block_mc_loopback;
+ __u8 reserved[2];
+};
+
+struct ib_uverbs_attach_flow {
+ __u32 qp_handle;
+ __u32 priority;
+ struct ibv_kern_flow_spec spec;
+};
+
+struct ib_uverbs_detach_flow {
+ __u32 qp_handle;
+ __u32 priority;
+ struct ibv_kern_flow_spec spec;
+};
+
struct ib_uverbs_create_srq {
__u64 response;
__u64 user_handle;
@@ -656,15 +735,17 @@ struct ib_uverbs_create_srq {
__u64 driver_data[0];
};
-struct ib_uverbs_create_xrc_srq {
+struct ib_uverbs_create_xsrq {
__u64 response;
__u64 user_handle;
+ __u32 srq_type;
__u32 pd_handle;
__u32 max_wr;
__u32 max_sge;
__u32 srq_limit;
+ __u32 reserved;
__u32 xrcd_handle;
- __u32 xrc_cq;
+ __u32 cq_handle;
__u64 driver_data[0];
};
@@ -672,7 +753,7 @@ struct ib_uverbs_create_srq_resp {
__u32 srq_handle;
__u32 max_wr;
__u32 max_sge;
- __u32 reserved;
+ __u32 srqn;
};
struct ib_uverbs_modify_srq {
diff --git a/sys/ofed/include/rdma/ib_verbs.h b/sys/ofed/include/rdma/ib_verbs.h
index f5b054a..0145cb2 100644
--- a/sys/ofed/include/rdma/ib_verbs.h
+++ b/sys/ofed/include/rdma/ib_verbs.h
@@ -47,12 +47,15 @@
#include <linux/list.h>
#include <linux/rwsem.h>
#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/uaccess.h>
#include <linux/rbtree.h>
#include <linux/mutex.h>
+extern struct workqueue_struct *ib_wq;
+
union ib_gid {
u8 raw[16];
struct {
@@ -114,6 +117,11 @@ enum ib_device_cap_flags {
IB_DEVICE_XRC = (1<<20),
IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
+ IB_DEVICE_MR_ALLOCATE = (1<<23),
+ IB_DEVICE_SHARED_MR = (1<<24),
+ IB_DEVICE_QPG = (1<<25),
+ IB_DEVICE_UD_RSS = (1<<26),
+ IB_DEVICE_UD_TSS = (1<<27)
};
enum ib_atomic_cap {
@@ -161,6 +169,7 @@ struct ib_device_attr {
int max_srq_wr;
int max_srq_sge;
unsigned int max_fast_reg_page_list_len;
+ int max_rss_tbl_sz;
u16 max_pkeys;
u8 local_ca_ack_delay;
};
@@ -207,6 +216,7 @@ enum ib_port_cap_flags {
IB_PORT_SM_DISABLED = 1 << 10,
IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+ IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
IB_PORT_CM_SUP = 1 << 16,
IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
IB_PORT_REINIT_SUP = 1 << 18,
@@ -237,6 +247,15 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
}
}
+enum ib_port_speed {
+ IB_SPEED_SDR = 1,
+ IB_SPEED_DDR = 2,
+ IB_SPEED_QDR = 4,
+ IB_SPEED_FDR10 = 8,
+ IB_SPEED_FDR = 16,
+ IB_SPEED_EDR = 32
+};
+
struct ib_protocol_stats {
/* TBD... */
};
@@ -421,7 +440,15 @@ enum ib_rate {
IB_RATE_40_GBPS = 7,
IB_RATE_60_GBPS = 8,
IB_RATE_80_GBPS = 9,
- IB_RATE_120_GBPS = 10
+ IB_RATE_120_GBPS = 10,
+ IB_RATE_14_GBPS = 11,
+ IB_RATE_56_GBPS = 12,
+ IB_RATE_112_GBPS = 13,
+ IB_RATE_168_GBPS = 14,
+ IB_RATE_25_GBPS = 15,
+ IB_RATE_100_GBPS = 16,
+ IB_RATE_200_GBPS = 17,
+ IB_RATE_300_GBPS = 18
};
/**
@@ -433,6 +460,13 @@ enum ib_rate {
int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
/**
+ * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
+ * For example, IB_RATE_2_5_GBPS will be converted to 2500.
+ * @rate: rate to convert.
+ */
+int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
+
+/**
* mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
* enum.
* @mult: multiple to convert.
@@ -498,6 +532,7 @@ enum ib_wc_flags {
IB_WC_GRH = 1,
IB_WC_WITH_IMM = (1<<1),
IB_WC_WITH_INVALIDATE = (1<<2),
+ IB_WC_IP_CSUM_OK = (1<<3),
};
struct ib_wc {
@@ -528,6 +563,11 @@ enum ib_cq_notify_flags {
IB_CQ_REPORT_MISSED_EVENTS = 1 << 2,
};
+enum ib_srq_type {
+ IB_SRQT_BASIC,
+ IB_SRQT_XRC
+};
+
enum ib_srq_attr_mask {
IB_SRQ_MAX_WR = 1 << 0,
IB_SRQ_LIMIT = 1 << 1,
@@ -543,6 +583,14 @@ struct ib_srq_init_attr {
void (*event_handler)(struct ib_event *, void *);
void *srq_context;
struct ib_srq_attr attr;
+ enum ib_srq_type srq_type;
+
+ union {
+ struct {
+ struct ib_xrcd *xrcd;
+ struct ib_cq *cq;
+ } xrc;
+ } ext;
};
struct ib_qp_cap {
@@ -551,6 +599,7 @@ struct ib_qp_cap {
u32 max_send_sge;
u32 max_recv_sge;
u32 max_inline_data;
+ u32 qpg_tss_mask_sz;
};
enum ib_sig_type {
@@ -572,13 +621,32 @@ enum ib_qp_type {
IB_QPT_UD,
IB_QPT_XRC,
IB_QPT_RAW_IPV6,
- IB_QPT_RAW_ETY,
- IB_QPT_RAW_ETH
+ IB_QPT_RAW_ETHERTYPE,
+ IB_QPT_RAW_PACKET = 8,
+ IB_QPT_XRC_INI = 9,
+ IB_QPT_XRC_TGT,
+ IB_QPT_MAX,
};
enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+ IB_QP_CREATE_NETIF_QP = 1 << 2,
+ /* reserve bits 26-31 for low level drivers' internal use */
+ IB_QP_CREATE_RESERVED_START = 1 << 26,
+ IB_QP_CREATE_RESERVED_END = 1 << 31,
+};
+
+enum ib_qpg_type {
+ IB_QPG_NONE = 0,
+ IB_QPG_PARENT = (1<<0),
+ IB_QPG_CHILD_RX = (1<<1),
+ IB_QPG_CHILD_TX = (1<<2)
+};
+
+struct ib_qpg_init_attrib {
+ u32 tss_child_count;
+ u32 rss_child_count;
};
struct ib_qp_init_attr {
@@ -587,14 +655,26 @@ struct ib_qp_init_attr {
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
struct ib_srq *srq;
+ struct ib_xrcd *xrcd; /* XRC TGT QPs only */
struct ib_qp_cap cap;
+ union {
+ struct ib_qp *qpg_parent; /* see qpg_type */
+ struct ib_qpg_init_attrib parent_attrib;
+ };
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
enum ib_qp_create_flags create_flags;
- struct ib_xrcd *xrc_domain; /* XRC qp's only */
+ enum ib_qpg_type qpg_type;
u8 port_num; /* special QP types only */
};
+struct ib_qp_open_attr {
+ void (*event_handler)(struct ib_event *, void *);
+ void *qp_context;
+ u32 qp_num;
+ enum ib_qp_type qp_type;
+};
+
enum ib_rnr_timeout {
IB_RNR_TIMER_655_36 = 0,
IB_RNR_TIMER_000_01 = 1,
@@ -651,7 +731,8 @@ enum ib_qp_attr_mask {
IB_QP_MAX_DEST_RD_ATOMIC = (1<<17),
IB_QP_PATH_MIG_STATE = (1<<18),
IB_QP_CAP = (1<<19),
- IB_QP_DEST_QPN = (1<<20)
+ IB_QP_DEST_QPN = (1<<20),
+ IB_QP_GROUP_RSS = (1<<21)
};
enum ib_qp_state {
@@ -724,6 +805,20 @@ enum ib_send_flags {
IB_SEND_IP_CSUM = (1<<4)
};
+enum ib_flow_types {
+ IB_FLOW_ETH = 0,
+ IB_FLOW_IB_UC = 1,
+ IB_FLOW_IB_MC_IPV4 = 2,
+ IB_FLOW_IB_MC_IPV6 = 3
+};
+
+enum {
+ IB_FLOW_L4_NONE = 0,
+ IB_FLOW_L4_OTHER = 3,
+ IB_FLOW_L4_UDP = 5,
+ IB_FLOW_L4_TCP = 6
+};
+
struct ib_sge {
u64 addr;
u32 length;
@@ -785,7 +880,7 @@ struct ib_send_wr {
u8 static_rate;
} raw_ety;
} wr;
- u32 xrc_remote_srq_num; /* valid for XRC sends only */
+ u32 xrc_remote_srq_num; /* XRC TGT QPs only */
};
struct ib_recv_wr {
@@ -800,7 +895,15 @@ enum ib_access_flags {
IB_ACCESS_REMOTE_WRITE = (1<<1),
IB_ACCESS_REMOTE_READ = (1<<2),
IB_ACCESS_REMOTE_ATOMIC = (1<<3),
- IB_ACCESS_MW_BIND = (1<<4)
+ IB_ACCESS_MW_BIND = (1<<4),
+ IB_ACCESS_ALLOCATE_MR = (1<<5),
+ IB_ACCESS_SHARED_MR_USER_READ = (1<<6),
+ IB_ACCESS_SHARED_MR_USER_WRITE = (1<<7),
+ IB_ACCESS_SHARED_MR_GROUP_READ = (1<<8),
+ IB_ACCESS_SHARED_MR_GROUP_WRITE = (1<<9),
+ IB_ACCESS_SHARED_MR_OTHER_READ = (1<<10),
+ IB_ACCESS_SHARED_MR_OTHER_WRITE = (1<<11)
+
};
struct ib_phys_buf {
@@ -847,7 +950,7 @@ struct ib_ucontext {
struct list_head qp_list;
struct list_head srq_list;
struct list_head ah_list;
- struct list_head xrc_domain_list;
+ struct list_head xrcd_list;
int closing;
};
@@ -884,12 +987,14 @@ struct ib_pd {
struct ib_xrcd {
struct ib_device *device;
struct ib_uobject *uobject;
+ atomic_t usecnt; /* count all exposed resources */
struct inode *inode;
struct rb_node node;
- atomic_t usecnt; /* count all resources */
+
+ struct mutex tgt_qp_mutex;
+ struct list_head tgt_qp_list;
};
-
struct ib_ah {
struct ib_device *device;
struct ib_pd *pd;
@@ -911,13 +1016,19 @@ struct ib_cq {
struct ib_srq {
struct ib_device *device;
struct ib_pd *pd;
- struct ib_cq *xrc_cq;
- struct ib_xrcd *xrcd;
struct ib_uobject *uobject;
void (*event_handler)(struct ib_event *, void *);
void *srq_context;
+ enum ib_srq_type srq_type;
atomic_t usecnt;
- u32 xrc_srq_num;
+
+ union {
+ struct {
+ struct ib_xrcd *xrcd;
+ struct ib_cq *cq;
+ u32 srq_num;
+ } xrc;
+ } ext;
};
struct ib_qp {
@@ -926,12 +1037,17 @@ struct ib_qp {
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
struct ib_srq *srq;
+ struct ib_xrcd *xrcd; /* XRC TGT QPs only */
+ struct list_head xrcd_list;
+ atomic_t usecnt; /* count times opened, mcast attaches */
+ struct list_head open_list;
+ struct ib_qp *real_qp;
struct ib_uobject *uobject;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
u32 qp_num;
enum ib_qp_type qp_type;
- struct ib_xrcd *xrcd; /* XRC QPs only */
+ enum ib_qpg_type qpg_type;
};
struct ib_mr {
@@ -958,6 +1074,32 @@ struct ib_fmr {
u32 rkey;
};
+struct ib_flow_spec {
+ enum ib_flow_types type;
+ union {
+ struct {
+ __be16 ethertype;
+ __be16 vlan;
+ u8 vlan_present;
+ u8 mac[6];
+ u8 port;
+ } eth;
+ struct {
+ __be32 qpn;
+ } ib_uc;
+ struct {
+ u8 mgid[16];
+ } ib_mc;
+ } l2_id;
+ __be32 src_ip;
+ __be32 dst_ip;
+ __be16 src_port;
+ __be16 dst_port;
+ u8 l4_protocol;
+ u8 block_mc_loopback;
+ u8 rule_type;
+};
+
struct ib_mad;
struct ib_grh;
@@ -1037,9 +1179,9 @@ struct ib_device {
struct list_head event_handler_list;
spinlock_t event_handler_lock;
+ spinlock_t client_data_lock;
struct list_head core_list;
struct list_head client_data_list;
- spinlock_t client_data_lock;
struct ib_cache cache;
int *pkey_tbl_len;
@@ -1143,7 +1285,8 @@ struct ib_device {
u64 start, u64 length,
u64 virt_addr,
int mr_access_flags,
- struct ib_udata *udata);
+ struct ib_udata *udata,
+ int mr_id);
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
@@ -1191,7 +1334,7 @@ struct ib_device {
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
- struct ib_ucontext *context,
+ struct ib_ucontext *ucontext,
struct ib_udata *udata);
int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
int (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr,
@@ -1211,7 +1354,17 @@ struct ib_device {
int (*unreg_xrc_rcv_qp)(struct ib_xrcd *xrcd,
void *context,
u32 qp_num);
-
+ int (*attach_flow)(struct ib_qp *qp,
+ struct ib_flow_spec *spec,
+ int priority);
+ int (*detach_flow)(struct ib_qp *qp,
+ struct ib_flow_spec *spec,
+ int priority);
+
+ unsigned long (*get_unmapped_area)(struct file *file,
+ unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
struct ib_dma_mapping_ops *dma_ops;
struct module *owner;
@@ -1225,8 +1378,8 @@ struct ib_device {
IB_DEV_UNREGISTERED
} reg_state;
- u64 uverbs_cmd_mask;
int uverbs_abi_ver;
+ u64 uverbs_cmd_mask;
char node_desc[64];
__be64 node_guid;
@@ -1248,7 +1401,9 @@ struct ib_client {
struct ib_device *ib_alloc_device(size_t size);
void ib_dealloc_device(struct ib_device *device);
-int ib_register_device (struct ib_device *device);
+int ib_register_device(struct ib_device *device,
+ int (*port_callback)(struct ib_device *,
+ u8, struct kobject *));
void ib_unregister_device(struct ib_device *device);
int ib_register_client (struct ib_client *client);
@@ -1269,15 +1424,6 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
}
/**
- * ib_sysfs_create_port_files - iterate over port sysfs directories
- * @device: the IB device
- * @create: a function to create sysfs files in each port directory
- */
-int ib_sysfs_create_port_files(struct ib_device *device,
- int (*create)(struct ib_device *dev, u8 port_num,
- struct kobject *kobj));
-
-/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
* the given QP state transition.
@@ -1427,8 +1573,8 @@ struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd,
struct ib_srq_init_attr *srq_init_attr);
/**
- * ib_create_srq - Creates an SRQ associated with the specified
- * protection domain.
+ * ib_create_srq - Creates a SRQ associated with the specified protection
+ * domain.
* @pd: The protection domain associated with the SRQ.
* @srq_init_attr: A list of initial attributes required to create the
* SRQ. If SRQ creation succeeds, then the attributes are updated to
@@ -1534,12 +1680,36 @@ int ib_query_qp(struct ib_qp *qp,
int ib_destroy_qp(struct ib_qp *qp);
/**
+ * ib_open_qp - Obtain a reference to an existing sharable QP.
+ * @xrcd - XRC domain
+ * @qp_open_attr: Attributes identifying the QP to open.
+ *
+ * Returns a reference to a sharable QP.
+ */
+struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
+ struct ib_qp_open_attr *qp_open_attr);
+
+/**
+ * ib_close_qp - Release an external reference to a QP.
+ * @qp: The QP handle to release
+ *
+ * The opened QP handle is released by the caller. The underlying
+ * shared QP is not destroyed until all internal references are released.
+ */
+int ib_close_qp(struct ib_qp *qp);
+
+/**
* ib_post_send - Posts a list of work requests to the send queue of
* the specified QP.
* @qp: The QP to post the work request on.
* @send_wr: A list of work requests to post on the send queue.
* @bad_send_wr: On an immediate failure, this parameter will reference
* the work request that failed to be posted on the QP.
+ *
+ * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
+ * error is returned, the QP state shall not be affected,
+ * ib_post_send() will return an immediate error after queueing any
+ * earlier work requests in the list.
*/
static inline int ib_post_send(struct ib_qp *qp,
struct ib_send_wr *send_wr,
@@ -1581,8 +1751,7 @@ static inline int ib_post_recv(struct ib_qp *qp,
* the associated completion and event handlers.
* @cqe: The minimum size of the CQ.
* @comp_vector - Completion vector used to signal completion events.
- * Must be >= 0 and < context->num_comp_vectors
- * or IB_CQ_VECTOR_LEAST_ATTACHED.
+ * Must be >= 0 and < context->num_comp_vectors.
*
* Users can examine the cq structure to determine the actual CQ size.
*/
@@ -2154,17 +2323,19 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
*/
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-
/**
- * ib_dealloc_xrcd - Deallocates an extended reliably connected domain.
- * @xrcd: The xrc domain to deallocate.
+ * ib_alloc_xrcd - Allocates an XRC domain.
+ * @device: The device on which to allocate the XRC domain.
*/
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
/**
- * ib_alloc_xrcd - Allocates an extended reliably connected domain.
- * @device: The device on which to allocate the xrcd.
+ * ib_dealloc_xrcd - Deallocates an XRC domain.
+ * @xrcd: The XRC domain to deallocate.
*/
-struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+
+int ib_attach_flow(struct ib_qp *qp, struct ib_flow_spec *spec, int priority);
+int ib_detach_flow(struct ib_qp *qp, struct ib_flow_spec *spec, int priority);
#endif /* IB_VERBS_H */
OpenPOWER on IntegriCloud