Diffstat (limited to 'sys/ofed/drivers/net/mlx4/main.c')
-rw-r--r--  sys/ofed/drivers/net/mlx4/main.c | 2050
1 file changed, 1613 insertions(+), 437 deletions(-)
diff --git a/sys/ofed/drivers/net/mlx4/main.c b/sys/ofed/drivers/net/mlx4/main.c
index b0897bf..dd10029 100644
--- a/sys/ofed/drivers/net/mlx4/main.c
+++ b/sys/ofed/drivers/net/mlx4/main.c
@@ -38,7 +38,10 @@
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
+#include <linux/slab.h>
#include <linux/io-mapping.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
@@ -62,10 +65,6 @@ MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif /* CONFIG_MLX4_DEBUG */
-int mlx4_blck_lb=1;
-module_param_named(block_loopback, mlx4_blck_lb, int, 0644);
-MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0");
-
#ifdef CONFIG_PCI_MSI
static int msi_x = 1;
@@ -78,124 +77,177 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
#endif /* CONFIG_PCI_MSI */
+static int enable_sys_tune = 0;
+module_param(enable_sys_tune, int, 0444);
+MODULE_PARM_DESC(enable_sys_tune, "Tune the CPUs for better performance (default 0)");
+
+int mlx4_blck_lb = 1;
+module_param_named(block_loopback, mlx4_blck_lb, int, 0644);
+MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 "
+ "(default: 1)");
+
+static int num_vfs;
+module_param(num_vfs, int, 0444);
+MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0");
+
+static int probe_vf;
+module_param(probe_vf, int, 0644);
+MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)");
+
+int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+
+module_param_named(log_num_mgm_entry_size,
+ mlx4_log_num_mgm_entry_size, int, 0444);
+MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, which defines"
+			   " the number of QPs per MCG;"
+			   " for example, 10 gives 248. Range: 7 <="
+			   " log_num_mgm_entry_size <= 12."
+			   " To activate device managed"
+			   " flow steering when available, set to -1");
+
+static int high_rate_steer;
+module_param(high_rate_steer, int, 0444);
+MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate"
+ " (default off)");
+
+static int fast_drop;
+module_param_named(fast_drop, fast_drop, int, 0444);
+MODULE_PARM_DESC(fast_drop,
+ "Enable fast packet drop when no recieve WQEs are posted");
+
+int mlx4_enable_64b_cqe_eqe;
+module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644);
+MODULE_PARM_DESC(enable_64b_cqe_eqe,
+ "Enable 64 byte CQEs/EQEs when the the FW supports this, if nonzero");
+
+#define HCA_GLOBAL_CAP_MASK 0
+
+#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE
+
static char mlx4_version[] __devinitdata =
DRV_NAME ": Mellanox ConnectX core driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
-struct mutex drv_mutex;
-
-static struct mlx4_profile default_profile = {
- .num_qp = 1 << 18,
- .num_srq = 1 << 16,
- .rdmarc_per_qp = 1 << 4,
- .num_cq = 1 << 16,
- .num_mcg = 1 << 13,
- .num_mpt = 1 << 19,
- .num_mtt = 1 << 20,
-};
-
-static int log_num_mac = 2;
+static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
-static int use_prio;
-module_param_named(use_prio, use_prio, bool, 0444);
-MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
- "(0/1, default 0)");
+static int log_num_vlan;
+module_param_named(log_num_vlan, log_num_vlan, int, 0444);
+MODULE_PARM_DESC(log_num_vlan,
+ "(Obsolete) Log2 max number of VLANs per ETH port (0-7)");
+/* Log2 max number of VLANs per ETH port (0-7) */
+#define MLX4_LOG_NUM_VLANS 7
-static struct mlx4_profile mod_param_profile = { 0 };
+int log_mtts_per_seg = ilog2(1);
+module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
+ "(0-7) (default: 0)");
+
+static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
+#if 0
+static int arr_argc = 2;
+module_param_array(port_type_array, int, &arr_argc, 0444);
+MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
+ "1 for IB, 2 for Ethernet");
+#endif
+
+struct mlx4_port_config {
+ struct list_head list;
+ enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
+ struct pci_dev *pdev;
+};
+
+#define MLX4_LOG_NUM_MTT 20
+/* We limit to 30 because of a bitmap issue that uses int rather than
+ * uint; see mlx4_buddy_init -> bitmap_zero, which takes an int.
+ */
+#define MLX4_MAX_LOG_NUM_MTT 30
+static struct mlx4_profile mod_param_profile = {
+ .num_qp = 19,
+ .num_srq = 16,
+ .rdmarc_per_qp = 4,
+ .num_cq = 16,
+ .num_mcg = 13,
+ .num_mpt = 19,
+	.num_mtt	= 0, /* max(20, 2 * MTTs needed to cover host memory) */
+};
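+/*
+ * Note: these profile fields are log2 values; process_mod_param_profile()
+ * below expands them, e.g. the default num_qp of 19 becomes
+ * 1 << 19 = 524288 QPs.
+ */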
module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444);
-MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA");
+MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)");
module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444);
-MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA");
+MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA "
+ "(default: 16)");
-module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 0444);
-MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP");
+module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int,
+ 0444);
+MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP "
+ "(default: 4)");
module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444);
-MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA");
+MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)");
module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444);
-MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA");
+MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA "
+ "(default: 13)");
module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444);
MODULE_PARM_DESC(log_num_mpt,
- "log maximum number of memory protection table entries per HCA");
+ "log maximum number of memory protection table entries per "
+ "HCA (default: 19)");
module_param_named(log_num_mtt, mod_param_profile.num_mtt, int, 0444);
MODULE_PARM_DESC(log_num_mtt,
- "log maximum number of memory translation table segments per HCA");
-
-static int log_mtts_per_seg = 0;
-module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
-MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
+ "log maximum number of memory translation table segments per "
+ "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))");
-static void process_mod_param_profile(void)
-{
- default_profile.num_qp = (mod_param_profile.num_qp ?
- 1 << mod_param_profile.num_qp :
- default_profile.num_qp);
- default_profile.num_srq = (mod_param_profile.num_srq ?
- 1 << mod_param_profile.num_srq :
- default_profile.num_srq);
- default_profile.rdmarc_per_qp = (mod_param_profile.rdmarc_per_qp ?
- 1 << mod_param_profile.rdmarc_per_qp :
- default_profile.rdmarc_per_qp);
- default_profile.num_cq = (mod_param_profile.num_cq ?
- 1 << mod_param_profile.num_cq :
- default_profile.num_cq);
- default_profile.num_mcg = (mod_param_profile.num_mcg ?
- 1 << mod_param_profile.num_mcg :
- default_profile.num_mcg);
- default_profile.num_mpt = (mod_param_profile.num_mpt ?
- 1 << mod_param_profile.num_mpt :
- default_profile.num_mpt);
- default_profile.num_mtt = (mod_param_profile.num_mtt ?
- 1 << mod_param_profile.num_mtt :
- default_profile.num_mtt);
-}
-
-struct mlx4_port_config
-{
- struct list_head list;
- enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
- struct pci_dev *pdev;
+enum {
+ MLX4_IF_STATE_BASIC,
+ MLX4_IF_STATE_EXTENDED
};
-static LIST_HEAD(config_list);
-
-static void mlx4_config_cleanup(void)
-{
- struct mlx4_port_config *config, *tmp;
-
- list_for_each_entry_safe(config, tmp, &config_list, list) {
- list_del(&config->list);
- kfree(config);
- }
-}
-
-void *mlx4_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port)
+static void process_mod_param_profile(struct mlx4_profile *profile)
{
- return mlx4_find_get_prot_dev(dev, proto, port);
-}
-EXPORT_SYMBOL(mlx4_get_prot_dev);
-
-void mlx4_set_iboe_counter(struct mlx4_dev *dev, int index, u8 port)
-{
- struct mlx4_priv *priv = mlx4_priv(dev);
-
- priv->iboe_counter_index[port - 1] = index;
-}
-EXPORT_SYMBOL(mlx4_set_iboe_counter);
-int mlx4_get_iboe_counter(struct mlx4_dev *dev, u8 port)
-{
- struct mlx4_priv *priv = mlx4_priv(dev);
+ vm_size_t hwphyssz;
+ hwphyssz = 0;
+ TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz);
- return priv->iboe_counter_index[port - 1];
+ profile->num_qp = 1 << mod_param_profile.num_qp;
+ profile->num_srq = 1 << mod_param_profile.num_srq;
+ profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp;
+ profile->num_cq = 1 << mod_param_profile.num_cq;
+ profile->num_mcg = 1 << mod_param_profile.num_mcg;
+ profile->num_mpt = 1 << mod_param_profile.num_mpt;
+ /*
+ * We want to scale the number of MTTs with the size of the
+ * system memory, since it makes sense to register a lot of
+ * memory on a system with a lot of memory. As a heuristic,
+ * make sure we have enough MTTs to register twice the system
+ * memory (with PAGE_SIZE entries).
+ *
+ * This number has to be a power of two and fit into 32 bits
+	 * due to device limitations. We cap it at 2^30 because of a
+	 * bitmap limitation (mlx4_buddy_init -> bitmap_zero works with
+	 * int instead of uint). That limits us to 4 TB of memory
+	 * registration per HCA with 4 KB pages, which is probably OK
+	 * for the next few months.
+ */
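+	/*
+	 * Illustrative arithmetic (following the intent above, assuming
+	 * 8 GB of host memory, 4 KB pages and log_mtts_per_seg = 0):
+	 * registering twice the memory needs 2 * 2^33 / 2^12 = 2^22 MTT
+	 * entries, so num_mtt rounds up to 2^22 and is reported back as
+	 * log value 22.
+	 */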
+ if (mod_param_profile.num_mtt)
+ profile->num_mtt = 1 << mod_param_profile.num_mtt;
+ else {
+ profile->num_mtt =
+ roundup_pow_of_two(max_t(unsigned,
+ 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg),
+ min(1UL <<
+ (MLX4_MAX_LOG_NUM_MTT -
+ log_mtts_per_seg),
+ (hwphyssz << 1)
+ >> log_mtts_per_seg)));
+		/* set the actual value, so it will be reflected to the
+		 * user via sysfs */
+ mod_param_profile.num_mtt = ilog2(profile->num_mtt * (1 << log_mtts_per_seg));
+ }
}
-EXPORT_SYMBOL(mlx4_get_iboe_counter);
int mlx4_check_port_params(struct mlx4_dev *dev,
enum mlx4_port_type *port_type)
@@ -230,19 +282,6 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev)
dev->caps.port_mask[i] = dev->caps.port_type[i];
}
-static u8 get_counters_mode(u64 flags)
-{
- switch (flags >> 48 & 3) {
- case 2:
- case 3:
- return MLX4_CUNTERS_EXT;
- case 1:
- return MLX4_CUNTERS_BASIC;
- default:
- return MLX4_CUNTERS_DISABLED;
- }
-}
-
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
int err;
@@ -276,21 +315,29 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
}
dev->caps.num_ports = dev_cap->num_ports;
+ dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM;
for (i = 1; i <= dev->caps.num_ports; ++i) {
dev->caps.vl_cap[i] = dev_cap->max_vl[i];
dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i];
+ dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i];
+ dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i];
+ /* set gid and pkey table operating lengths by default
+ * to non-sriov values */
dev->caps.gid_table_len[i] = dev_cap->max_gids[i];
dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i];
dev->caps.def_mac[i] = dev_cap->def_mac[i];
dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
+ dev->caps.suggested_type[i] = dev_cap->suggested_type[i];
+ dev->caps.default_sense[i] = dev_cap->default_sense[i];
dev->caps.trans_type[i] = dev_cap->trans_type[i];
dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i];
dev->caps.wavelength[i] = dev_cap->wavelength[i];
dev->caps.trans_code[i] = dev_cap->trans_code[i];
}
+ dev->caps.uar_page_size = PAGE_SIZE;
dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE;
dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
dev->caps.bf_reg_size = dev_cap->bf_reg_size;
@@ -304,51 +351,96 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.reserved_srqs = dev_cap->reserved_srqs;
dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz;
dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz;
- dev->caps.num_qp_per_mgm = MLX4_QP_PER_MGM;
/*
* Subtract 1 from the limit because we need to allocate a
- * spare CQE so the HCA HW can tell the difference between an
- * empty CQ and a full CQ.
+ * spare CQE to enable resizing the CQ
*/
dev->caps.max_cqes = dev_cap->max_cq_sz - 1;
dev->caps.reserved_cqs = dev_cap->reserved_cqs;
dev->caps.reserved_eqs = dev_cap->reserved_eqs;
- dev->caps.mtts_per_seg = 1 << log_mtts_per_seg;
- dev->caps.reserved_mtts = DIV_ROUND_UP(dev_cap->reserved_mtts,
- dev->caps.mtts_per_seg);
+ dev->caps.reserved_mtts = dev_cap->reserved_mtts;
dev->caps.reserved_mrws = dev_cap->reserved_mrws;
- dev->caps.reserved_uars = dev_cap->reserved_uars;
+
+ /* The first 128 UARs are used for EQ doorbells */
+ dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars);
dev->caps.reserved_pds = dev_cap->reserved_pds;
- dev->caps.mtt_entry_sz = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
+ dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+ dev_cap->reserved_xrcds : 0;
+ dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+ dev_cap->max_xrcds : 0;
+ dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz;
+
dev->caps.max_msg_sz = dev_cap->max_msg_sz;
dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1);
dev->caps.flags = dev_cap->flags;
+ dev->caps.flags2 = dev_cap->flags2;
dev->caps.bmme_flags = dev_cap->bmme_flags;
dev->caps.reserved_lkey = dev_cap->reserved_lkey;
dev->caps.stat_rate_support = dev_cap->stat_rate_support;
- dev->caps.udp_rss = dev_cap->udp_rss;
- dev->caps.loopback_support = dev_cap->loopback_support;
- dev->caps.wol = dev_cap->wol;
+ dev->caps.cq_timestamp = dev_cap->timestamp_support;
dev->caps.max_gso_sz = dev_cap->max_gso_sz;
- dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
- dev_cap->reserved_xrcds : 0;
- dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
- dev_cap->max_xrcds : 0;
+ dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz;
+
+ /* Sense port always allowed on supported devices for ConnectX-1 and -2 */
+ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
+ dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
+ /* Don't do sense port on multifunction devices (for now at least) */
+ if (mlx4_is_mfunc(dev))
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
dev->caps.log_num_macs = log_num_mac;
- dev->caps.log_num_prios = use_prio ? 3 : 0;
+ dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
+
+ dev->caps.fast_drop = fast_drop ?
+ !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) :
+ 0;
for (i = 1; i <= dev->caps.num_ports; ++i) {
dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
if (dev->caps.supported_type[i]) {
- if (dev->caps.supported_type[i] != MLX4_PORT_TYPE_ETH)
- dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
- else
+ /* if only ETH is supported - assign ETH */
+ if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+ /* if only IB is supported, assign IB */
+ else if (dev->caps.supported_type[i] ==
+ MLX4_PORT_TYPE_IB)
+ dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
+ else {
+ /* if IB and ETH are supported, we set the port
+ * type according to user selection of port type;
+ * if user selected none, take the FW hint */
+ if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
+ dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
+ MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
+ else
+ dev->caps.port_type[i] = port_type_array[i - 1];
+ }
}
- dev->caps.possible_type[i] = dev->caps.port_type[i];
+		/*
+		 * Link sensing is allowed on the port if 3 conditions are true:
+		 * 1. Both protocols are supported on the port.
+		 * 2. Different types are supported on the port.
+		 * 3. FW declared that it supports link sensing.
+		 */
mlx4_priv(dev)->sense.sense_allowed[i] =
- dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO;
+ ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
+ (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
+ (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
+
+ /*
+ * If "default_sense" bit is set, we move the port to "AUTO" mode
+ * and perform sense_port FW command to try and set the correct
+ * port type from beginning
+ */
+ if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
+ enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
+ dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
+ mlx4_SENSE_PORT(dev, i, &sensed_port);
+ if (sensed_port != MLX4_PORT_TYPE_NONE)
+ dev->caps.port_type[i] = sensed_port;
+ } else {
+ dev->caps.possible_type[i] = dev->caps.port_type[i];
+ }
if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
dev->caps.log_num_macs = dev_cap->log_max_macs[i];
@@ -356,52 +448,316 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
"for port %d, reducing to %d.\n",
i, 1 << dev->caps.log_num_macs);
}
- dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
+ if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
+ dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
+ mlx4_warn(dev, "Requested number of VLANs is too much "
+ "for port %d, reducing to %d.\n",
+ i, 1 << dev->caps.log_num_vlans);
+ }
}
- dev->caps.counters_mode = get_counters_mode(dev_cap->flags);
- dev->caps.max_basic_counters = 1 << ilog2(dev_cap->max_basic_counters);
- dev->caps.max_ext_counters = 1 << ilog2(dev_cap->max_ext_counters);
+ dev->caps.max_basic_counters = dev_cap->max_basic_counters;
+ dev->caps.max_extended_counters = dev_cap->max_extended_counters;
+ /* support extended counters if available */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT)
+ dev->caps.max_counters = dev->caps.max_extended_counters;
+ else
+ dev->caps.max_counters = dev->caps.max_basic_counters;
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
(1 << dev->caps.log_num_macs) *
(1 << dev->caps.log_num_vlans) *
- (1 << dev->caps.log_num_prios) *
dev->caps.num_ports;
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
- dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR];
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
+
+ dev->caps.sync_qp = dev_cap->sync_qp;
+ dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
+
+ if (!mlx4_enable_64b_cqe_eqe) {
+ if (dev_cap->flags &
+ (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
+ mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+ }
+ }
+
+ if ((dev->caps.flags &
+ (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
+ mlx4_is_master(dev))
+ dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
return 0;
}
-
-static int mlx4_save_config(struct mlx4_dev *dev)
+/* The function checks whether there are live VFs and returns their number. */
+static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
- struct mlx4_port_config *config;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state;
int i;
+ int ret = 0;
+
+ for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
+ s_state = &priv->mfunc.master.slave_state[i];
+ if (s_state->active && s_state->last_cmd !=
+ MLX4_COMM_CMD_RESET) {
+ mlx4_warn(dev, "%s: slave: %d is still active\n",
+ __func__, i);
+ ret++;
+ }
+ }
+ return ret;
+}
+
+int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
+{
+ u32 qk = MLX4_RESERVED_QKEY_BASE;
+
+ if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
+ qpn < dev->phys_caps.base_proxy_sqpn)
+ return -EINVAL;
+
+ if (qpn >= dev->phys_caps.base_tunnel_sqpn)
+ /* tunnel qp */
+ qk += qpn - dev->phys_caps.base_tunnel_sqpn;
+ else
+ qk += qpn - dev->phys_caps.base_proxy_sqpn;
+ *qkey = qk;
+ return 0;
+}
+EXPORT_SYMBOL(mlx4_get_parav_qkey);
+
+void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ priv->virt2phys_pkey[slave][port - 1][i] = val;
+}
+EXPORT_SYMBOL(mlx4_sync_pkey_table);
+
+void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ priv->slave_node_guids[slave] = guid;
+}
+EXPORT_SYMBOL(mlx4_put_slave_node_guid);
+
+__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return 0;
+
+ return priv->slave_node_guids[slave];
+}
+EXPORT_SYMBOL(mlx4_get_slave_node_guid);
+
+int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_slave;
+
+ if (!mlx4_is_master(dev))
+ return 0;
+
+ s_slave = &priv->mfunc.master.slave_state[slave];
+ return !!s_slave->active;
+}
+EXPORT_SYMBOL(mlx4_is_slave_active);
+
+static void slave_adjust_steering_mode(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap,
+ struct mlx4_init_hca_param *hca_param)
+{
+ dev->caps.steering_mode = hca_param->steering_mode;
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED)
+ dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
+ else
+ dev->caps.num_qp_per_mgm =
+ 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
+
+ mlx4_dbg(dev, "Steering mode is: %s\n",
+ mlx4_steering_mode_str(dev->caps.steering_mode));
+}
+
+static int mlx4_slave_cap(struct mlx4_dev *dev)
+{
+ int err;
+ u32 page_size;
+ struct mlx4_dev_cap dev_cap;
+ struct mlx4_func_cap func_cap;
+ struct mlx4_init_hca_param hca_param;
+ int i;
+
+ memset(&hca_param, 0, sizeof(hca_param));
+ err = mlx4_QUERY_HCA(dev, &hca_param);
+ if (err) {
+ mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
+ return err;
+ }
- list_for_each_entry(config, &config_list, list) {
- if (config->pdev == dev->pdev) {
- for (i = 1; i <= dev->caps.num_ports; i++)
- config->port_type[i] = dev->caps.possible_type[i];
- return 0;
+	/* fail if the HCA has an unknown capability */
+ if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
+ HCA_GLOBAL_CAP_MASK) {
+ mlx4_err(dev, "Unknown hca global capabilities\n");
+ return -ENOSYS;
+ }
+
+ mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;
+
+ dev->caps.hca_core_clock = hca_param.hca_core_clock;
+
+ memset(&dev_cap, 0, sizeof(dev_cap));
+ dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
+ err = mlx4_dev_cap(dev, &dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+ return err;
+ }
+
+ err = mlx4_QUERY_FW(dev);
+ if (err)
+ mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n");
+
+ page_size = ~dev->caps.page_size_cap + 1;
+ mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
+ if (page_size > PAGE_SIZE) {
+ mlx4_err(dev, "HCA minimum page size of %d bigger than "
+ "kernel PAGE_SIZE of %d, aborting.\n",
+ page_size, PAGE_SIZE);
+ return -ENODEV;
+ }
+
+ /* slave gets uar page size from QUERY_HCA fw command */
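+	/* (uar_page_sz encodes a log2 offset from 4 KB, e.g. 0 -> 1 << 12 = 4096 bytes) */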
+ dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);
+
+ /* TODO: relax this assumption */
+ if (dev->caps.uar_page_size != PAGE_SIZE) {
+ mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n",
+ dev->caps.uar_page_size, PAGE_SIZE);
+ return -ENODEV;
+ }
+
+ memset(&func_cap, 0, sizeof(func_cap));
+ err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n",
+ err);
+ return err;
+ }
+
+ if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
+ PF_CONTEXT_BEHAVIOUR_MASK) {
+ mlx4_err(dev, "Unknown pf context behaviour\n");
+ return -ENOSYS;
+ }
+
+ dev->caps.num_ports = func_cap.num_ports;
+ dev->quotas.qp = func_cap.qp_quota;
+ dev->quotas.srq = func_cap.srq_quota;
+ dev->quotas.cq = func_cap.cq_quota;
+ dev->quotas.mpt = func_cap.mpt_quota;
+ dev->quotas.mtt = func_cap.mtt_quota;
+ dev->caps.num_qps = 1 << hca_param.log_num_qps;
+ dev->caps.num_srqs = 1 << hca_param.log_num_srqs;
+ dev->caps.num_cqs = 1 << hca_param.log_num_cqs;
+ dev->caps.num_mpts = 1 << hca_param.log_mpt_sz;
+ dev->caps.num_eqs = func_cap.max_eq;
+ dev->caps.reserved_eqs = func_cap.reserved_eq;
+ dev->caps.num_pds = MLX4_NUM_PDS;
+ dev->caps.num_mgms = 0;
+ dev->caps.num_amgms = 0;
+
+ if (dev->caps.num_ports > MLX4_MAX_PORTS) {
+ mlx4_err(dev, "HCA has %d ports, but we only support %d, "
+ "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS);
+ return -ENODEV;
+ }
+
+ dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+ dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
+
+ if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
+ !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ for (i = 1; i <= dev->caps.num_ports; ++i) {
+ err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_FUNC_CAP port command failed for"
+ " port %d, aborting (%d).\n", i, err);
+ goto err_mem;
}
+ dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
+ dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
+ dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
+ dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
+ dev->caps.port_mask[i] = dev->caps.port_type[i];
+ err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
+ &dev->caps.gid_table_len[i],
+ &dev->caps.pkey_table_len[i]);
+ if (err)
+ goto err_mem;
}
- config = kmalloc(sizeof(struct mlx4_port_config), GFP_KERNEL);
- if (!config)
- return -ENOMEM;
+ if (dev->caps.uar_page_size * (dev->caps.num_uars -
+ dev->caps.reserved_uars) >
+ pci_resource_len(dev->pdev, 2)) {
+ mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than "
+ "PCI resource 2 size of 0x%llx, aborting.\n",
+ dev->caps.uar_page_size * dev->caps.num_uars,
+ (unsigned long long) pci_resource_len(dev->pdev, 2));
+ err = -ENOMEM;
+ goto err_mem;
+ }
- config->pdev = dev->pdev;
- for (i = 1; i <= dev->caps.num_ports; i++)
- config->port_type[i] = dev->caps.possible_type[i];
+ if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
+ dev->caps.eqe_size = 64;
+ dev->caps.eqe_factor = 1;
+ } else {
+ dev->caps.eqe_size = 32;
+ dev->caps.eqe_factor = 0;
+ }
- list_add_tail(&config->list, &config_list);
+ if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
+ dev->caps.cqe_size = 64;
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+ } else {
+ dev->caps.cqe_size = 32;
+ }
+
+ slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
return 0;
+
+err_mem:
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
+ dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+
+ return err;
}
/*
@@ -418,16 +774,15 @@ int mlx4_change_port_types(struct mlx4_dev *dev,
for (port = 0; port < dev->caps.num_ports; port++) {
/* Change the port type only if the new type is different
* from the current, and not set to Auto */
- if (port_types[port] != dev->caps.port_type[port + 1]) {
+ if (port_types[port] != dev->caps.port_type[port + 1])
change = 1;
- dev->caps.port_type[port + 1] = port_types[port];
- }
}
if (change) {
mlx4_unregister_device(dev);
for (port = 1; port <= dev->caps.num_ports; port++) {
mlx4_CLOSE_PORT(dev, port);
- err = mlx4_SET_PORT(dev, port);
+ dev->caps.port_type[port] = port_types[port - 1];
+ err = mlx4_SET_PORT(dev, port, -1);
if (err) {
mlx4_err(dev, "Failed to set port %d, "
"aborting\n", port);
@@ -435,7 +790,6 @@ int mlx4_change_port_types(struct mlx4_dev *dev,
}
}
mlx4_set_port_mask(dev);
- mlx4_save_config(dev);
err = mlx4_register_device(dev);
}
@@ -487,7 +841,7 @@ static ssize_t set_port_type(struct device *dev,
return -EINVAL;
}
- mlx4_stop_sense(mdev);
+ mlx4_stop_sense(mdev);
mutex_lock(&priv->port_mutex);
/* Possible type is always the one that was delivered */
mdev->caps.possible_type[info->port] = info->tmp_type;
@@ -499,14 +853,8 @@ static ssize_t set_port_type(struct device *dev,
types[i] = mdev->caps.port_type[i+1];
}
- if (priv->trig) {
- if (++priv->changed_ports < mdev->caps.num_ports)
- goto out;
- else
- priv->trig = priv->changed_ports = 0;
- }
-
- if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+ if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
+ !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
for (i = 1; i <= mdev->caps.num_ports; i++) {
if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
mdev->caps.possible_type[i] = mdev->caps.port_type[i];
@@ -541,22 +889,97 @@ out:
return err ? err : count;
}
-static ssize_t trigger_port(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+enum ibta_mtu {
+ IB_MTU_256 = 1,
+ IB_MTU_512 = 2,
+ IB_MTU_1024 = 3,
+ IB_MTU_2048 = 4,
+ IB_MTU_4096 = 5
+};
+
+static inline int int_to_ibta_mtu(int mtu)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct mlx4_dev *mdev = pci_get_drvdata(pdev);
- struct mlx4_priv *priv = container_of(mdev, struct mlx4_priv, dev);
+ switch (mtu) {
+ case 256: return IB_MTU_256;
+ case 512: return IB_MTU_512;
+ case 1024: return IB_MTU_1024;
+ case 2048: return IB_MTU_2048;
+ case 4096: return IB_MTU_4096;
+ default: return -1;
+ }
+}
- if (!priv)
- return -ENODEV;
+static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
+{
+ switch (mtu) {
+ case IB_MTU_256: return 256;
+ case IB_MTU_512: return 512;
+ case IB_MTU_1024: return 1024;
+ case IB_MTU_2048: return 2048;
+ case IB_MTU_4096: return 4096;
+ default: return -1;
+ }
+}
+static ssize_t show_port_ib_mtu(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+ port_mtu_attr);
+ struct mlx4_dev *mdev = info->dev;
+
+ if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
+ mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
+
+ sprintf(buf, "%d\n",
+ ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
+ return strlen(buf);
+}
+
+static ssize_t set_port_ib_mtu(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+ port_mtu_attr);
+ struct mlx4_dev *mdev = info->dev;
+ struct mlx4_priv *priv = mlx4_priv(mdev);
+ int err, port, mtu, ibta_mtu = -1;
+
+ if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
+ mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
+ return -EINVAL;
+ }
+
+ mtu = (int) simple_strtol(buf, NULL, 0);
+ ibta_mtu = int_to_ibta_mtu(mtu);
+
+ if (ibta_mtu < 0) {
+ mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf);
+ return -EINVAL;
+ }
+
+ mdev->caps.port_ib_mtu[info->port] = ibta_mtu;
+
+ mlx4_stop_sense(mdev);
mutex_lock(&priv->port_mutex);
- priv->trig = 1;
+ mlx4_unregister_device(mdev);
+ for (port = 1; port <= mdev->caps.num_ports; port++) {
+ mlx4_CLOSE_PORT(mdev, port);
+ err = mlx4_SET_PORT(mdev, port, -1);
+ if (err) {
+ mlx4_err(mdev, "Failed to set port %d, "
+ "aborting\n", port);
+ goto err_set_port;
+ }
+ }
+ err = mlx4_register_device(mdev);
+err_set_port:
mutex_unlock(&priv->port_mutex);
- return count;
+ mlx4_start_sense(mdev);
+ return err ? err : count;
}
-DEVICE_ATTR(port_trigger, S_IWUGO, NULL, trigger_port);
static int mlx4_load_fw(struct mlx4_dev *dev)
{
@@ -597,6 +1020,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
{
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
+ int num_eqs;
err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
cmpt_base +
@@ -626,12 +1050,13 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
if (err)
goto err_srq;
+ num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
+ dev->caps.num_eqs;
err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
cmpt_base +
((u64) (MLX4_CMPT_TYPE_EQ *
cmpt_entry_sz) << MLX4_CMPT_SHIFT),
- cmpt_entry_sz,
- dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
+ cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
if (err)
goto err_cq;
@@ -655,6 +1080,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
{
struct mlx4_priv *priv = mlx4_priv(dev);
u64 aux_pages;
+ int num_eqs;
int err;
err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
@@ -686,10 +1112,12 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
goto err_unmap_aux;
}
+
+ num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
+ dev->caps.num_eqs;
err = mlx4_init_icm_table(dev, &priv->eq_table.table,
init_hca->eqc_base, dev_cap->eqc_entry_sz,
- dev->caps.num_eqs, dev->caps.num_eqs,
- 0, 0);
+ num_eqs, num_eqs, 0, 0);
if (err) {
mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
goto err_unmap_cmpt;
@@ -709,7 +1137,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
init_hca->mtt_base,
dev->caps.mtt_entry_sz,
- dev->caps.num_mtt_segs,
+ dev->caps.num_mtts,
dev->caps.reserved_mtts, 1, 0);
if (err) {
mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
@@ -791,12 +1219,15 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
}
/*
- * It's not strictly required, but for simplicity just map the
- * whole multicast group table now. The table isn't very big
- * and it's a lot easier than trying to track ref counts.
+ * For flow steering device managed mode it is required to use
+ * mlx4_init_icm_table. For B0 steering mode it's not strictly
+ * required, but for simplicity just map the whole multicast
+ * group table now. The table isn't very big and it's a lot
+ * easier than trying to track ref counts.
*/
err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
- init_hca->mc_base, MLX4_MGM_ENTRY_SIZE,
+ init_hca->mc_base,
+ mlx4_get_mgm_entry_size(dev),
dev->caps.num_mgms + dev->caps.num_amgms,
dev->caps.num_mgms + dev->caps.num_amgms,
0, 0);
@@ -872,6 +1303,16 @@ static void mlx4_free_icms(struct mlx4_dev *dev)
mlx4_free_icm(dev, priv->fw.aux_icm, 0);
}
+static void mlx4_slave_exit(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
+ mlx4_warn(dev, "Failed to close slave function.\n");
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+}
+
static int map_bf_area(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -879,8 +1320,13 @@ static int map_bf_area(struct mlx4_dev *dev)
resource_size_t bf_len;
int err = 0;
- bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT);
- bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT);
+ if (!dev->caps.bf_reg_size)
+ return -ENXIO;
+
+ bf_start = pci_resource_start(dev->pdev, 2) +
+ (dev->caps.num_uars << PAGE_SHIFT);
+ bf_len = pci_resource_len(dev->pdev, 2) -
+ (dev->caps.num_uars << PAGE_SHIFT);
priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
if (!priv->bf_mapping)
err = -ENOMEM;
@@ -894,177 +1340,473 @@ static void unmap_bf_area(struct mlx4_dev *dev)
io_mapping_free(mlx4_priv(dev)->bf_mapping);
}
+cycle_t mlx4_read_clock(struct mlx4_dev *dev)
+{
+ u32 clockhi, clocklo, clockhi1;
+ cycle_t cycles;
+ int i;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
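+	/*
+	 * The 64-bit counter is read as two 32-bit halves: read hi, lo,
+	 * then hi again, and retry (up to 10 times) if the high word
+	 * changed, i.e. the low word wrapped between the two reads.
+	 */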
+ for (i = 0; i < 10; i++) {
+ clockhi = swab32(readl(priv->clock_mapping));
+ clocklo = swab32(readl(priv->clock_mapping + 4));
+ clockhi1 = swab32(readl(priv->clock_mapping));
+ if (clockhi == clockhi1)
+ break;
+ }
+
+ cycles = (u64) clockhi << 32 | (u64) clocklo;
+
+ return cycles;
+}
+EXPORT_SYMBOL_GPL(mlx4_read_clock);
+
+static int map_internal_clock(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ priv->clock_mapping = ioremap(pci_resource_start(dev->pdev,
+ priv->fw.clock_bar) +
+ priv->fw.clock_offset, MLX4_CLOCK_SIZE);
+
+ if (!priv->clock_mapping)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void unmap_internal_clock(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (priv->clock_mapping)
+ iounmap(priv->clock_mapping);
+}
+
static void mlx4_close_hca(struct mlx4_dev *dev)
{
+ unmap_internal_clock(dev);
unmap_bf_area(dev);
- mlx4_CLOSE_HCA(dev, 0);
- mlx4_free_icms(dev);
- mlx4_UNMAP_FA(dev);
- mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+ if (mlx4_is_slave(dev))
+ mlx4_slave_exit(dev);
+ else {
+ mlx4_CLOSE_HCA(dev, 0);
+ mlx4_free_icms(dev);
+ mlx4_UNMAP_FA(dev);
+ mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+ }
+}
+
+static int mlx4_init_slave(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u64 dma = (u64) priv->mfunc.vhcr_dma;
+ int num_of_reset_retries = NUM_OF_RESET_RETRIES;
+ int ret_from_reset = 0;
+ u32 slave_read;
+ u32 cmd_channel_ver;
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ priv->cmd.max_cmds = 1;
+ mlx4_warn(dev, "Sending reset\n");
+ ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
+ MLX4_COMM_TIME);
+	/* if we are in the middle of FLR, the slave will retry
+	 * NUM_OF_RESET_RETRIES times before giving up. */
+ if (ret_from_reset) {
+ if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
+ msleep(SLEEP_TIME_IN_RESET);
+ while (ret_from_reset && num_of_reset_retries) {
+ mlx4_warn(dev, "slave is currently in the"
+ "middle of FLR. retrying..."
+ "(try num:%d)\n",
+ (NUM_OF_RESET_RETRIES -
+ num_of_reset_retries + 1));
+ ret_from_reset =
+ mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET,
+ 0, MLX4_COMM_TIME);
+ num_of_reset_retries = num_of_reset_retries - 1;
+ }
+ } else
+ goto err;
+ }
+
+ /* check the driver version - the slave I/F revision
+ * must match the master's */
+ slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
+ cmd_channel_ver = mlx4_comm_get_version();
+
+ if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
+ MLX4_COMM_GET_IF_REV(slave_read)) {
+ mlx4_err(dev, "slave driver version is not supported"
+ " by the master\n");
+ goto err;
+ }
+
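+	/*
+	 * Hand the 64-bit VHCR DMA address to the master 16 bits at a
+	 * time, high word first; VHCR_EN carries the lowest 16 bits and
+	 * arms the channel.
+	 */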
+ mlx4_warn(dev, "Sending vhcr0\n");
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
+ MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
+ MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
+ MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
+ goto err;
+
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return 0;
+
+err:
+ mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return -EIO;
+}
+
+static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
+{
+ int i;
+
+ for (i = 1; i <= dev->caps.num_ports; i++) {
+ if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
+ dev->caps.gid_table_len[i] =
+ mlx4_get_slave_num_gids(dev, 0);
+ else
+ dev->caps.gid_table_len[i] = 1;
+ dev->caps.pkey_table_len[i] =
+ dev->phys_caps.pkey_phys_table_len[i] - 1;
+ }
+}
+
+static int choose_log_fs_mgm_entry_size(int qp_per_entry)
+{
+ int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
+
+ for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
+ i++) {
+ if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
+ break;
+ }
+
+ return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
+}
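+/*
+ * Example (illustrative, assuming MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7): for
+ * qp_per_entry = 100, i = 8 only fits 4 * (256 / 16 - 2) = 56 QPs, while
+ * i = 9 fits 4 * (512 / 16 - 2) = 120 >= 100, so 9 is returned.
+ */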
+
+static void choose_steering_mode(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap)
+{
+	/*
+	 * This is only valid for the integrated driver.
+	 * The newly ported mlx4_core driver defaults to B0 steering mode,
+	 * while the old mlx4_en driver defaults to A0 steering mode.
+	 * high_rate_steer != 0 means that A0 steering mode is on.
+	 * The integration fix is to hard-code high_rate_steer to 1.
+	 */
+	high_rate_steer = 1;
+
+ if (high_rate_steer && !mlx4_is_mfunc(dev)) {
+ dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER |
+ MLX4_DEV_CAP_FLAG_VEP_UC_STEER);
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN;
+ }
+
+ if (mlx4_log_num_mgm_entry_size == -1 &&
+ dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
+ dev_cap->fs_log_max_ucast_qp_range_size == 0 &&
+ (!mlx4_is_mfunc(dev) ||
+ (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) &&
+ choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
+ dev->oper_log_mgm_entry_size =
+ choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
+ dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
+ dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
+ } else {
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) {
+ dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
+		} else {
+ dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
+ mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
+ "set to use B0 steering. Falling back to A0 steering mode.\n");
+ }
+ dev->oper_log_mgm_entry_size =
+ mlx4_log_num_mgm_entry_size > 0 ?
+ mlx4_log_num_mgm_entry_size :
+ MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+ dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
+ }
+ mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
+ "log_num_mgm_entry_size = %d\n",
+ mlx4_steering_mode_str(dev->caps.steering_mode),
+ dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size);
}
static int mlx4_init_hca(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_dev_cap *dev_cap = NULL;
struct mlx4_adapter adapter;
- struct mlx4_dev_cap dev_cap;
struct mlx4_mod_stat_cfg mlx4_cfg;
struct mlx4_profile profile;
struct mlx4_init_hca_param init_hca;
- struct mlx4_port_config *config;
u64 icm_size;
int err;
- int i;
- err = mlx4_QUERY_FW(dev);
- if (err) {
- if (err == -EACCES)
- mlx4_info(dev, "non-primary physical function, skipping.\n");
- else
- mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
- return err;
- }
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_QUERY_FW(dev);
+ if (err) {
+ if (err == -EACCES)
+ mlx4_info(dev, "non-primary physical function, skipping.\n");
+ else
+ mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
+ return err;
+ }
- err = mlx4_load_fw(dev);
- if (err) {
- mlx4_err(dev, "Failed to start FW, aborting.\n");
- return err;
- }
+ err = mlx4_load_fw(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to start FW, aborting.\n");
+ return err;
+ }
- mlx4_cfg.log_pg_sz_m = 1;
- mlx4_cfg.log_pg_sz = 0;
- err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
- if (err)
- mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
+ mlx4_cfg.log_pg_sz_m = 1;
+ mlx4_cfg.log_pg_sz = 0;
+ err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
+ if (err)
+ mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
- err = mlx4_dev_cap(dev, &dev_cap);
- if (err) {
- mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
- goto err_stop_fw;
- }
+ dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL);
+ if (!dev_cap) {
+ mlx4_err(dev, "Failed to allocate memory for dev_cap\n");
+ err = -ENOMEM;
+ goto err_stop_fw;
+ }
+
+ err = mlx4_dev_cap(dev, dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+ goto err_stop_fw;
+ }
- process_mod_param_profile();
- profile = default_profile;
+ choose_steering_mode(dev, dev_cap);
- list_for_each_entry(config, &config_list, list) {
- if (config->pdev == dev->pdev) {
- for (i = 1; i <= dev->caps.num_ports; i++) {
- dev->caps.possible_type[i] = config->port_type[i];
- if (config->port_type[i] != MLX4_PORT_TYPE_AUTO)
- dev->caps.port_type[i] = config->port_type[i];
- }
+ if (mlx4_is_master(dev))
+ mlx4_parav_master_pf_caps(dev);
+
+ process_mod_param_profile(&profile);
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ profile.num_mcg = MLX4_FS_NUM_MCG;
+
+ icm_size = mlx4_make_profile(dev, &profile, dev_cap,
+ &init_hca);
+ if ((long long) icm_size < 0) {
+ err = icm_size;
+ goto err_stop_fw;
+ }
+
+ dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
+
+ init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
+ init_hca.uar_page_sz = PAGE_SHIFT - 12;
+
+ err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size);
+ if (err)
+ goto err_stop_fw;
+
+ err = mlx4_INIT_HCA(dev, &init_hca);
+ if (err) {
+ mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
+ goto err_free_icm;
+ }
+ } else {
+ err = mlx4_init_slave(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize slave\n");
+ return err;
}
- }
- mlx4_set_port_mask(dev);
- icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca);
- if ((long long) icm_size < 0) {
- err = icm_size;
- goto err_stop_fw;
+ err = mlx4_slave_cap(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to obtain slave caps\n");
+ goto err_close;
+ }
}
if (map_bf_area(dev))
- mlx4_dbg(dev, "Kernel support for blue flame is not available for kernels < 2.6.28\n");
+ mlx4_dbg(dev, "Failed to map blue flame area\n");
- init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
-
- err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
- if (err)
- goto err_stop_fw;
+ /*
+ * Read HCA frequency by QUERY_HCA command
+ */
+ if (dev->caps.cq_timestamp) {
+ memset(&init_hca, 0, sizeof(init_hca));
+ err = mlx4_QUERY_HCA(dev, &init_hca);
+ if (err) {
+ mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n");
+ dev->caps.cq_timestamp = 0;
+ } else
+ dev->caps.hca_core_clock = init_hca.hca_core_clock;
+
+ /*
+ * In case we got HCA frequency 0 - disable timestamping
+ * to avoid dividing by zero
+ */
+ if (!dev->caps.hca_core_clock) {
+ dev->caps.cq_timestamp = 0;
+ mlx4_err(dev, "HCA frequency is 0. "
+ "Timestamping is not supported.");
+ }
- err = mlx4_INIT_HCA(dev, &init_hca);
- if (err) {
- mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
- goto err_free_icm;
+ /*
+ * Map internal clock, in case of failure disable timestamping
+ */
+ if (map_internal_clock(dev)) {
+ dev->caps.cq_timestamp = 0;
+ mlx4_err(dev, "Failed to map internal clock. "
+ "Timestamping is not supported.\n");
+ }
}
+	/* Only the master sets the ports; all the rest get it from it. */
+ if (!mlx4_is_slave(dev))
+ mlx4_set_port_mask(dev);
+
err = mlx4_QUERY_ADAPTER(dev, &adapter);
if (err) {
mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
- goto err_close;
+ goto unmap_bf;
}
priv->eq_table.inta_pin = adapter.inta_pin;
memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
+ if (!mlx4_is_slave(dev))
+ kfree(dev_cap);
+
return 0;
+unmap_bf:
+ unmap_internal_clock(dev);
+ unmap_bf_area(dev);
+
+ if (mlx4_is_slave(dev)) {
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ }
+
err_close:
- mlx4_CLOSE_HCA(dev, 0);
+ if (mlx4_is_slave(dev))
+ mlx4_slave_exit(dev);
+ else
+ mlx4_CLOSE_HCA(dev, 0);
err_free_icm:
- mlx4_free_icms(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_free_icms(dev);
err_stop_fw:
- unmap_bf_area(dev);
- mlx4_UNMAP_FA(dev);
- mlx4_free_icm(dev, priv->fw.fw_icm, 0);
-
+ if (!mlx4_is_slave(dev)) {
+ mlx4_UNMAP_FA(dev);
+ mlx4_free_icm(dev, priv->fw.fw_icm, 0);
+ if (dev_cap)
+ kfree(dev_cap);
+ }
return err;
}
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- int err;
- int nent;
-
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- nent = dev->caps.max_basic_counters;
- break;
- case MLX4_CUNTERS_EXT:
- nent = dev->caps.max_ext_counters;
- break;
- default:
+ int res;
+ int nent_pow2;
+
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
return -ENOENT;
- }
- err = mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
- if (err)
- return err;
- return 0;
+ nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
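+	/*
+	 * For example, max_counters = 40 gives nent_pow2 = 64; the top
+	 * 64 - 40 = 24 bitmap entries are passed as reserved_top so only
+	 * the 40 real counters can ever be allocated.
+	 */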
+ res = mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
+ nent_pow2 - 1, 0,
+ nent_pow2 - dev->caps.max_counters);
+ if (res)
+ return res;
+
+ if (dev->caps.max_counters == dev->caps.max_basic_counters)
+ return 0;
+
+ res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0,
+ MLX4_CMD_SET_IF_STAT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ if (res)
+ mlx4_err(dev, "Failed to set extended counters (err=%d)\n",
+ res);
+ return res;
+
}
static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- case MLX4_CUNTERS_EXT:
+ if (!mlx4_is_slave(dev) &&
+ (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
- break;
- default:
- break;
- }
}
-int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
+int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- case MLX4_CUNTERS_EXT:
- *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
- if (*idx == -1)
- return -ENOMEM;
- return 0;
- default:
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
+ return -ENOENT;
+
+ *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
+ if (*idx == -1)
return -ENOMEM;
+
+ return 0;
+}
+
+int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
+{
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
+ RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ *idx = get_param_l(&out_param);
+
+ return err;
}
+ return __mlx4_counter_alloc(dev, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
+void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
+{
+ mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
+ return;
+}
+
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
{
- switch (dev->caps.counters_mode) {
- case MLX4_CUNTERS_BASIC:
- case MLX4_CUNTERS_EXT:
- mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
- return;
- default:
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, idx);
+ mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
return;
}
+ __mlx4_counter_free(dev, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_free);
@@ -1078,18 +1820,19 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
err = mlx4_init_uar_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "user access region table, aborting.\n");
+ "user access region table (err=%d), aborting.\n",
+ err);
return err;
}
err = mlx4_uar_alloc(dev, &priv->driver_uar);
if (err) {
- mlx4_err(dev, "Failed to allocate driver access region, "
- "aborting.\n");
+ mlx4_err(dev, "Failed to allocate driver access region "
+ "(err=%d), aborting.\n", err);
goto err_uar_table_free;
}
- priv->kar = ioremap(priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
if (!priv->kar) {
mlx4_err(dev, "Couldn't map kernel access region, "
"aborting.\n");
@@ -1100,35 +1843,36 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
err = mlx4_init_pd_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "protection domain table, aborting.\n");
+ "protection domain table (err=%d), aborting.\n", err);
goto err_kar_unmap;
}
err = mlx4_init_xrcd_table(dev);
if (err) {
- mlx4_err(dev, "Failed to initialize extended "
- "reliably connected domain table, aborting.\n");
+ mlx4_err(dev, "Failed to initialize "
+ "reliable connection domain table (err=%d), "
+ "aborting.\n", err);
goto err_pd_table_free;
}
err = mlx4_init_mr_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "memory region table, aborting.\n");
+ "memory region table (err=%d), aborting.\n", err);
goto err_xrcd_table_free;
}
err = mlx4_init_eq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "event queue table, aborting.\n");
+ "event queue table (err=%d), aborting.\n", err);
goto err_mr_table_free;
}
err = mlx4_cmd_use_events(dev);
if (err) {
mlx4_err(dev, "Failed to switch to event-driven "
- "firmware commands, aborting.\n");
+ "firmware commands (err=%d), aborting.\n", err);
goto err_eq_table_free;
}
@@ -1154,50 +1898,74 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
err = mlx4_init_cq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "completion queue table, aborting.\n");
+ "completion queue table (err=%d), aborting.\n", err);
goto err_cmd_poll;
}
err = mlx4_init_srq_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "shared receive queue table, aborting.\n");
+ "shared receive queue table (err=%d), aborting.\n",
+ err);
goto err_cq_table_free;
}
err = mlx4_init_qp_table(dev);
if (err) {
mlx4_err(dev, "Failed to initialize "
- "queue pair table, aborting.\n");
+ "queue pair table (err=%d), aborting.\n", err);
goto err_srq_table_free;
}
- err = mlx4_init_mcg_table(dev);
- if (err) {
- mlx4_err(dev, "Failed to initialize "
- "multicast group table, aborting.\n");
- goto err_qp_table_free;
- }
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_init_mcg_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize "
+ "multicast group table (err=%d), aborting.\n",
+ err);
+ goto err_qp_table_free;
+ }
- err = mlx4_init_counters_table(dev);
- if (err && err != -ENOENT) {
- mlx4_err(dev, "Failed to initialize counters table, aborting.\n");
- goto err_mcg_table_free;
- }
+ err = mlx4_init_counters_table(dev);
+ if (err && err != -ENOENT) {
+ mlx4_err(dev, "Failed to initialize counters table (err=%d), "
+ "aborting.\n", err);
+ goto err_mcg_table_free;
+ }
- for (port = 1; port <= dev->caps.num_ports; port++) {
- ib_port_default_caps = 0;
- err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps);
- if (err)
- mlx4_warn(dev, "failed to get port %d default "
- "ib capabilities (%d). Continuing with "
- "caps = 0\n", port, err);
- dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
- err = mlx4_SET_PORT(dev, port);
- if (err) {
- mlx4_err(dev, "Failed to set port %d, aborting\n",
- port);
- goto err_counters_table_free;
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ ib_port_default_caps = 0;
+ err = mlx4_get_port_ib_caps(dev, port,
+ &ib_port_default_caps);
+ if (err)
+ mlx4_warn(dev, "failed to get port %d default "
+ "ib capabilities (%d). Continuing "
+ "with caps = 0\n", port, err);
+ dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
+
+ /* initialize per-slave default ib port capabilities */
+ if (mlx4_is_master(dev)) {
+ int i;
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
+ ib_port_default_caps;
+ }
+ }
+
+ if (mlx4_is_mfunc(dev))
+ dev->caps.port_ib_mtu[port] = IB_MTU_2048;
+ else
+ dev->caps.port_ib_mtu[port] = IB_MTU_4096;
+
+ err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
+ dev->caps.pkey_table_len[port] : -1);
+ if (err) {
+ mlx4_err(dev, "Failed to set port %d (err=%d), "
+ "aborting\n", port, err);
+ goto err_counters_table_free;
+ }
}
}
@@ -1248,13 +2016,16 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct msix_entry *entries;
- int nreq;
+ int nreq = min_t(int, dev->caps.num_ports *
+ min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT)
+ + MSIX_LEGACY_SZ, MAX_MSIX);
int err;
int i;
if (msi_x) {
nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
- num_possible_cpus() + 1);
+ nreq);
+
entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
if (!entries)
goto no_msi;
@@ -1277,7 +2048,15 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
goto no_msi;
}
- dev->caps.num_comp_vectors = nreq - 1;
+ if (nreq <
+ MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
+		/* Working in legacy mode, all EQs shared */
+ dev->caps.comp_pool = 0;
+ dev->caps.num_comp_vectors = nreq - 1;
+ } else {
+ dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ;
+ dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
+ }
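+	/*
+	 * Illustrative split (assuming MSIX_LEGACY_SZ = 4, as in the
+	 * Linux driver): with 16 granted vectors, 3 serve the shared
+	 * legacy completion pool (num_comp_vectors = 3) and the
+	 * remaining 12 form the per-port pool (comp_pool = 12).
+	 */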
for (i = 0; i < nreq; ++i)
priv->eq_table.eq[i].irq = entries[i].vector;
@@ -1289,6 +2068,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
no_msi:
dev->caps.num_comp_vectors = 1;
+ dev->caps.comp_pool = 0;
for (i = 0; i < 2; ++i)
priv->eq_table.eq[i].irq = dev->pdev->irq;
@@ -1301,14 +2081,22 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
info->dev = dev;
info->port = port;
- mlx4_init_mac_table(dev, &info->mac_table);
- mlx4_init_vlan_table(dev, &info->vlan_table);
+ if (!mlx4_is_slave(dev)) {
+ mlx4_init_mac_table(dev, &info->mac_table);
+ mlx4_init_vlan_table(dev, &info->vlan_table);
+ info->base_qpn = mlx4_get_base_qpn(dev, port);
+ }
sprintf(info->dev_name, "mlx4_port%d", port);
info->port_attr.attr.name = info->dev_name;
- info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+ if (mlx4_is_mfunc(dev))
+ info->port_attr.attr.mode = S_IRUGO;
+ else {
+ info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+ info->port_attr.store = set_port_type;
+ }
info->port_attr.show = show_port_type;
- info->port_attr.store = set_port_type;
+ sysfs_attr_init(&info->port_attr.attr);
err = device_create_file(&dev->pdev->dev, &info->port_attr);
if (err) {
@@ -1316,6 +2104,24 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
info->port = -1;
}
+ sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
+ info->port_mtu_attr.attr.name = info->dev_mtu_name;
+ if (mlx4_is_mfunc(dev))
+ info->port_mtu_attr.attr.mode = S_IRUGO;
+ else {
+ info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
+ info->port_mtu_attr.store = set_port_ib_mtu;
+ }
+ info->port_mtu_attr.show = show_port_ib_mtu;
+ sysfs_attr_init(&info->port_mtu_attr.attr);
+
+ err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr);
+ if (err) {
+ mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
+ device_remove_file(&info->dev->pdev->dev, &info->port_attr);
+ info->port = -1;
+ }
+
return err;
}
@@ -1325,25 +2131,114 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
return;
device_remove_file(&info->dev->pdev->dev, &info->port_attr);
+ device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
+}
+
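+/* Per-port steering state: each port gets one list of promiscuous QPs
+ * and one list of steering entries per steering type (MLX4_NUM_STEERS). */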
+static int mlx4_init_steering(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int num_entries = dev->caps.num_ports;
+ int i, j;
+
+ priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
+ if (!priv->steer)
+ return -ENOMEM;
+
+ for (i = 0; i < num_entries; i++)
+ for (j = 0; j < MLX4_NUM_STEERS; j++) {
+ INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
+ INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
+ }
+ return 0;
+}
+
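+/* Undo mlx4_init_steering(): drain the promiscuous-QP lists and the
+ * steering entries, including each entry's list of duplicate QPs,
+ * before freeing the per-port array itself. */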
+static void mlx4_clear_steering(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_steer_index *entry, *tmp_entry;
+ struct mlx4_promisc_qp *pqp, *tmp_pqp;
+ int num_entries = dev->caps.num_ports;
+ int i, j;
+
+ for (i = 0; i < num_entries; i++) {
+ for (j = 0; j < MLX4_NUM_STEERS; j++) {
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &priv->steer[i].promisc_qps[j],
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ list_for_each_entry_safe(entry, tmp_entry,
+ &priv->steer[i].steer_entries[j],
+ list) {
+ list_del(&entry->list);
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &entry->duplicates,
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ kfree(entry);
+ }
+ }
+ }
+ kfree(priv->steer);
}
-static int mlx4_init_trigger(struct mlx4_priv *priv)
+static int extended_func_num(struct pci_dev *pdev)
{
- memcpy(&priv->trigger_attr, &dev_attr_port_trigger,
- sizeof(struct device_attribute));
- return device_create_file(&priv->dev.pdev->dev, &priv->trigger_attr);
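+ /* devfn packs the PCI slot in bits 7:3 and the function in bits 2:0;
+ * e.g. a (hypothetical) devfn of 0x21 is slot 4, function 1, giving
+ * extended function number 4 * 8 + 1 = 33. */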
+ return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
+}
+
+#define MLX4_OWNER_BASE 0x8069c
+#define MLX4_OWNER_SIZE 4
+
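+/* A single ownership semaphore in BAR 0 serializes PFs: a read that
+ * returns 0 claims the HCA when it was free, while a nonzero read means
+ * another function already owns it. mlx4_free_ownership() releases it
+ * by writing 0 and sleeping 1s so the hardware can settle. */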
+static int mlx4_get_ownership(struct mlx4_dev *dev)
+{
+ void __iomem *owner;
+ u32 ret;
+
+ if (pci_channel_offline(dev->pdev))
+ return -EIO;
+
+ owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+ MLX4_OWNER_SIZE);
+ if (!owner) {
+ mlx4_err(dev, "Failed to obtain ownership bit\n");
+ return -ENOMEM;
+ }
+
+ ret = readl(owner);
+ iounmap(owner);
+ return (int) !!ret;
}
-static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+static void mlx4_free_ownership(struct mlx4_dev *dev)
+{
+ void __iomem *owner;
+
+ if (pci_channel_offline(dev->pdev))
+ return;
+
+ owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+ MLX4_OWNER_SIZE);
+ if (!owner) {
+ mlx4_err(dev, "Failed to obtain ownership bit\n");
+ return;
+ }
+ writel(0, owner);
+ msleep(1000);
+ iounmap(owner);
+}
+
+static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
{
struct mlx4_priv *priv;
struct mlx4_dev *dev;
int err;
int port;
- int i;
- printk(KERN_INFO PFX "Initializing %s\n",
- pci_name(pdev));
+ pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
err = pci_enable_device(pdev);
if (err) {
@@ -1351,13 +2246,24 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
"aborting.\n");
return err;
}
+ if (num_vfs > MLX4_MAX_NUM_VF) {
+ dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n",
+ num_vfs, MLX4_MAX_NUM_VF);
+ return -EINVAL;
+ }
+ if (num_vfs < 0) {
+ dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
+ return -EINVAL;
+ }
/*
- * Check for BARs. We expect 0: 1MB
+ * Check for BARs.
*/
- if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
- pci_resource_len(pdev, 0) != 1 << 20) {
- dev_err(&pdev->dev, "Missing DCS, aborting.\n");
+ if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
+ !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Missing DCS, aborting."
+ "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n",
+ pci_dev_data, pci_resource_flags(pdev, 0));
err = -ENODEV;
goto err_disable_pdev;
}
@@ -1367,18 +2273,12 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_disable_pdev;
}
- err = pci_request_region(pdev, 0, DRV_NAME);
+ err = pci_request_regions(pdev, DRV_NAME);
if (err) {
- dev_err(&pdev->dev, "Cannot request control region, aborting.\n");
+ dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
goto err_disable_pdev;
}
- err = pci_request_region(pdev, 2, DRV_NAME);
- if (err) {
- dev_err(&pdev->dev, "Cannot request UAR region, aborting.\n");
- goto err_release_bar0;
- }
-
pci_set_master(pdev);
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1387,7 +2287,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
- goto err_release_bar2;
+ goto err_release_regions;
}
}
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1398,16 +2298,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
if (err) {
dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
"aborting.\n");
- goto err_release_bar2;
+ goto err_release_regions;
}
}
+ /* Allow large DMA segments, up to the firmware limit of 1 GB */
+ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
+
priv = kzalloc(sizeof *priv, GFP_KERNEL);
if (!priv) {
dev_err(&pdev->dev, "Device struct alloc failed, "
"aborting.\n");
err = -ENOMEM;
- goto err_release_bar2;
+ goto err_release_regions;
}
dev = &priv->dev;
@@ -1419,47 +2322,154 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_LIST_HEAD(&priv->pgdir_list);
mutex_init(&priv->pgdir_mutex);
- for (i = 0; i < MLX4_MAX_PORTS; ++i)
- priv->iboe_counter_index[i] = -1;
INIT_LIST_HEAD(&priv->bf_list);
mutex_init(&priv->bf_mutex);
- /*
- * Now reset the HCA before we touch the PCI capabilities or
- * attempt a firmware command, since a boot ROM may have left
- * the HCA in an undefined state.
- */
- err = mlx4_reset(dev);
- if (err) {
- mlx4_err(dev, "Failed to reset HCA, aborting.\n");
- goto err_free_dev;
+ dev->rev_id = pdev->revision;
+ dev->numa_node = dev_to_node(&pdev->dev);
+ /* Detect if this device is a virtual function */
+ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
+ /* When acting as PF, we normally skip VFs unless explicitly
+ * requested to probe them. */
+ if (num_vfs && extended_func_num(pdev) > probe_vf) {
+ mlx4_warn(dev, "Skipping virtual function:%d\n",
+ extended_func_num(pdev));
+ err = -ENODEV;
+ goto err_free_dev;
+ }
+ mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
+ dev->flags |= MLX4_FLAG_SLAVE;
+ } else {
+ /* We reset the device and enable SRIOV only for physical
+ * devices. Try to claim ownership on the device;
+ * if already taken, skip -- do not allow multiple PFs */
+ err = mlx4_get_ownership(dev);
+ if (err) {
+ if (err < 0)
+ goto err_free_dev;
+ else {
+ mlx4_warn(dev, "Multiple PFs not yet supported."
+ " Skipping PF.\n");
+ err = -EINVAL;
+ goto err_free_dev;
+ }
+ }
+
+ if (num_vfs) {
+ mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs);
+ err = pci_enable_sriov(pdev, num_vfs);
+ if (err) {
+ mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
+ err);
+ err = 0;
+ } else {
+ mlx4_warn(dev, "Running in master mode\n");
+ dev->flags |= MLX4_FLAG_SRIOV |
+ MLX4_FLAG_MASTER;
+ dev->num_vfs = num_vfs;
+ }
+ }
+
+ atomic_set(&priv->opreq_count, 0);
+ INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
+
+ /*
+ * Now reset the HCA before we touch the PCI capabilities or
+ * attempt a firmware command, since a boot ROM may have left
+ * the HCA in an undefined state.
+ */
+ err = mlx4_reset(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to reset HCA, aborting.\n");
+ goto err_sriov;
+ }
}
- if (mlx4_cmd_init(dev)) {
+slave_start:
+ err = mlx4_cmd_init(dev);
+ if (err) {
mlx4_err(dev, "Failed to init command interface, aborting.\n");
- goto err_free_dev;
+ goto err_sriov;
+ }
+
+ /* In slave functions, the communication channel must be initialized
+ * before posting commands. Also, init num_slaves before calling
+ * mlx4_init_hca */
+ if (mlx4_is_mfunc(dev)) {
+ if (mlx4_is_master(dev))
+ dev->num_slaves = MLX4_MAX_NUM_SLAVES;
+ else {
+ dev->num_slaves = 0;
+ err = mlx4_multi_func_init(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to init slave mfunc"
+ " interface, aborting.\n");
+ goto err_cmd;
+ }
+ }
}
err = mlx4_init_hca(dev);
- if (err)
- goto err_cmd;
+ if (err) {
+ if (err == -EACCES) {
+ /* Not the primary physical function:
+ * running in slave mode */
+ mlx4_cmd_cleanup(dev);
+ dev->flags |= MLX4_FLAG_SLAVE;
+ dev->flags &= ~MLX4_FLAG_MASTER;
+ goto slave_start;
+ } else
+ goto err_mfunc;
+ }
+
+ /* In master functions, the communication channel must be initialized
+ * after obtaining its address from fw */
+ if (mlx4_is_master(dev)) {
+ err = mlx4_multi_func_init(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to init master mfunc"
+ "interface, aborting.\n");
+ goto err_close;
+ }
+ }
err = mlx4_alloc_eq_table(dev);
if (err)
- goto err_close;
+ goto err_master_mfunc;
+
+ priv->msix_ctl.pool_bm = 0;
+ mutex_init(&priv->msix_ctl.pool_lock);
mlx4_enable_msi_x(dev);
+ if ((mlx4_is_mfunc(dev)) &&
+ !(dev->flags & MLX4_FLAG_MSI_X)) {
+ err = -ENOSYS;
+ mlx4_err(dev, "INTx is not supported in multi-function mode."
+ " aborting.\n");
+ goto err_free_eq;
+ }
+
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_init_steering(dev);
+ if (err)
+ goto err_free_eq;
+ }
err = mlx4_setup_hca(dev);
- if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
+ if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
+ !mlx4_is_mfunc(dev)) {
dev->flags &= ~MLX4_FLAG_MSI_X;
+ dev->caps.num_comp_vectors = 1;
+ dev->caps.comp_pool = 0;
pci_disable_msix(pdev);
err = mlx4_setup_hca(dev);
}
if (err)
- goto err_free_eq;
+ goto err_steer;
+
+ mlx4_init_quotas(dev);
for (port = 1; port <= dev->caps.num_ports; port++) {
err = mlx4_init_port_info(dev, port);
@@ -1471,24 +2481,17 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto err_port;
- err = mlx4_init_trigger(priv);
- if (err)
- goto err_register;
-
err = mlx4_sense_init(dev);
if (err)
- goto err_trigger;
+ goto err_port;
mlx4_start_sense(dev);
+ priv->pci_dev_data = pci_dev_data;
pci_set_drvdata(pdev, dev);
return 0;
-err_trigger:
- device_remove_file(&dev->pdev->dev, &priv->trigger_attr);
-err_register:
- mlx4_unregister_device(dev);
err_port:
for (--port; port >= 1; --port)
mlx4_cleanup_port_info(&priv->port[port]);
@@ -1505,26 +2508,49 @@ err_port:
mlx4_cleanup_pd_table(dev);
mlx4_cleanup_uar_table(dev);
+err_steer:
+ if (!mlx4_is_slave(dev))
+ mlx4_clear_steering(dev);
+
err_free_eq:
mlx4_free_eq_table(dev);
+err_master_mfunc:
+ if (mlx4_is_master(dev))
+ mlx4_multi_func_cleanup(dev);
+
+ if (mlx4_is_slave(dev)) {
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ }
+
err_close:
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
mlx4_close_hca(dev);
+err_mfunc:
+ if (mlx4_is_slave(dev))
+ mlx4_multi_func_cleanup(dev);
+
err_cmd:
mlx4_cmd_cleanup(dev);
+err_sriov:
+ if (dev->flags & MLX4_FLAG_SRIOV)
+ pci_disable_sriov(pdev);
+
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+
err_free_dev:
kfree(priv);
-err_release_bar2:
- pci_release_region(pdev, 2);
-
-err_release_bar0:
- pci_release_region(pdev, 0);
+err_release_regions:
+ pci_release_regions(pdev);
err_disable_pdev:
pci_disable_device(pdev);
@@ -1535,14 +2561,9 @@ err_disable_pdev:
static int __devinit mlx4_init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
{
- static int mlx4_version_printed;
+ printk_once(KERN_INFO "%s", mlx4_version);
- if (!mlx4_version_printed) {
- printk(KERN_INFO "%s", mlx4_version);
- ++mlx4_version_printed;
- }
-
- return __mlx4_init_one(pdev, id);
+ return __mlx4_init_one(pdev, id->driver_data);
}
static void mlx4_remove_one(struct pci_dev *pdev)
@@ -1552,16 +2573,25 @@ static void mlx4_remove_one(struct pci_dev *pdev)
int p;
if (dev) {
+ /* In SR-IOV mode the PF driver must not be unloaded
+ * while there are active VFs. */
+ if (mlx4_is_master(dev)) {
+ if (mlx4_how_many_lives_vf(dev))
+ mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n");
+ }
mlx4_sense_cleanup(dev);
mlx4_unregister_device(dev);
- device_remove_file(&dev->pdev->dev, &priv->trigger_attr);
for (p = 1; p <= dev->caps.num_ports; p++) {
mlx4_cleanup_port_info(&priv->port[p]);
mlx4_CLOSE_PORT(dev, p);
}
- mlx4_cleanup_counters_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_SLAVES_ONLY);
+
+ mlx4_cleanup_counters_table(dev);
mlx4_cleanup_mcg_table(dev);
mlx4_cleanup_qp_table(dev);
mlx4_cleanup_srq_table(dev);
@@ -1572,19 +2602,40 @@ static void mlx4_remove_one(struct pci_dev *pdev)
mlx4_cleanup_xrcd_table(dev);
mlx4_cleanup_pd_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_STRUCTS_ONLY);
+
iounmap(priv->kar);
mlx4_uar_free(dev, &priv->driver_uar);
mlx4_cleanup_uar_table(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_clear_steering(dev);
mlx4_free_eq_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_multi_func_cleanup(dev);
mlx4_close_hca(dev);
+ if (mlx4_is_slave(dev))
+ mlx4_multi_func_cleanup(dev);
mlx4_cmd_cleanup(dev);
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
+ if (dev->flags & MLX4_FLAG_SRIOV) {
+ mlx4_warn(dev, "Disabling SR-IOV\n");
+ pci_disable_sriov(pdev);
+ }
+
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
kfree(priv);
- pci_release_region(pdev, 2);
- pci_release_region(pdev, 0);
+ pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
@@ -1592,66 +2643,187 @@ static void mlx4_remove_one(struct pci_dev *pdev)
int mlx4_restart_one(struct pci_dev *pdev)
{
+ struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int pci_dev_data;
+
+ pci_dev_data = priv->pci_dev_data;
mlx4_remove_one(pdev);
- return __mlx4_init_one(pdev, NULL);
+ return __mlx4_init_one(pdev, pci_dev_data);
}
-static struct pci_device_id mlx4_pci_table[] = {
- { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
- { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
- { PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
- { PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
- { PCI_VDEVICE(MELLANOX, 0x6764) }, /* MT26468 ConnectX EN 10GigE PCIe gen2 */
- { PCI_VDEVICE(MELLANOX, 0x6746) }, /* MT26438 ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE Virt+ */
- { PCI_VDEVICE(MELLANOX, 0x676e) }, /* MT26478 ConnectX EN 40GigE PCIe 2.0 5GT/s */
- { PCI_VDEVICE(MELLANOX, 0x6778) }, /* MT26488 ConnectX VPI PCIe 2.0 5GT/s - IB DDR / 10GigE Virt+ */
- { PCI_VDEVICE(MELLANOX, 0x1000) },
- { PCI_VDEVICE(MELLANOX, 0x1001) },
- { PCI_VDEVICE(MELLANOX, 0x1002) },
- { PCI_VDEVICE(MELLANOX, 0x1003) },
- { PCI_VDEVICE(MELLANOX, 0x1004) },
- { PCI_VDEVICE(MELLANOX, 0x1005) },
- { PCI_VDEVICE(MELLANOX, 0x1006) },
- { PCI_VDEVICE(MELLANOX, 0x1007) },
- { PCI_VDEVICE(MELLANOX, 0x1008) },
- { PCI_VDEVICE(MELLANOX, 0x1009) },
- { PCI_VDEVICE(MELLANOX, 0x100a) },
- { PCI_VDEVICE(MELLANOX, 0x100b) },
- { PCI_VDEVICE(MELLANOX, 0x100c) },
- { PCI_VDEVICE(MELLANOX, 0x100d) },
- { PCI_VDEVICE(MELLANOX, 0x100e) },
- { PCI_VDEVICE(MELLANOX, 0x100f) },
+static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
+ /* MT25408 "Hermon" SDR */
+ { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" DDR */
+ { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" QDR */
+ { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" DDR PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" QDR PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" EN 10GigE */
+ { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25458 ConnectX EN 10GBASE-T 10GigE */
+ { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
+ { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT26468 ConnectX EN 10GigE PCIe gen2*/
+ { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
+ { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT26478 ConnectX2 40GigE PCIe gen2 */
+ { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
+ /* MT25400 Family [ConnectX-2 Virtual Function] */
+ { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
+ /* MT27500 Family [ConnectX-3] */
+ { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
+ /* MT27500 Family [ConnectX-3 Virtual Function] */
+ { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
+ { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
+ { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
+ { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
{ 0, }
};
MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
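+
+/* PCI error recovery: a detected channel error tears the device down
+ * completely; slot_reset then re-runs the full probe path, and a
+ * permanently failed channel is reported as DISCONNECT. */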
+static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ mlx4_remove_one(pdev);
+
+ return state == pci_channel_io_perm_failure ?
+ PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
+{
+ int ret = __mlx4_init_one(pdev, 0);
+
+ return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+}
+
+static const struct pci_error_handlers mlx4_err_handler = {
+ .error_detected = mlx4_pci_err_detected,
+ .slot_reset = mlx4_pci_slot_reset,
+};
+
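+/* Suspend/resume are implemented as a full teardown and re-probe of the
+ * device rather than a save/restore of HCA state. */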
+static int suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ mlx4_remove_one(pdev);
+
+ return 0;
+}
+
+static int resume(struct pci_dev *pdev)
+{
+ return __mlx4_init_one(pdev, 0);
+}
+
static struct pci_driver mlx4_driver = {
.name = DRV_NAME,
- .id_table = mlx4_pci_table,
+ .id_table = (struct pci_device_id *)mlx4_pci_table,
.probe = mlx4_init_one,
- .remove = __devexit_p(mlx4_remove_one)
+ .remove = __devexit_p(mlx4_remove_one),
+ .suspend = suspend,
+ .resume = resume,
+ .err_handler = (struct pci_error_handlers *)&mlx4_err_handler,
};
static int __init mlx4_verify_params(void)
{
if ((log_num_mac < 0) || (log_num_mac > 7)) {
- printk(KERN_WARNING "mlx4_core: bad num_mac: %d\n", log_num_mac);
+ pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac);
+ return -1;
+ }
+
+ if (log_num_vlan != 0)
+ pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
+ MLX4_LOG_NUM_VLANS);
+
+ if (mlx4_set_4k_mtu != -1)
+ pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n");
+
+ if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
+ pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
+ return -1;
+ }
+
+ /* Check if module param for ports type has legal combination */
+ if (port_type_array[0] == false && port_type_array[1] == true) {
+ pr_warning("mlx4_core: module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
+ port_type_array[0] = true;
+ }
+
+ if (mlx4_log_num_mgm_entry_size != -1 &&
+ (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
+ mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
+ pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
+ "in legal range (-1 or %d..%d)\n",
+ mlx4_log_num_mgm_entry_size,
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE,
+ MLX4_MAX_MGM_LOG_ENTRY_SIZE);
+ return -1;
+ }
+
+ if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) {
+ pr_warning("mlx4_core: bad log_num_qp: %d\n",
+ mod_param_profile.num_qp);
+ return -1;
+ }
+
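+ /* The mod_param_profile fields are log2 sizes; e.g. num_qp = 19
+ * requests 2^19 = 524288 QP contexts from the firmware. */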
+ if (mod_param_profile.num_srq < 10) {
+ pr_warning("mlx4_core: too low log_num_srq: %d\n",
+ mod_param_profile.num_srq);
return -1;
}
- if (log_mtts_per_seg == 0)
- log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
- if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
- printk(KERN_WARNING "mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
+ if (mod_param_profile.num_cq < 10) {
+ pr_warning("mlx4_core: too low log_num_cq: %d\n",
+ mod_param_profile.num_cq);
return -1;
}
+ if (mod_param_profile.num_mpt < 10) {
+ pr_warning("mlx4_core: too low log_num_mpt: %d\n",
+ mod_param_profile.num_mpt);
+ return -1;
+ }
+
+ if (mod_param_profile.num_mtt && mod_param_profile.num_mtt < 15) {
+ pr_warning("mlx4_core: too low log_num_mtt: %d\n",
+ mod_param_profile.num_mtt);
+ return -1;
+ }
+
+ if (mod_param_profile.num_mtt > MLX4_MAX_LOG_NUM_MTT) {
+ pr_warning("mlx4_core: too high log_num_mtt: %d\n",
+ mod_param_profile.num_mtt);
+ return -1;
+ }
return 0;
}
@@ -1659,8 +2831,6 @@ static int __init mlx4_init(void)
{
int ret;
- mutex_init(&drv_mutex);
-
if (mlx4_verify_params())
return -EINVAL;
@@ -1670,20 +2840,26 @@ static int __init mlx4_init(void)
if (!mlx4_wq)
return -ENOMEM;
+ if (enable_sys_tune)
+ sys_tune_init();
+
ret = pci_register_driver(&mlx4_driver);
+ if (ret < 0 && enable_sys_tune)
+ sys_tune_fini();
+
return ret < 0 ? ret : 0;
}
static void __exit mlx4_cleanup(void)
{
- mutex_lock(&drv_mutex);
- mlx4_config_cleanup();
+ if (enable_sys_tune)
+ sys_tune_fini();
+
pci_unregister_driver(&mlx4_driver);
- mutex_unlock(&drv_mutex);
destroy_workqueue(mlx4_wq);
}
-module_init_order(mlx4_init, SI_ORDER_MIDDLE);
+module_init(mlx4_init);
module_exit(mlx4_cleanup);
#undef MODULE_VERSION