summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile2
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.h15
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c590
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h20
-rw-r--r--drivers/infiniband/hw/hfi1/common.h15
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c238
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.h62
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c128
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c31
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c14
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h95
-rw-r--r--drivers/infiniband/hw/hfi1/init.c69
-rw-r--r--drivers/infiniband/hw/hfi1/intr.c27
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c95
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c2
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c19
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h34
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c12
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c55
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c137
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c43
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h46
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c4
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c5
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h8
-rw-r--r--drivers/infiniband/hw/hfi1/trace_misc.h48
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rc.h7
-rw-r--r--drivers/infiniband/hw/hfi1/trace_tx.h43
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c14
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c73
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c16
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c5
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c22
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c153
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h15
-rw-r--r--drivers/infiniband/hw/hfi1/vnic.h184
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c907
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_sdma.c323
38 files changed, 3006 insertions, 570 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a0..88085f6 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
- verbs_txreq.o
+ verbs_txreq.o vnic_main.o vnic_sdma.o
hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3..794e681 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long data)
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
}
-/* Disable interrupt processing for verbs contexts when PSM contexts are open */
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
static inline void aspm_disable_all(struct hfi1_devdata *dd)
{
struct hfi1_ctxtdata *rcd;
unsigned long flags;
unsigned i;
- for (i = 0; i < dd->first_user_ctxt; i++) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
del_timer_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
if (aspm_mode != ASPM_MODE_DYNAMIC)
return;
- for (i = 0; i < dd->first_user_ctxt; i++) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = true;
@@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
(unsigned long)rcd);
rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
aspm_mode == ASPM_MODE_DYNAMIC &&
- rcd->ctxt < rcd->dd->first_user_ctxt;
+ rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
}
static inline void aspm_init(struct hfi1_devdata *dd)
@@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd)
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
- for (i = 0; i < dd->first_user_ctxt; i++)
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++)
aspm_ctx_init(dd->rcd[i]);
/* Start with ASPM disabled */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 121a4c9..0f6916d 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -64,6 +64,7 @@
#include "platform.h"
#include "aspm.h"
#include "affinity.h"
+#include "debugfs.h"
#define NUM_IB_PORTS 1
@@ -125,9 +126,16 @@ struct flag_table {
#define DEFAULT_KRCVQS 2
#define MIN_KERNEL_KCTXTS 2
#define FIRST_KERNEL_KCTXT 1
-/* sizes for both the QP and RSM map tables */
-#define NUM_MAP_ENTRIES 256
-#define NUM_MAP_REGS 32
+
+/*
+ * RSM instance allocation
+ * 0 - Verbs
+ * 1 - User Fecn Handling
+ * 2 - Vnic
+ */
+#define RSM_INS_VERBS 0
+#define RSM_INS_FECN 1
+#define RSM_INS_VNIC 2
/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT 39
@@ -138,8 +146,7 @@ struct flag_table {
#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
#define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
-/* RSM fields */
-
+/* RSM fields for Verbs */
/* packet type */
#define IB_PACKET_TYPE 2ull
#define QW_SHIFT 6ull
@@ -169,6 +176,28 @@ struct flag_table {
/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
+/* RSM fields for Vnic */
+/* L2_TYPE: QW 0, OFFSET 61 - for match */
+#define L2_TYPE_QW 0ull
+#define L2_TYPE_BIT_OFFSET 61ull
+#define L2_TYPE_OFFSET(off) ((L2_TYPE_QW << QW_SHIFT) | (off))
+#define L2_TYPE_MATCH_OFFSET L2_TYPE_OFFSET(L2_TYPE_BIT_OFFSET)
+#define L2_TYPE_MASK 3ull
+#define L2_16B_VALUE 2ull
+
+/* L4_TYPE QW 1, OFFSET 0 - for match */
+#define L4_TYPE_QW 1ull
+#define L4_TYPE_BIT_OFFSET 0ull
+#define L4_TYPE_OFFSET(off) ((L4_TYPE_QW << QW_SHIFT) | (off))
+#define L4_TYPE_MATCH_OFFSET L4_TYPE_OFFSET(L4_TYPE_BIT_OFFSET)
+#define L4_16B_TYPE_MASK 0xFFull
+#define L4_16B_ETH_VALUE 0x78ull
+
+/* 16B VESWID - for select */
+#define L4_16B_HDR_VESWID_OFFSET ((2 << QW_SHIFT) | (16ull))
+/* 16B ENTROPY - for select */
+#define L2_16B_ENTROPY_OFFSET ((1 << QW_SHIFT) | (32ull))
+
/* defines to build power on SC2VL table */
#define SC2VL_VAL( \
num, \
@@ -1045,6 +1074,8 @@ static void dc_start(struct hfi1_devdata *);
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index);
/*
* Error interrupt table entry. This is used as input to the interrupt
@@ -6379,18 +6410,17 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
*
* The expectation is that the caller of this routine would have taken
* care of properly transitioning the link into the correct state.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_shutdown(struct hfi1_devdata *dd)
+static void _dc_shutdown(struct hfi1_devdata *dd)
{
- unsigned long flags;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
- if (dd->dc_shutdown) {
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ if (dd->dc_shutdown)
return;
- }
+
dd->dc_shutdown = 1;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
/* Shutdown the LCB */
lcb_shutdown(dd, 1);
/*
@@ -6401,35 +6431,45 @@ static void dc_shutdown(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_CFG_RST, 0x1);
}
+static void dc_shutdown(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_shutdown(dd);
+ mutex_unlock(&dd->dc8051_lock);
+}
+
/*
* Calling this after the DC has been brought out of reset should not
* do any damage.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_start(struct hfi1_devdata *dd)
+static void _dc_start(struct hfi1_devdata *dd)
{
- unsigned long flags;
- int ret;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
if (!dd->dc_shutdown)
- goto done;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ return;
+
/* Take the 8051 out of reset */
write_csr(dd, DC_DC8051_CFG_RST, 0ull);
/* Wait until 8051 is ready */
- ret = wait_fm_ready(dd, TIMEOUT_8051_START);
- if (ret) {
+ if (wait_fm_ready(dd, TIMEOUT_8051_START))
dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
__func__);
- }
+
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
write_csr(dd, DCC_CFG_RESET, 0x10);
/* lcb_shutdown() with abort=1 does not restore these */
write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
dd->dc_shutdown = 0;
-done:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+}
+
+static void dc_start(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_start(dd);
+ mutex_unlock(&dd->dc8051_lock);
}
/*
@@ -6701,7 +6741,13 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
int i;
/* enable all kernel contexts */
- for (i = 0; i < dd->n_krcv_queues; i++) {
+ for (i = 0; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[i];
+
+ /* Ensure all non-user contexts(including vnic) are enabled */
+ if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER))
+ continue;
+
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
@@ -7077,7 +7123,7 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
- /* Sanity check - ppd->pkeys[2] should be 0, or already initalized */
+ /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */
if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY)))
dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
__func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
@@ -7165,7 +7211,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
* set the max_rate field in handle_verify_cap until v0.19.
*/
if ((dd->icode == ICODE_RTL_SILICON) &&
- (dd->dc8051_ver < dc8051_ver(0, 19))) {
+ (dd->dc8051_ver < dc8051_ver(0, 19, 0))) {
/* max_rate: 0 = 12.5G, 1 = 25G */
switch (max_rate) {
case 0:
@@ -7277,15 +7323,6 @@ void handle_verify_cap(struct work_struct *work)
lcb_shutdown(dd, 0);
adjust_lcb_for_fpga_serdes(dd);
- /*
- * These are now valid:
- * remote VerifyCap fields in the general LNI config
- * CSR DC8051_STS_REMOTE_GUID
- * CSR DC8051_STS_REMOTE_NODE_TYPE
- * CSR DC8051_STS_REMOTE_FM_SECURITY
- * CSR DC8051_STS_REMOTE_PORT_NO
- */
-
read_vc_remote_phy(dd, &power_management, &continious);
read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
&partner_supported_crc);
@@ -7350,7 +7387,7 @@ void handle_verify_cap(struct work_struct *work)
}
ppd->link_speed_active = 0; /* invalid value */
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
switch (remote_tx_rate) {
case 0:
@@ -7416,20 +7453,6 @@ void handle_verify_cap(struct work_struct *work)
write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
set_8051_lcb_access(dd);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_fm_security =
- read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
- DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
- dd_dev_info(dd,
- "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
- ppd->neighbor_guid, ppd->neighbor_type,
- ppd->mgmt_allowed, ppd->neighbor_fm_security);
if (ppd->mgmt_allowed)
add_full_mgmt_pkey(ppd);
@@ -7897,6 +7920,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
}
+ if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
+ reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
+
/* report any remaining errors */
if (reg)
dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
@@ -7995,7 +8021,9 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
if (likely(source < dd->num_rcv_contexts)) {
rcd = dd->rcd[source];
if (rcd) {
- if (source < dd->first_user_ctxt)
+ /* Check for non-user contexts, including vnic */
+ if ((source < dd->first_dyn_alloc_ctxt) ||
+ (rcd->sc && (rcd->sc->type == SC_KERNEL)))
rcd->do_interrupt(rcd, 0);
else
handle_user_interrupt(rcd);
@@ -8023,7 +8051,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
rcd = dd->rcd[source];
if (rcd) {
/* only pay attention to user urgent interrupts */
- if (source >= dd->first_user_ctxt)
+ if ((source >= dd->first_dyn_alloc_ctxt) &&
+ (!rcd->sc || (rcd->sc->type == SC_USER)))
handle_user_interrupt(rcd);
return; /* OK */
}
@@ -8156,10 +8185,10 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
/* handle the interrupt(s) */
sdma_engine_interrupt(sde, status);
- } else
+ } else {
dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
sde->this_idx);
-
+ }
return IRQ_HANDLED;
}
@@ -8344,6 +8373,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
}
/*
+ * Provide a cache for some of the LCB registers in case the LCB is
+ * unavailable.
+ * (The LCB is unavailable in certain link states, for example.)
+ */
+struct lcb_datum {
+ u32 off;
+ u64 val;
+};
+
+static struct lcb_datum lcb_cache[] = {
+ { DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0},
+ { DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 },
+ { DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 },
+};
+
+static void update_lcb_cache(struct hfi1_devdata *dd)
+{
+ int i;
+ int ret;
+ u64 val;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ ret = read_lcb_csr(dd, lcb_cache[i].off, &val);
+
+ /* Update if we get good data */
+ if (likely(ret != -EBUSY))
+ lcb_cache[i].val = val;
+ }
+}
+
+static int read_lcb_cache(u32 off, u64 *val)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ if (lcb_cache[i].off == off) {
+ *val = lcb_cache[i].val;
+ return 0;
+ }
+ }
+
+ pr_warn("%s bad offset 0x%x\n", __func__, off);
+ return -1;
+}
+
+/*
* Read an LCB CSR. Access may not be in host control, so check.
* Return 0 on success, -EBUSY on failure.
*/
@@ -8354,9 +8429,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
/* if up, go through the 8051 for the value */
if (ppd->host_link_state & HLS_UP)
return read_lcb_via_8051(dd, addr, data);
- /* if going up or down, no access */
- if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
- return -EBUSY;
+ /* if going up or down, check the cache, otherwise, no access */
+ if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) {
+ if (read_lcb_cache(addr, data))
+ return -EBUSY;
+ return 0;
+ }
+
/* otherwise, host has access */
*data = read_csr(dd, addr);
return 0;
@@ -8371,7 +8450,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
int ret;
if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
- (dd->dc8051_ver < dc8051_ver(0, 20))) {
+ (dd->dc8051_ver < dc8051_ver(0, 20, 0))) {
if (acquire_lcb_access(dd, 0) == 0) {
write_csr(dd, addr, data);
release_lcb_access(dd, 0);
@@ -8420,16 +8499,11 @@ static int do_8051_command(
{
u64 reg, completed;
int return_code;
- unsigned long flags;
unsigned long timeout;
hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
- /*
- * Alternative to holding the lock for a long time:
- * - keep busy wait - have other users bounce off
- */
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ mutex_lock(&dd->dc8051_lock);
/* We can't send any commands to the 8051 if it's in reset */
if (dd->dc_shutdown) {
@@ -8455,10 +8529,8 @@ static int do_8051_command(
return_code = -ENXIO;
goto fail;
}
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
- dc_shutdown(dd);
- dc_start(dd);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ _dc_shutdown(dd);
+ _dc_start(dd);
}
/*
@@ -8539,8 +8611,7 @@ static int do_8051_command(
write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
fail:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
-
+ mutex_unlock(&dd->dc8051_lock);
return return_code;
}
@@ -8677,13 +8748,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
& REMOTE_DEVICE_REV_MASK;
}
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch)
{
u32 frame;
read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
- *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
- *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
+ *ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) &
+ STS_FM_VERSION_MAJOR_MASK;
+ *ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) &
+ STS_FM_VERSION_MINOR_MASK;
+
+ read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame);
+ *ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) &
+ STS_FM_VERSION_PATCH_MASK;
}
static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
@@ -8891,8 +8969,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
*/
static int do_quick_linkup(struct hfi1_devdata *dd)
{
- u64 reg;
- unsigned long timeout;
int ret;
lcb_shutdown(dd, 0);
@@ -8915,19 +8991,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_RUN,
1ull << DC_LCB_CFG_RUN_EN_SHIFT);
- /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
- timeout = jiffies + msecs_to_jiffies(10);
- while (1) {
- reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
- if (reg)
- break;
- if (time_after(jiffies, timeout)) {
- dd_dev_err(dd,
- "timeout waiting for LINK_TRANSFER_ACTIVE\n");
- return -ETIMEDOUT;
- }
- udelay(2);
- }
+ ret = wait_link_transfer_active(dd, 10);
+ if (ret)
+ return ret;
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
@@ -9091,7 +9157,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret)
goto set_local_link_attributes_fail;
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* set the tx rate to the fastest enabled */
if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
ppd->local_tx_rate = 1;
@@ -9274,7 +9340,7 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable on fire\n",
+ dd_dev_info(dd, "%s: QSFP cable temperature too high\n",
__func__);
if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
@@ -9494,8 +9560,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd)
int ret;
u8 status;
- /* report success if not a QSFP */
- if (ppd->port_type != PORT_TYPE_QSFP)
+ /*
+ * Report success if not a QSFP or, if it is a QSFP, but the cable is
+ * not present
+ */
+ if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd))
return 0;
/* read byte 2, the status byte */
@@ -10082,6 +10151,64 @@ static void check_lni_states(struct hfi1_pportdata *ppd)
decode_state_complete(ppd, last_remote_state, "received");
}
+/* wait for wait_ms for LINK_TRANSFER_ACTIVE to go to 1 */
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms)
+{
+ u64 reg;
+ unsigned long timeout;
+
+ /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
+ timeout = jiffies + msecs_to_jiffies(wait_ms);
+ while (1) {
+ reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+ if (reg)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(dd,
+ "timeout waiting for LINK_TRANSFER_ACTIVE\n");
+ return -ETIMEDOUT;
+ }
+ udelay(2);
+ }
+ return 0;
+}
+
+/* called when the logical link state is not down as it should be */
+static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+
+ /*
+ * Bring link up in LCB loopback
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
+ DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+
+ write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
+ write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0);
+ write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110);
+ write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2);
+
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
+ (void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET);
+ udelay(3);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1);
+ write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+
+ wait_link_transfer_active(dd, 100);
+
+ /*
+ * Bring the link down again.
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0);
+
+ /* call again to adjust ppd->statusp, if needed */
+ get_logical_state(ppd);
+}
+
/*
* Helper for set_link_state(). Do not call except from that routine.
* Expects ppd->hls_mutex to be held.
@@ -10098,6 +10225,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
int do_transition;
int do_wait;
+ update_lcb_cache(dd);
+
previous_state = ppd->host_link_state;
ppd->host_link_state = HLS_GOING_OFFLINE;
pstate = read_physical_state(dd);
@@ -10135,15 +10264,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
return ret;
}
- /* make sure the logical state is also down */
- wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
-
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
+
+ /* make sure the logical state is also down */
+ ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
+ if (ret)
+ force_logical_link_state_down(ppd);
+
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
if (ppd->port_type == PORT_TYPE_QSFP &&
@@ -10380,11 +10512,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
goto unexpected;
}
- ppd->host_link_state = HLS_UP_INIT;
ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
if (ret) {
- /* logical state didn't change, stay at going_up */
- ppd->host_link_state = HLS_GOING_UP;
dd_dev_err(dd,
"%s: logical state did not change to INIT\n",
__func__);
@@ -10398,6 +10527,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
handle_linkup_change(dd, 1);
+ ppd->host_link_state = HLS_UP_INIT;
}
break;
case HLS_UP_ARMED:
@@ -11853,6 +11983,10 @@ static void free_cntrs(struct hfi1_devdata *dd)
dd->scntrs = NULL;
kfree(dd->cntrnames);
dd->cntrnames = NULL;
+ if (dd->update_cntr_wq) {
+ destroy_workqueue(dd->update_cntr_wq);
+ dd->update_cntr_wq = NULL;
+ }
}
static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
@@ -12008,7 +12142,7 @@ u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
}
-static void update_synth_timer(unsigned long opaque)
+static void do_update_synth_timer(struct work_struct *work)
{
u64 cur_tx;
u64 cur_rx;
@@ -12017,8 +12151,8 @@ static void update_synth_timer(unsigned long opaque)
int i, j, vl;
struct hfi1_pportdata *ppd;
struct cntr_entry *entry;
-
- struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ struct hfi1_devdata *dd = container_of(work, struct hfi1_devdata,
+ update_cntr_work);
/*
* Rather than keep beating on the CSRs pick a minimal set that we can
@@ -12101,7 +12235,13 @@ static void update_synth_timer(unsigned long opaque)
} else {
hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
}
+}
+static void update_synth_timer(unsigned long opaque)
+{
+ struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+
+ queue_work(dd->update_cntr_wq, &dd->update_cntr_work);
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
}
@@ -12337,6 +12477,13 @@ static int init_cntrs(struct hfi1_devdata *dd)
if (init_cpu_counters(dd))
goto bail;
+ dd->update_cntr_wq = alloc_ordered_workqueue("hfi1_update_cntr_%d",
+ WQ_MEM_RECLAIM, dd->unit);
+ if (!dd->update_cntr_wq)
+ goto bail;
+
+ INIT_WORK(&dd->update_cntr_work, do_update_synth_timer);
+
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
return 0;
bail:
@@ -12726,7 +12873,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
first_sdma = last_general;
last_sdma = first_sdma + dd->num_sdma;
first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues;
+ last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
+
+ /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
+ dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
/*
* Sanity check - the code expects all SDMA chip source
@@ -12740,7 +12890,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
const char *err_info;
irq_handler_t handler;
irq_handler_t thread = NULL;
- void *arg;
+ void *arg = NULL;
int idx;
struct hfi1_ctxtdata *rcd = NULL;
struct sdma_engine *sde = NULL;
@@ -12767,24 +12917,25 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
} else if (first_rx <= i && i < last_rx) {
idx = i - first_rx;
rcd = dd->rcd[idx];
- /* no interrupt if no rcd */
- if (!rcd)
- continue;
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- handler = receive_context_interrupt;
- thread = receive_context_thread;
- arg = rcd;
- snprintf(me->name, sizeof(me->name),
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- err_info = "receive context";
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
- me->type = IRQ_RCVCTXT;
+ if (rcd) {
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
+ rcd->imask = ((u64)1) <<
+ ((IS_RCVAVAIL_START + idx) % 64);
+ handler = receive_context_interrupt;
+ thread = receive_context_thread;
+ arg = rcd;
+ snprintf(me->name, sizeof(me->name),
+ DRIVER_NAME "_%d kctxt%d",
+ dd->unit, idx);
+ err_info = "receive context";
+ remap_intr(dd, IS_RCVAVAIL_START + idx, i);
+ me->type = IRQ_RCVCTXT;
+ rcd->msix_intr = i;
+ }
} else {
/* not in our expected range - complain, then
* ignore it
@@ -12822,6 +12973,84 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
return ret;
}
+void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
+{
+ int i;
+
+ if (!dd->num_msix_entries) {
+ synchronize_irq(dd->pcidev->irq);
+ return;
+ }
+
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
+
+ synchronize_irq(me->msix.vector);
+ }
+}
+
+void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
+
+ if (!me->arg) /* => no irq, no affinity */
+ return;
+
+ hfi1_put_irq_affinity(dd, me);
+ free_irq(me->msix.vector, me->arg);
+
+ me->arg = NULL;
+}
+
+void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ struct hfi1_msix_entry *me;
+ int idx = rcd->ctxt;
+ void *arg = rcd;
+ int ret;
+
+ rcd->msix_intr = dd->vnic.msix_idx++;
+ me = &dd->msix_entries[rcd->msix_intr];
+
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
+ rcd->imask = ((u64)1) <<
+ ((IS_RCVAVAIL_START + idx) % 64);
+
+ snprintf(me->name, sizeof(me->name),
+ DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
+ me->name[sizeof(me->name) - 1] = 0;
+ me->type = IRQ_RCVCTXT;
+
+ remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
+
+ ret = request_threaded_irq(me->msix.vector, receive_context_interrupt,
+ receive_context_thread, 0, me->name, arg);
+ if (ret) {
+ dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n",
+ me->msix.vector, idx, ret);
+ return;
+ }
+ /*
+ * assign arg after request_irq call, so it will be
+ * cleaned up
+ */
+ me->arg = arg;
+
+ ret = hfi1_get_irq_affinity(dd, me);
+ if (ret) {
+ dd_dev_err(dd,
+ "unable to pin IRQ %d\n", ret);
+ free_irq(me->msix.vector, me->arg);
+ }
+}
+
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
@@ -12853,7 +13082,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
* N interrupts - one per used SDMA engine
* M interrupt - one per kernel receive context
*/
- total = 1 + dd->num_sdma + dd->n_krcv_queues;
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
if (!entries) {
@@ -12918,7 +13147,8 @@ fail:
*
* num_rcv_contexts - number of contexts being used
* n_krcv_queues - number of kernel contexts
- * first_user_ctxt - first non-kernel context in array of contexts
+ * first_dyn_alloc_ctxt - first dynamically allocated context
+ * in array of contexts
* freectxts - number of free user contexts
* num_send_contexts - number of PIO send contexts being used
*/
@@ -12995,10 +13225,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
total_contexts = num_kernel_contexts + num_user_contexts;
}
- /* the first N are kernel contexts, the rest are user contexts */
+ /* Accommodate VNIC contexts */
+ if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts)
+ total_contexts += HFI1_NUM_VNIC_CTXT;
+
+ /* the first N are kernel contexts, the rest are user/vnic contexts */
dd->num_rcv_contexts = total_contexts;
dd->n_krcv_queues = num_kernel_contexts;
- dd->first_user_ctxt = num_kernel_contexts;
+ dd->first_dyn_alloc_ctxt = num_kernel_contexts;
dd->num_user_contexts = num_user_contexts;
dd->freectxts = num_user_contexts;
dd_dev_info(dd,
@@ -13454,11 +13688,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
- for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
- write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
- write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
- write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
- }
+ for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++)
+ clear_rsm_rule(dd, i);
for (i = 0; i < 32; i++)
write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
@@ -13817,6 +14048,16 @@ static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index,
(u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT);
}
+/*
+ * Clear a receive side mapping rule.
+ */
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+ write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 0);
+}
+
/* return the number of RSM map table entries that will be used for QOS */
static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
unsigned int *np)
@@ -13932,7 +14173,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
rrd.value2 = LRH_SC_VALUE;
/* add rule 0 */
- add_rsm_rule(dd, 0, &rrd);
+ add_rsm_rule(dd, RSM_INS_VERBS, &rrd);
/* mark RSM map entries as used */
rmt->used += rmt_entries;
@@ -13962,7 +14203,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
/*
* RSM will extract the destination context as an index into the
* map table. The destination contexts are a sequential block
- * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive).
+ * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive).
* Map entries are accessed as offset + extracted value. Adjust
* the added offset so this sequence can be placed anywhere in
* the table - as long as the entries themselves do not wrap.
@@ -13970,9 +14211,9 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
* start with that to allow for a "negative" offset.
*/
offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
- (int)dd->first_user_ctxt);
+ (int)dd->first_dyn_alloc_ctxt);
- for (i = dd->first_user_ctxt, idx = rmt->used;
+ for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used;
i < dd->num_rcv_contexts; i++, idx++) {
/* replace with identity mapping */
regoff = (idx % 8) * 8;
@@ -14006,11 +14247,84 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
rrd.value2 = 1;
/* add rule 1 */
- add_rsm_rule(dd, 1, &rrd);
+ add_rsm_rule(dd, RSM_INS_FECN, &rrd);
rmt->used += dd->num_user_contexts;
}
+/* Initialize RSM for VNIC */
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+{
+ u8 i, j;
+ u8 ctx_id = 0;
+ u64 reg;
+ u32 regoff;
+ struct rsm_rule_data rrd;
+
+ if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) {
+ dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n",
+ dd->vnic.rmt_start);
+ return;
+ }
+
+ dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n",
+ dd->vnic.rmt_start,
+ dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES);
+
+ /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
+ regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8;
+ reg = read_csr(dd, regoff);
+ for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
+ /* Update map register with vnic context */
+ j = (dd->vnic.rmt_start + i) % 8;
+ reg &= ~(0xffllu << (j * 8));
+ reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
+ /* Wrap up vnic ctx index */
+ ctx_id %= dd->vnic.num_ctxt;
+ /* Write back map register */
+ if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
+ dev_dbg(&(dd)->pcidev->dev,
+ "Vnic rsm map reg[%d] =0x%llx\n",
+ regoff - RCV_RSM_MAP_TABLE, reg);
+
+ write_csr(dd, regoff, reg);
+ regoff += 8;
+ if (i < (NUM_VNIC_MAP_ENTRIES - 1))
+ reg = read_csr(dd, regoff);
+ }
+ }
+
+ /* Add rule for vnic */
+ rrd.offset = dd->vnic.rmt_start;
+ rrd.pkt_type = 4;
+ /* Match 16B packets */
+ rrd.field1_off = L2_TYPE_MATCH_OFFSET;
+ rrd.mask1 = L2_TYPE_MASK;
+ rrd.value1 = L2_16B_VALUE;
+ /* Match ETH L4 packets */
+ rrd.field2_off = L4_TYPE_MATCH_OFFSET;
+ rrd.mask2 = L4_16B_TYPE_MASK;
+ rrd.value2 = L4_16B_ETH_VALUE;
+ /* Calc context from veswid and entropy */
+ rrd.index1_off = L4_16B_HDR_VESWID_OFFSET;
+ rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES);
+ rrd.index2_off = L2_16B_ENTROPY_OFFSET;
+ rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES);
+ add_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+
+ /* Enable RSM if not already enabled */
+ add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
+
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
+{
+ clear_rsm_rule(dd, RSM_INS_VNIC);
+
+ /* Disable RSM if used only by vnic */
+ if (dd->vnic.rmt_start == 0)
+ clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
+
static void init_rxe(struct hfi1_devdata *dd)
{
struct rsm_map_table *rmt;
@@ -14023,6 +14337,8 @@ static void init_rxe(struct hfi1_devdata *dd)
init_qos(dd, rmt);
init_user_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
+ /* record number of used rsm map entries for vnic */
+ dd->vnic.rmt_start = rmt->used;
kfree(rmt);
/*
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 043fd21..b9dbf16 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,7 +1,7 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -394,7 +394,8 @@
#define LAST_REMOTE_STATE_COMPLETE 0x13
#define LINK_QUALITY_INFO 0x14
#define REMOTE_DEVICE_ID 0x15
-#define LINK_DOWN_REASON 0x16
+#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */
+#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */
/* 8051 lane specific register field IDs */
#define TX_EQ_SETTINGS 0x00
@@ -524,10 +525,12 @@ enum {
#define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B)
/* misc status version fields */
-#define STS_FM_VERSION_A_SHIFT 16
-#define STS_FM_VERSION_A_MASK 0xff
-#define STS_FM_VERSION_B_SHIFT 24
-#define STS_FM_VERSION_B_MASK 0xff
+#define STS_FM_VERSION_MINOR_SHIFT 16
+#define STS_FM_VERSION_MINOR_MASK 0xff
+#define STS_FM_VERSION_MAJOR_SHIFT 24
+#define STS_FM_VERSION_MAJOR_MASK 0xff
+#define STS_FM_VERSION_PATCH_SHIFT 24
+#define STS_FM_VERSION_PATCH_MASK 0xff
/* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */
#define LCB_CRC_16B 0x0 /* 16b CRC */
@@ -698,7 +701,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd);
int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
/* chip.c */
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b);
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch);
void read_guid(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
@@ -1358,6 +1362,8 @@ int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt);
int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey);
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt);
void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
/*
* Interrupt source table.
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 1b783bb..995d62c 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -331,12 +331,15 @@ struct diag_pkt {
#define FULL_MGMT_P_KEY 0xFFFF
#define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_FECN_SHIFT 31
-#define HFI1_FECN_MASK 1
-#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
-#define HFI1_BECN_SHIFT 30
-#define HFI1_BECN_MASK 1
-#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
+
+/**
+ * 0xF8 - 4 bits of multicast range and 1 bit for collective range
+ * Example: For 24 bit LID space,
+ * Multicast range: 0xF00000 to 0xF7FFFF
+ * Collective range: 0xF80000 to 0xFFFFFE
+ */
+#define HFI1_MCAST_NR 0x4 /* Number of top bits set */
+#define HFI1_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */
#define HFI1_PSM_IOC_BASE_SEQ 0x0
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 7fe9dd8..e9fa3c2 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1,6 +1,6 @@
#ifdef CONFIG_DEBUG_FS
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -51,8 +51,12 @@
#include <linux/export.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ratelimit.h>
+#include <linux/fault-inject.h>
#include "hfi.h"
+#include "trace.h"
#include "debugfs.h"
#include "device.h"
#include "qp.h"
@@ -170,7 +174,7 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v)
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
- for (j = 0; j < dd->first_user_ctxt; j++) {
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
if (!dd->rcd[j])
continue;
n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
@@ -196,7 +200,7 @@ static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
if (!*pos)
return SEQ_START_TOKEN;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -210,7 +214,7 @@ static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
return pos;
++*pos;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -1063,6 +1067,222 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list);
+#ifdef CONFIG_FAULT_INJECTION
+static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void _fault_stats_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _fault_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
+ if (!dd->rcd[j])
+ continue;
+ n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+ n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ if (!ibd->fault_opcode->n_rxfaults[i] &&
+ !ibd->fault_opcode->n_txfaults[i])
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
+ (unsigned long long)n_packets,
+ (unsigned long long)n_bytes,
+ (unsigned long long)ibd->fault_opcode->n_rxfaults[i],
+ (unsigned long long)ibd->fault_opcode->n_txfaults[i]);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(fault_stats);
+DEBUGFS_SEQ_FILE_OPEN(fault_stats);
+DEBUGFS_FILE_OPS(fault_stats);
+
+static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+ debugfs_remove_recursive(ibd->fault_opcode->dir);
+ kfree(ibd->fault_opcode);
+ ibd->fault_opcode = NULL;
+}
+
+static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
+ if (!ibd->fault_opcode)
+ return -ENOMEM;
+
+ ibd->fault_opcode->attr.interval = 1;
+ ibd->fault_opcode->attr.require_end = ULONG_MAX;
+ ibd->fault_opcode->attr.stacktrace_depth = 32;
+ ibd->fault_opcode->attr.dname = NULL;
+ ibd->fault_opcode->attr.verbose = 0;
+ ibd->fault_opcode->fault_by_opcode = false;
+ ibd->fault_opcode->opcode = 0;
+ ibd->fault_opcode->mask = 0xff;
+
+ ibd->fault_opcode->dir =
+ fault_create_debugfs_attr("fault_opcode",
+ parent,
+ &ibd->fault_opcode->attr);
+ if (IS_ERR(ibd->fault_opcode->dir)) {
+ kfree(ibd->fault_opcode);
+ return -ENOENT;
+ }
+
+ DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
+ if (!debugfs_create_bool("fault_by_opcode", 0600,
+ ibd->fault_opcode->dir,
+ &ibd->fault_opcode->fault_by_opcode))
+ goto fail;
+ if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
+ &ibd->fault_opcode->opcode))
+ goto fail;
+ if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
+ &ibd->fault_opcode->mask))
+ goto fail;
+
+ return 0;
+fail:
+ fault_exit_opcode_debugfs(ibd);
+ return -ENOMEM;
+}
+
+static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+ debugfs_remove_recursive(ibd->fault_packet->dir);
+ kfree(ibd->fault_packet);
+ ibd->fault_packet = NULL;
+}
+
+static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
+ if (!ibd->fault_packet)
+ return -ENOMEM;
+
+ ibd->fault_packet->attr.interval = 1;
+ ibd->fault_packet->attr.require_end = ULONG_MAX;
+ ibd->fault_packet->attr.stacktrace_depth = 32;
+ ibd->fault_packet->attr.dname = NULL;
+ ibd->fault_packet->attr.verbose = 0;
+ ibd->fault_packet->fault_by_packet = false;
+
+ ibd->fault_packet->dir =
+ fault_create_debugfs_attr("fault_packet",
+ parent,
+ &ibd->fault_opcode->attr);
+ if (IS_ERR(ibd->fault_packet->dir)) {
+ kfree(ibd->fault_packet);
+ return -ENOENT;
+ }
+
+ if (!debugfs_create_bool("fault_by_packet", 0600,
+ ibd->fault_packet->dir,
+ &ibd->fault_packet->fault_by_packet))
+ goto fail;
+ if (!debugfs_create_u64("fault_stats", 0400,
+ ibd->fault_packet->dir,
+ &ibd->fault_packet->n_faults))
+ goto fail;
+
+ return 0;
+fail:
+ fault_exit_packet_debugfs(ibd);
+ return -ENOMEM;
+}
+
+static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+ fault_exit_opcode_debugfs(ibd);
+ fault_exit_packet_debugfs(ibd);
+}
+
+static int fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ int ret = 0;
+
+ ret = fault_init_opcode_debugfs(ibd);
+ if (ret)
+ return ret;
+
+ ret = fault_init_packet_debugfs(ibd);
+ if (ret)
+ fault_exit_opcode_debugfs(ibd);
+
+ return ret;
+}
+
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return ibd->fault_suppress_err;
+}
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
+{
+ bool ret = false;
+ struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
+
+ if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
+ return false;
+ if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
+ return false;
+ ret = should_fail(&ibd->fault_opcode->attr, 1);
+ if (ret) {
+ trace_hfi1_fault_opcode(qp, opcode);
+ if (rx)
+ ibd->fault_opcode->n_rxfaults[opcode]++;
+ else
+ ibd->fault_opcode->n_txfaults[opcode]++;
+ }
+ return ret;
+}
+
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+ struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
+ struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
+ bool ret = false;
+
+ if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
+ return false;
+
+ ret = should_fail(&ibd->fault_packet->attr, 1);
+ if (ret) {
+ ++ibd->fault_packet->n_faults;
+ trace_hfi1_fault_packet(packet);
+ }
+ return ret;
+}
+#endif
+
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -1112,12 +1332,22 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
!port_cntr_ops[i].ops.write ?
S_IRUGO : S_IRUGO | S_IWUSR);
}
+
+#ifdef CONFIG_FAULT_INJECTION
+ debugfs_create_bool("fault_suppress_err", 0600,
+ ibd->hfi1_ibdev_dbg,
+ &ibd->fault_suppress_err);
+ fault_init_debugfs(ibd);
+#endif
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
+#ifdef CONFIG_FAULT_INJECTION
+ fault_exit_debugfs(ibd);
+#endif
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index b6fb681..38c38a9 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -53,23 +53,79 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void);
void hfi1_dbg_exit(void);
+
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h>
+struct fault_opcode {
+ struct fault_attr attr;
+ struct dentry *dir;
+ bool fault_by_opcode;
+ u64 n_rxfaults[256];
+ u64 n_txfaults[256];
+ u8 opcode;
+ u8 mask;
+};
+
+struct fault_packet {
+ struct fault_attr attr;
+ struct dentry *dir;
+ bool fault_by_packet;
+ u64 n_faults;
+};
+
+bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
+bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
+#else
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+ u32 opcode, bool rx)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
+#endif
+
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
}
-void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+{
+}
+
+static inline void hfi1_dbg_init(void)
{
}
-void hfi1_dbg_init(void)
+static inline void hfi1_dbg_exit(void)
{
}
-void hfi1_dbg_exit(void)
+static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
{
+ return false;
}
+static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
+ u32 opcode, bool rx)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
#endif
#endif /* _HFI1_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 3881c95..5278954 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -59,6 +59,8 @@
#include "trace.h"
#include "qp.h"
#include "sdma.h"
+#include "debugfs.h"
+#include "vnic.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -283,7 +285,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
{
struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
- int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
+ int lnh = ib_get_lnh(rhdr);
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -295,7 +297,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
/* For TIDERR and RC QPs preemptively schedule a NAK */
struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = be16_to_cpu(rhdr->lrh[1]);
+ u16 lid = ib_get_dlid(rhdr);
u32 qp_num;
u32 rcv_flags = 0;
@@ -396,7 +398,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
u16 rlid;
u8 svc_type, sl, sc5;
- sc5 = hdr2sc(rhdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
@@ -414,7 +416,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
svc_type = IB_CC_SVCTYPE_UD;
break;
case IB_QPT_UC:
- rlid = be16_to_cpu(rhdr->lrh[3]);
+ rlid = ib_get_slid(rhdr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
@@ -460,7 +462,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = NULL;
u32 rqpn = 0, bth1;
- u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
+ u16 rlid, dlid = ib_get_dlid(hdr);
u8 sc, svc_type;
bool is_mcast = false;
@@ -471,19 +473,19 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
- rlid = be16_to_cpu(hdr->lrh[3]);
+ rlid = ib_get_slid(hdr);
rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
svc_type = IB_CC_SVCTYPE_UD;
is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
break;
case IB_QPT_UC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
case IB_QPT_RC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_RC;
break;
@@ -491,16 +493,16 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
return;
}
- sc = hdr2sc(hdr, pkt->rhf);
+ sc = hfi1_9B_get_sc5(hdr, pkt->rhf);
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
+ if (do_cnp && (bth1 & IB_FECN_SMASK)) {
u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
}
- if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) {
+ if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 lqpn = bth1 & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc];
@@ -621,8 +623,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
hdr = packet->hdr;
-
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
@@ -634,7 +635,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
}
bth1 = be32_to_cpu(packet->ohdr->bth[1]);
- is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
+ is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));
if (!is_ecn)
goto next;
@@ -652,7 +653,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
rcu_read_unlock();
/* turn off BECN, FECN */
- bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
+ bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK);
packet->ohdr->bth[1] = cpu_to_be32(bth1);
next:
update_ps_mdata(&mdata, rcd);
@@ -872,20 +873,42 @@ bail:
return last;
}
-static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
+static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt)
{
int i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (ctxt >= dd->first_dyn_alloc_ctxt) {
+ dd->rcd[ctxt]->do_interrupt =
+ &handle_receive_interrupt_nodma_rtail;
+ return;
+ }
+
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
dd->rcd[i]->do_interrupt =
&handle_receive_interrupt_nodma_rtail;
}
-static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
+static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt)
{
int i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (ctxt >= dd->first_dyn_alloc_ctxt) {
+ dd->rcd[ctxt]->do_interrupt =
+ &handle_receive_interrupt_dma_rtail;
+ return;
+ }
+
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++)
dd->rcd[i]->do_interrupt =
&handle_receive_interrupt_dma_rtail;
}
@@ -895,8 +918,13 @@ void set_all_slowpath(struct hfi1_devdata *dd)
int i;
/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
- dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[i];
+
+ if ((i < dd->first_dyn_alloc_ctxt) ||
+ (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ rcd->do_interrupt = &handle_receive_interrupt;
+ }
}
static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
@@ -908,7 +936,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
- if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
+ if (etype == RHF_RCV_TYPE_IB &&
+ hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) {
int hwstate = read_logical_state(dd);
if (hwstate != LSTATE_ACTIVE) {
@@ -1006,7 +1035,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
last = RCV_PKT_DONE;
if (needset) {
dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
- set_all_nodma_rtail(dd);
+ set_nodma_rtail(dd, rcd->ctxt);
needset = 0;
}
} else {
@@ -1028,7 +1057,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
if (needset) {
dd_dev_info(dd,
"Switching to DMA_RTAIL\n");
- set_all_dma_rtail(dd);
+ set_dma_rtail(dd, rcd->ctxt);
needset = 0;
}
}
@@ -1077,10 +1106,10 @@ void receive_interrupt_work(struct work_struct *work)
set_link_state(ppd, HLS_UP_ACTIVE);
/*
- * Interrupt all kernel contexts that could have had an
- * interrupt during auto activation.
+ * Interrupt all statically allocated kernel contexts that could
+ * have had an interrupt during auto activation.
*/
- for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
+ for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++)
force_recv_intr(dd->rcd[i]);
}
@@ -1294,7 +1323,8 @@ int hfi1_reset_device(int unit)
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (dd->rcd)
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
+ for (i = dd->first_dyn_alloc_ctxt;
+ i < dd->num_rcv_contexts; i++) {
if (!dd->rcd[i] || !dd->rcd[i]->cnt)
continue;
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
@@ -1354,6 +1384,9 @@ void handle_eflags(struct hfi1_packet *packet)
*/
int process_receive_ib(struct hfi1_packet *packet)
{
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
+
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt,
rhf_err_flags(packet->rhf),
@@ -1363,6 +1396,11 @@ int process_receive_ib(struct hfi1_packet *packet)
packet->updegr,
rhf_egr_index(packet->rhf));
+ if (unlikely(
+ (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ (packet->rhf & RHF_DC_ERR))))
+ return RHF_RCV_CONTINUE;
+
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
return RHF_RCV_CONTINUE;
@@ -1372,15 +1410,31 @@ int process_receive_ib(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
}
+static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
+{
+ /* Packet received in VNIC context via RSM */
+ if (packet->rcd->is_vnic)
+ return true;
+
+ if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) &&
+ (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR))
+ return true;
+
+ return false;
+}
+
int process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
- if (unlikely(rhf_err_flags(packet->rhf)))
+ if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
+ } else if (hfi1_is_vnic_packet(packet)) {
+ hfi1_vnic_bypass_rcv(packet);
+ return RHF_RCV_CONTINUE;
+ }
- dd_dev_err(dd,
- "Bypass packets are not supported in normal operation. Dropping\n");
+ dd_dev_err(dd, "Unsupported bypass packet. Dropping\n");
incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
u64 *flits = packet->ebuf;
@@ -1398,6 +1452,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
int process_receive_error(struct hfi1_packet *packet)
{
+ /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
+ if (unlikely(
+ hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ rhf_rcv_type_err(packet->rhf) == 3))
+ return RHF_RCV_CONTINUE;
+
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
@@ -1409,6 +1469,8 @@ int process_receive_error(struct hfi1_packet *packet)
int kdeth_process_expected(struct hfi1_packet *packet)
{
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@@ -1421,6 +1483,8 @@ int kdeth_process_eager(struct hfi1_packet *packet)
{
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
+ if (unlikely(hfi1_dbg_fault_packet(packet)))
+ return RHF_RCV_CONTINUE;
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index f78c739..3d9bce4 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -586,8 +586,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* knows where it's own bitmap is within the page.
*/
memaddr = (unsigned long)(dd->events +
- ((uctxt->ctxt - dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
+ ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
memlen = PAGE_SIZE;
/*
* v3.7 removes VM_RESERVED but the effect is kept by
@@ -597,6 +597,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vmf = 1;
break;
case STATUS:
+ if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
+ ret = -EPERM;
+ goto done;
+ }
memaddr = kvirt_to_phys((void *)dd->status);
memlen = PAGE_SIZE;
flags |= VM_IO | VM_DONTEXPAND;
@@ -756,7 +760,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
* Clear any left over, unhandled events so the next process that
* gets this context doesn't get confused.
*/
- ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
+ ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
*ev = 0;
@@ -909,12 +913,18 @@ static int find_shared_ctxt(struct file *fp,
if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
continue;
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
+ for (i = dd->first_dyn_alloc_ctxt;
+ i < dd->num_rcv_contexts; i++) {
struct hfi1_ctxtdata *uctxt = dd->rcd[i];
/* Skip ctxts which are not yet open */
if (!uctxt || !uctxt->cnt)
continue;
+
+ /* Skip dynamically allocted kernel contexts */
+ if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
+ continue;
+
/* Skip ctxt if it doesn't match the requested one */
if (memcmp(uctxt->uuid, uinfo->uuid,
sizeof(uctxt->uuid)) ||
@@ -960,7 +970,8 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
return -EIO;
}
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++)
+ for (ctxt = dd->first_dyn_alloc_ctxt;
+ ctxt < dd->num_rcv_contexts; ctxt++)
if (!dd->rcd[ctxt])
break;
@@ -1306,7 +1317,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
*/
binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
fd->subctxt, 0);
- offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) *
+ offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
sizeof(*dd->events));
binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
@@ -1400,12 +1411,12 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
}
spin_lock_irqsave(&dd->uctxt_lock, flags);
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts;
+ for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
ctxt++) {
uctxt = dd->rcd[ctxt];
if (uctxt) {
unsigned long *evs = dd->events +
- (uctxt->ctxt - dd->first_user_ctxt) *
+ (uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS;
int i;
/*
@@ -1477,7 +1488,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
if (!dd->events)
return 0;
- evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
+ evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + subctxt;
for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 0dd50cd..4042c11 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
{
u64 reg;
int ret;
- u8 ver_a, ver_b;
+ u8 ver_major;
+ u8 ver_minor;
+ u8 ver_patch;
/*
* DC Reset sequence
@@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
return -ETIMEDOUT;
}
- read_misc_status(dd, &ver_a, &ver_b);
- dd_dev_info(dd, "8051 firmware version %d.%d\n",
- (int)ver_b, (int)ver_a);
- dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
+ read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
+ dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
+ (int)ver_major, (int)ver_minor, (int)ver_patch);
+ dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 0808e3c3..f066743 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/idr.h>
#include <linux/io.h>
#include <linux/fs.h>
#include <linux/completion.h>
@@ -66,6 +67,7 @@
#include <linux/i2c-algo-bit.h>
#include <rdma/ib_hdrs.h>
#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -278,6 +280,8 @@ struct hfi1_ctxtdata {
struct hfi1_devdata *dd;
/* so functions that need physical port can get it easily */
struct hfi1_pportdata *ppd;
+ /* associated msix interrupt */
+ u32 msix_intr;
/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
void *subctxt_uregbase;
/* An array of pages for the eager receive buffers * N */
@@ -337,6 +341,12 @@ struct hfi1_ctxtdata {
* packets with the wrong interrupt handler.
*/
int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+
+ /* Indicates that this is vnic context */
+ bool is_vnic;
+
+ /* vnic queue index this context is mapped to */
+ u8 vnic_q_idx;
};
/*
@@ -474,7 +484,7 @@ struct rvt_sge_state;
#define HFI1_PART_ENFORCE_OUT 0x2
/* how often we check for synthetic counter wrap around */
-#define SYNTH_CNT_TIME 2
+#define SYNTH_CNT_TIME 3
/* Counter flags */
#define CNTR_NORMAL 0x0 /* Normal counters, just read register */
@@ -808,6 +818,32 @@ struct hfi1_asic_data {
struct hfi1_i2c_bus *i2c_bus1;
};
+/* sizes for both the QP and RSM map tables */
+#define NUM_MAP_ENTRIES 256
+#define NUM_MAP_REGS 32
+
+/*
+ * Number of VNIC contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_NUM_VNIC_CTXT 8
+
+/* Number of VNIC RSM entries */
+#define NUM_VNIC_MAP_ENTRIES 8
+
+/* Virtual NIC information */
+struct hfi1_vnic_data {
+ struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
+ struct kmem_cache *txreq_cache;
+ u8 num_vports;
+ struct idr vesw_idr;
+ u8 rmt_start;
+ u8 num_ctxt;
+ u32 msix_idx;
+};
+
+struct hfi1_vnic_vport_info;
+
/* device data struct now contains only "general per-device" info.
* fields related to a physical IB port are in a hfi1_pportdata struct.
*/
@@ -926,8 +962,9 @@ struct hfi1_devdata {
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
/* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
spinlock_t uctxt_lock; /* rcd and user context changes */
- /* exclusive access to 8051 */
- spinlock_t dc8051_lock;
+ struct mutex dc8051_lock; /* exclusive access to 8051 */
+ struct workqueue_struct *update_cntr_wq;
+ struct work_struct update_cntr_work;
/* exclusive access to 8051 memory */
spinlock_t dc8051_memlock;
int dc8051_timed_out; /* remember if the 8051 timed out */
@@ -1020,7 +1057,7 @@ struct hfi1_devdata {
u8 qos_shift;
u16 irev; /* implementation revision */
- u16 dc8051_ver; /* 8051 firmware version */
+ u32 dc8051_ver; /* 8051 firmware version */
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
@@ -1031,6 +1068,7 @@ struct hfi1_devdata {
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
u32 num_msix_entries;
+ u32 first_dyn_msix_idx;
/* INTx information */
u32 requested_intx_irq; /* did we request one? */
@@ -1115,6 +1153,9 @@ struct hfi1_devdata {
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+ int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
/* hfi1_pportdata, points to array of (physical) port-specific
* data structs, indexed by pidx (0..n-1)
*/
@@ -1126,8 +1167,8 @@ struct hfi1_devdata {
u16 flags;
/* Number of physical ports available */
u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
+ /* Lowest context number which can be used by user processes or VNIC */
+ u8 first_dyn_alloc_ctxt;
/* adding a new field here would make it part of this cacheline */
/* seqlock for sc2vl */
@@ -1167,15 +1208,24 @@ struct hfi1_devdata {
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
bool aspm_enabled; /* ASPM state: enabled/disabled */
- struct rhashtable sdma_rht;
+ struct rhashtable *sdma_rht;
struct kobject kobj;
+
+ /* vnic data */
+ struct hfi1_vnic_data vnic;
};
+static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
+{
+ return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES;
+}
+
/* 8051 firmware version helper */
-#define dc8051_ver(a, b) ((a) << 8 | (b))
-#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
-#define dc8051_ver_min(a) (a & 0x00ff)
+#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
+#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
+#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
+#define dc8051_ver_patch(a) ((a) & 0x0000ff)
/* f_put_tid types */
#define PT_EXPECTED 0
@@ -1235,6 +1285,9 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
void set_all_slowpath(struct hfi1_devdata *dd);
+void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
+void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
+void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
extern const struct pci_device_id hfi1_pci_tbl[];
@@ -1254,16 +1307,24 @@ int hfi1_reset_device(int);
/* return the driver's idea of the logical OPA port state */
static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
{
- return ppd->lstate; /* use the cached value */
+ /*
+ * The driver does some processing from the time the logical
+ * link state is at INIT to the time the SM can be notified
+ * as such. Return IB_PORT_DOWN until the software state
+ * is ready.
+ */
+ if (ppd->lstate == IB_PORT_INIT && !(ppd->host_link_state & HLS_UP))
+ return IB_PORT_DOWN;
+ else
+ return ppd->lstate;
}
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
-static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
+static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf)
{
- return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
- ((!!(rhf_dc_info(rhf))) << 4);
+ return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4);
}
#define HFI1_JKEY_WIDTH 16
@@ -1597,9 +1658,9 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
u32 bth1;
bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
+ if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) {
hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
- return bth1 & HFI1_FECN_SMASK;
+ return !!(bth1 & IB_FECN_SMASK);
}
return false;
}
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index f40864e..4d6b9f8 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -65,6 +65,7 @@
#include "verbs.h"
#include "aspm.h"
#include "affinity.h"
+#include "vnic.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -139,7 +140,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
goto nomem;
/* create one or more kernel contexts */
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
struct hfi1_pportdata *ppd;
struct hfi1_ctxtdata *rcd;
@@ -214,9 +215,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
u32 base;
if (dd->rcv_entries.nctxt_extra >
- dd->num_rcv_contexts - dd->first_user_ctxt)
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)
kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
- (dd->num_rcv_contexts - dd->first_user_ctxt));
+ (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt));
rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
if (rcd) {
u32 rcvtids, max_entries;
@@ -238,27 +239,29 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
* Calculate the context's RcvArray entry starting point.
* We do this here because we have to take into account all
* the RcvArray entries that previous context would have
- * taken and we have to account for any extra groups
- * assigned to the kernel or user contexts.
+ * taken and we have to account for any extra groups assigned
+ * to the static (kernel) or dynamic (vnic/user) contexts.
*/
- if (ctxt < dd->first_user_ctxt) {
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
if (ctxt < kctxt_ngroups) {
base = ctxt * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base = kctxt_ngroups +
(ctxt * dd->rcv_entries.ngroups);
+ }
} else {
- u16 ct = ctxt - dd->first_user_ctxt;
+ u16 ct = ctxt - dd->first_dyn_alloc_ctxt;
base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) +
kctxt_ngroups);
if (ct < dd->rcv_entries.nctxt_extra) {
base += ct * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base += dd->rcv_entries.nctxt_extra +
(ct * dd->rcv_entries.ngroups);
+ }
}
rcd->eager_base = base * dd->rcv_entries.group_size;
@@ -322,7 +325,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
}
rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
- if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
+ /* Applicable only for statically created kernel contexts */
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
GFP_KERNEL, numa);
if (!rcd->opstats)
@@ -482,6 +486,9 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
default_pkey_idx = 1;
ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
+ ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
+ ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
+
if (loopback) {
hfi1_early_err(&pdev->dev,
"Faking data partition 0x8001 in idx %u\n",
@@ -585,7 +592,7 @@ static void enable_chip(struct hfi1_devdata *dd)
* Enable kernel ctxts' receive and receive interrupt.
* Other ctxts done as user opens and initializes them.
*/
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
@@ -679,6 +686,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
dd->pio_inline_send = pio_copy;
+ dd->process_vnic_dma_send = hfi1_vnic_send_dma;
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
@@ -714,7 +722,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
}
/* dd->rcd can be NULL if early initialization failed */
- for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
+ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
* Set up the (kernel) rcvhdr queue and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
@@ -1078,11 +1086,11 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->uctxt_lock);
spin_lock_init(&dd->hfi1_diag_trans_lock);
spin_lock_init(&dd->sc_init_lock);
- spin_lock_init(&dd->dc8051_lock);
spin_lock_init(&dd->dc8051_memlock);
seqlock_init(&dd->sc2vl_lock);
spin_lock_init(&dd->sde_map_lock);
spin_lock_init(&dd->pio_map_lock);
+ mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
dd->int_counter = alloc_percpu(u64);
@@ -1425,6 +1433,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* First, lock the non-writable module parameters */
HFI1_CAP_LOCK();
+ /* Validate dev ids */
+ if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+ ent->device == PCI_DEVICE_ID_INTEL1)) {
+ hfi1_early_err(&pdev->dev,
+ "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
+ ret = -ENODEV;
+ goto bail;
+ }
+
/* Validate some global module parameters */
ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
if (ret)
@@ -1470,15 +1488,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto bail;
- if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
- ent->device == PCI_DEVICE_ID_INTEL1)) {
- hfi1_early_err(&pdev->dev,
- "Failing on unknown Intel deviceid 0x%x\n",
- ent->device);
- ret = -ENODEV;
- goto clean_bail;
- }
-
/*
* Do device-specific initialization, function table setup, dd
* allocation, etc.
@@ -1497,6 +1506,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* do the generic initialization */
initfail = hfi1_init(dd, 0);
+ /* setup vnic */
+ hfi1_vnic_setup(dd);
+
ret = hfi1_register_ib_device(dd);
/*
@@ -1530,6 +1542,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
hfi1_device_remove(dd);
if (!ret)
hfi1_unregister_ib_device(dd);
+ hfi1_vnic_cleanup(dd);
postinit_cleanup(dd);
if (initfail)
ret = initfail;
@@ -1574,6 +1587,9 @@ static void remove_one(struct pci_dev *pdev)
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
+ /* cleanup vnic */
+ hfi1_vnic_cleanup(dd);
+
/*
* Disable the IB link, disable interrupts on the device,
* clear dma engines, etc.
@@ -1613,8 +1629,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
sizeof(u32));
- gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
- GFP_USER : GFP_KERNEL;
+ if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) ||
+ (rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ gfp_flags = GFP_KERNEL;
+ else
+ gfp_flags = GFP_USER;
rcd->rcvhdrq = dma_zalloc_coherent(
&dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
gfp_flags | __GFP_COMP);
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 65348d1..232014d 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -131,19 +131,24 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
set_up_vl15(dd, dd->vau, dd->vl15_init);
assign_remote_cm_au_table(dd, dd->vcu);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_port_number =
- read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n",
- ppd->neighbor_guid,
- ppd->neighbor_type);
}
+ ppd->neighbor_guid =
+ read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
+ ppd->neighbor_type =
+ read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
+ DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
+ ppd->neighbor_port_number =
+ read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
+ DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
+ ppd->neighbor_fm_security =
+ read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
+ DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
+ dd_dev_info(dd,
+ "Neighbor Guid %llx, Type %d, Port Num %d\n",
+ ppd->neighbor_guid, ppd->neighbor_type,
+ ppd->neighbor_port_number);
+
/* physical link went up */
ppd->linkup = 1;
ppd->offline_disabled_reason =
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 09cda3c..5977673 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -53,6 +53,7 @@
#include "mad.h"
#include "trace.h"
#include "qp.h"
+#include "vnic.h"
/* the reset value from the FM is supposed to be 0xffff, handle both */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
@@ -650,9 +651,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
pi->port_packet_format.supported =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
pi->port_packet_format.enabled =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
/* flit_control.interleave is (OPA V1, version .76):
* bits use
@@ -701,7 +704,13 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
pi->buffer_units = cpu_to_be32(buffer_units);
- pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);
+ pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported |
+ OPA_CAP_MASK3_IsEthOnFabricSupported);
+ /* Driver does not support mcast/collective configuration */
+ pi->opa_cap_mask &=
+ cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported);
+ pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7)
+ << 3 | (HFI1_MCAST_NR & 0x7));
/* HFI supports a replay buffer 128 LTPs in size */
pi->replay_depth.buffer = 0x80;
@@ -1146,16 +1155,6 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd->linkinit_reason =
(pi->partenforce_filterraw &
OPA_PI_MASK_LINKINIT_REASON);
- /* enable/disable SW pkey checking as per FM control */
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
- ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
-
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
- ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
/* Must be a valid unicast LID address. */
if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
@@ -1167,9 +1166,9 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
spin_lock_irqsave(&ibp->rvp.lock, flags);
if (ibp->rvp.sm_ah) {
if (smlid != ibp->rvp.sm_lid)
- ibp->rvp.sm_ah->attr.dlid = smlid;
+ rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr, smlid);
if (msl != ibp->rvp.sm_sl)
- ibp->rvp.sm_ah->attr.sl = msl;
+ rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
}
spin_unlock_irqrestore(&ibp->rvp.lock, flags);
if (smlid != ibp->rvp.sm_lid)
@@ -1465,25 +1464,15 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
}
-static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
-{
- u64 *val = data;
-
- *val++ = read_csr(dd, SEND_SC2VLT0);
- *val++ = read_csr(dd, SEND_SC2VLT1);
- *val++ = read_csr(dd, SEND_SC2VLT2);
- *val++ = read_csr(dd, SEND_SC2VLT3);
- return 0;
-}
-
#define ILLEGAL_VL 12
/*
* filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except
* for SC15, which must map to VL15). If we don't remap things this
* way it is possible for VL15 counters to increment when we try to
* send on a SC which is mapped to an invalid VL.
+ * When getting the table convert ILLEGAL_VL back to VL15.
*/
-static void filter_sc2vlt(void *data)
+static void filter_sc2vlt(void *data, bool set)
{
int i;
u8 *pd = data;
@@ -1491,8 +1480,14 @@ static void filter_sc2vlt(void *data)
for (i = 0; i < OPA_MAX_SCS; i++) {
if (i == 15)
continue;
- if ((pd[i] & 0x1f) == 0xf)
- pd[i] = ILLEGAL_VL;
+
+ if (set) {
+ if ((pd[i] & 0x1f) == 0xf)
+ pd[i] = ILLEGAL_VL;
+ } else {
+ if ((pd[i] & 0x1f) == ILLEGAL_VL)
+ pd[i] = 0xf;
+ }
}
}
@@ -1500,7 +1495,7 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
{
u64 *val = data;
- filter_sc2vlt(data);
+ filter_sc2vlt(data, true);
write_csr(dd, SEND_SC2VLT0, *val++);
write_csr(dd, SEND_SC2VLT1, *val++);
@@ -1512,6 +1507,19 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
return 0;
}
+static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
+{
+ u64 *val = (u64 *)data;
+
+ *val++ = read_csr(dd, SEND_SC2VLT0);
+ *val++ = read_csr(dd, SEND_SC2VLT1);
+ *val++ = read_csr(dd, SEND_SC2VLT2);
+ *val++ = read_csr(dd, SEND_SC2VLT3);
+
+ filter_sc2vlt((u64 *)data, false);
+ return 0;
+}
+
static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
@@ -1986,31 +1994,6 @@ struct opa_pma_mad {
u8 data[2024];
} __packed;
-struct opa_class_port_info {
- u8 base_version;
- u8 class_version;
- __be16 cap_mask;
- __be32 cap_mask2_resp_time;
-
- u8 redirect_gid[16];
- __be32 redirect_tc_fl;
- __be32 redirect_lid;
- __be32 redirect_sl_qp;
- __be32 redirect_qkey;
-
- u8 trap_gid[16];
- __be32 trap_tc_fl;
- __be32 trap_lid;
- __be32 trap_hl_qp;
- __be32 trap_qkey;
-
- __be16 trap_pkey;
- __be16 redirect_pkey;
-
- u8 trap_sl_rsvd;
- u8 reserved[3];
-} __packed;
-
struct opa_port_status_req {
__u8 port_num;
__u8 reserved[3];
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 0829fce..e39e01b 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -583,7 +583,7 @@ pci_mmio_enabled(struct pci_dev *pdev)
if (words == ~0ULL)
ret = PCI_ERS_RESULT_NEED_RESET;
dd_dev_info(dd,
- "HFI1 mmio_enabled function called, read wordscntr %Lx, returning %d\n",
+ "HFI1 mmio_enabled function called, read wordscntr %llx, returning %d\n",
words, ret);
}
return ret;
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 615be68..ed72b5a 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -703,6 +703,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
{
struct send_context_info *sci;
struct send_context *sc = NULL;
+ int req_type = type;
dma_addr_t dma;
unsigned long flags;
u64 reg;
@@ -729,6 +730,13 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
return NULL;
}
+ /*
+ * VNIC contexts are dynamically allocated.
+ * Hence, pick a user context for VNIC.
+ */
+ if (type == SC_VNIC)
+ type = SC_USER;
+
spin_lock_irqsave(&dd->sc_lock, flags);
ret = sc_hw_alloc(dd, type, &sw_index, &hw_context);
if (ret) {
@@ -738,6 +746,15 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
return NULL;
}
+ /*
+ * VNIC contexts are used by kernel driver.
+ * Hence, mark them as kernel contexts.
+ */
+ if (req_type == SC_VNIC) {
+ dd->send_contexts[sw_index].type = SC_KERNEL;
+ type = SC_KERNEL;
+ }
+
sci = &dd->send_contexts[sw_index];
sci->sc = sc;
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 867e5ff..99ca5ed 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -1,7 +1,7 @@
#ifndef _PIO_H
#define _PIO_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -54,6 +54,12 @@
#define SC_USER 3 /* must be the last one: it may take all left */
#define SC_MAX 4 /* count of send context types */
+/*
+ * SC_VNIC types are allocated (dynamically) from the user context pool,
+ * (SC_USER) and used by kernel driver as kernel contexts (SC_KERNEL).
+ */
+#define SC_VNIC SC_MAX
+
/* invalid send context index */
#define INVALID_SCI 0xff
@@ -195,7 +201,7 @@ struct sc_config_sizes {
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | ksc[n] -> sc n |
+ * |--------------------------| --/ | ksc[n-1] -> sc n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
@@ -208,21 +214,21 @@ struct sc_config_sizes {
* |--------------------------| |--------------------|
* | map[vls - 1] |- | * |
* +--------------------------+ \- |--------------------|
- * \- | ksc[m] -> sc m+n |
+ * \- | ksc[m-1] -> sc m+n |
* \ +--------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | ksc[0] -> sc 1+m+n |
- * \- |--------------------|
- * >| ksc[1] -> sc 2+m+n |
- * |--------------------|
- * | * |
- * |--------------------|
- * | ksc[o] -> sc o+m+n |
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | ksc[0] -> sc 1+m+n |
+ * \- |----------------------|
+ * >| ksc[1] -> sc 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | ksc[o-1] -> sc o+m+n |
+ * +----------------------+
*
*/
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index c4ebd09..4573e4c 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -294,7 +294,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
ah = ibah_to_rvtah(wqe->ud_wr.ah);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
- if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
+ if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
return -EINVAL;
default:
break;
@@ -631,8 +631,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->s_tail, qp->s_head, qp->s_size,
qp->s_avail,
qp->remote_qpn,
- qp->remote_ah_attr.dlid,
- qp->remote_ah_attr.sl,
+ rdma_ah_get_dlid(&qp->remote_ah_attr),
+ rdma_ah_get_sl(&qp->remote_ah_attr),
qp->pmtu,
qp->s_retry,
qp->s_retry_cnt,
@@ -748,7 +748,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp)
qp->s_mig_state = IB_MIG_MIGRATED;
qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
+ qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
qp->s_pkey_index = qp->s_alt_pkey_index;
qp->s_flags |= RVT_S_AHG_CLEAR;
priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
@@ -778,7 +778,7 @@ u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
u8 sc, vl;
ibp = &dd->pport[qp->port_num - 1].ibport_data;
- sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
vl = sc_to_vlt(dd, sc);
mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
@@ -861,7 +861,7 @@ void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
if (qp->port_num == ppd->port &&
(qp->ibqp.qp_type == IB_QPT_UC ||
qp->ibqp.qp_type == IB_QPT_RC) &&
- qp->remote_ah_attr.sl == sl &&
+ rdma_ah_get_sl(&qp->remote_ah_attr) == sl &&
(ib_rvt_state_ops[qp->state] &
RVT_POST_SEND_OK)) {
spin_lock_irq(&qp->r_lock);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 7382be1..75a729c 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -274,7 +274,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail_no_tx;
ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/* Sending responses has higher priority over sending requests. */
@@ -744,9 +744,10 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
/* Construct the header */
/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
hwords = 6;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
hwords += hfi1_make_grh(ibp, &hdr.u.l.grh,
- &qp->remote_ah_attr.grh, hwords, 0);
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ hwords, 0);
ohdr = &hdr.u.l.oth;
lrh0 = HFI1_LRH_GRH;
} else {
@@ -763,17 +764,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
IB_AETH_CREDIT_SHIFT));
else
ohdr->u.aeth = rvt_compute_aeth(qp);
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
- lrh0 |= (sc5 & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+ lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr)
+ & 0xf) << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
- hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
+ hdr.lrh[3] = cpu_to_be16(ppd->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
- ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << HFI1_BECN_SHIFT);
+ ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT);
ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
/* Don't try to send ACKs if the link isn't ACTIVE */
@@ -994,12 +997,12 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
return;
/* Find out where the BTH is */
- if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
WARN_ON(!qp->s_rdma_ack_cnt);
@@ -1028,13 +1031,17 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
s_last = qp->s_last;
+ trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
rvt_put_swqe(wqe);
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@@ -1076,12 +1083,16 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
rvt_put_swqe(wqe);
s_last = qp->s_last;
+ trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -1092,10 +1103,11 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
*/
if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
struct sdma_engine *engine;
+ u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr);
u8 sc5;
/* For now use sc to find engine */
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc5 = ibp->sl_to_sc[sl];
engine = qp_to_sdma_engine(qp, sc5);
sdma_engine_progress_schedule(engine);
}
@@ -1516,7 +1528,7 @@ read_middle:
if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
goto ack_done;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/*
* Check that the data size is >= 0 && <= pmtu.
* Remember to account for ICRC (4).
@@ -1540,7 +1552,7 @@ read_middle:
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/*
* Check that the data size is >= 1 && <= pmtu.
* Remember to account for ICRC (4).
@@ -1922,7 +1934,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
int diff;
struct ib_reth *reth;
unsigned long flags;
- int ret, is_fecn = 0;
+ int ret;
+ bool is_fecn = false;
bool copy_last = false;
u32 rkey;
@@ -1934,7 +1947,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
is_fecn = process_ecn(qp, packet, false);
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2065,7 +2078,7 @@ no_immediate_data:
wc.ex.imm_data = 0;
send_last:
/* Get the number of bytes the message was padded by. */
- pad = (bth0 >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -2089,7 +2102,7 @@ send_last:
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
@@ -2101,7 +2114,7 @@ send_last:
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
- wc.sl = qp->remote_ah_attr.sl;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
@@ -2378,7 +2391,7 @@ void hfi1_rc_hdrerr(
return;
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/* Only deal with RDMA Writes for now */
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index aa15bcb..891ba0a 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -219,72 +219,84 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
{
__be64 guid;
unsigned long flags;
- u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
if (!has_grh) {
- if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->alt_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
guid))
goto err;
if (!gid_ok(
&hdr->u.l.grh.sgid,
- qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
- qp->alt_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
+ ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ ib_get_sl(hdr),
0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ ib_get_slid(hdr),
+ ib_get_dlid(hdr));
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
- ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
+ if (ib_get_slid(hdr) !=
+ rdma_ah_get_dlid(&qp->alt_ah_attr) ||
+ ppd_from_ibp(ibp)->port !=
+ rdma_ah_get_port_num(&qp->alt_ah_attr))
goto err;
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
if (!has_grh) {
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH)
goto err;
} else {
- if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH))
goto err;
- guid = get_sguid(ibp,
- qp->remote_ah_attr.grh.sgid_index);
+ grh = rdma_ah_read_grh(&qp->remote_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
guid))
goto err;
if (!gid_ok(
&hdr->u.l.grh.sgid,
- qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
- qp->remote_ah_attr.grh.dgid.global.interface_id))
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
goto err;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
+ ib_get_slid(hdr)))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
(u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ ib_get_sl(hdr),
0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
+ ib_get_slid(hdr),
+ ib_get_dlid(hdr));
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+ if (ib_get_slid(hdr) !=
+ rdma_ah_get_dlid(&qp->remote_ah_attr) ||
ppd_from_ibp(ibp)->port != qp->port_num)
goto err;
if (qp->s_mig_state == IB_MIG_REARM &&
@@ -542,8 +554,8 @@ do_write:
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -637,7 +649,7 @@ done:
* Return the size of the header in 32 bit words.
*/
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords)
+ const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
hdr->version_tclass_flow =
cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
@@ -731,15 +743,17 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
extra_bytes = -ps->s_txreq->s_cur_size & 3;
nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
lrh0 = HFI1_LRH_BTH;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += hfi1_make_grh(ibp,
- &ps->s_txreq->phdr.hdr.u.l.grh,
- &qp->remote_ah_attr.grh,
- qp->s_hdrwords, nwords);
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
+ qp->s_hdrwords +=
+ hfi1_make_grh(ibp,
+ &ps->s_txreq->phdr.hdr.u.l.grh,
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
middle = 0;
}
- lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+ lrh0 |= (priv->s_sc & 0xf) << 12 |
+ (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4;
/*
* reset s_ahg/AHG fields
*
@@ -763,11 +777,13 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
else
qp->s_flags &= ~RVT_S_AHG_VALID;
ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
- ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ ps->s_txreq->phdr.hdr.lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
ps->s_txreq->phdr.hdr.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
- qp->remote_ah_attr.src_path_bits);
+ ps->s_txreq->phdr.hdr.lrh[3] =
+ cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0);
@@ -775,7 +791,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
if (qp->s_flags & RVT_S_ECN) {
qp->s_flags &= ~RVT_S_ECN;
/* we recently received a FECN, so return a BECN */
- bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
+ bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
}
ohdr->bth[1] = cpu_to_be32(bth1);
ohdr->bth[2] = cpu_to_be32(bth2);
@@ -784,23 +800,29 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
+void hfi1_do_send_from_rvt(struct rvt_qp *qp)
+{
+ hfi1_do_send(qp, false);
+}
+
void _hfi1_do_send(struct work_struct *work)
{
struct iowait *wait = container_of(work, struct iowait, iowork);
struct rvt_qp *qp = iowait_to_qp(wait);
- hfi1_do_send(qp);
+ hfi1_do_send(qp, true);
}
/**
* hfi1_do_send - perform a send on a QP
* @work: contains a pointer to the QP
+ * @in_thread: true if in a workqueue thread
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP.
* Otherwise, two threads could send packets out of order.
*/
-void hfi1_do_send(struct rvt_qp *qp)
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
{
struct hfi1_pkt_state ps;
struct hfi1_qp_priv *priv = qp->priv;
@@ -815,9 +837,9 @@ void hfi1_do_send(struct rvt_qp *qp)
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
- if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
- ) - 1)) ==
- ps.ppd->lid)) {
+ if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ps.ppd->lmc) - 1)) ==
+ ps.ppd->lid)) {
ruc_loopback(qp);
return;
}
@@ -825,9 +847,9 @@ void hfi1_do_send(struct rvt_qp *qp)
timeout_int = (qp->timeout_jiffies);
break;
case IB_QPT_UC:
- if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
- ) - 1)) ==
- ps.ppd->lid)) {
+ if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ps.ppd->lmc) - 1)) ==
+ ps.ppd->lid)) {
ruc_loopback(qp);
return;
}
@@ -868,8 +890,10 @@ void hfi1_do_send(struct rvt_qp *qp)
qp->s_hdrwords = 0;
/* allow other tasks to run */
if (unlikely(time_after(jiffies, timeout))) {
- if (workqueue_congested(cpu,
- ps.ppd->hfi1_wq)) {
+ if (!in_thread ||
+ workqueue_congested(
+ cpu,
+ ps.ppd->hfi1_wq)) {
spin_lock_irqsave(
&qp->s_lock,
ps.flags);
@@ -882,11 +906,9 @@ void hfi1_do_send(struct rvt_qp *qp)
*ps.ppd->dd->send_schedule);
return;
}
- if (!irqs_disabled()) {
- cond_resched();
- this_cpu_inc(
- *ps.ppd->dd->send_schedule);
- }
+ cond_resched();
+ this_cpu_inc(
+ *ps.ppd->dd->send_schedule);
timeout = jiffies + (timeout_int) / 8;
}
spin_lock_irqsave(&qp->s_lock, ps.flags);
@@ -909,8 +931,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
last = qp->s_last;
old_last = last;
+ trace_hfi1_qp_send_completion(qp, wqe, last);
if (++last >= qp->s_size)
last = 0;
+ trace_hfi1_qp_send_completion(qp, wqe, last);
qp->s_last = last;
/* See post_send() */
barrier();
@@ -920,7 +944,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- rvt_qp_swqe_complete(qp, wqe, status);
+ rvt_qp_swqe_complete(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 5cde1ec..bfd0d51 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -868,7 +868,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
@@ -962,7 +962,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
continue;
}
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ if (vl >= ARRAY_SIZE(rht_node->map)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (!rht_node) {
rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
@@ -982,7 +987,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
rht_node->map[vl]->ctr = 1;
rht_node->map[vl]->sde[0] = sde;
- ret = rhashtable_insert_fast(&dd->sdma_rht,
+ ret = rhashtable_insert_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
if (ret) {
@@ -1025,7 +1030,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
if (cpumask_test_cpu(cpu, mask))
continue;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (rht_node) {
bool empty = true;
@@ -1049,7 +1054,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
}
if (empty) {
- ret = rhashtable_remove_fast(&dd->sdma_rht,
+ ret = rhashtable_remove_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
WARN_ON(ret);
@@ -1108,7 +1113,7 @@ void sdma_seqfile_dump_cpu_list(struct seq_file *s,
struct sdma_rht_node *rht_node;
int i, j;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
sdma_rht_params);
if (!rht_node)
return;
@@ -1322,6 +1327,12 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
synchronize_rcu();
kfree(dd->per_sdma);
dd->per_sdma = NULL;
+
+ if (dd->sdma_rht) {
+ rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
+ kfree(dd->sdma_rht);
+ dd->sdma_rht = NULL;
+ }
}
/**
@@ -1341,12 +1352,14 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
{
unsigned this_idx;
struct sdma_engine *sde;
+ struct rhashtable *tmp_sdma_rht;
u16 descq_cnt;
void *curr_head;
struct hfi1_pportdata *ppd = dd->pport + port;
u32 per_sdma_credits;
uint idle_cnt = sdma_idle_cnt;
size_t num_engines = dd->chip_sdma_engines;
+ int ret = -ENOMEM;
if (!HFI1_CAP_IS_KSET(SDMA)) {
HFI1_CAP_CLEAR(SDMA_AHG);
@@ -1378,7 +1391,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
/* alloc memory for array of send engines */
dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
if (!dd->per_sdma)
- return -ENOMEM;
+ return ret;
idle_cnt = ns_to_cclock(dd, idle_cnt);
if (!sdma_desct_intr)
@@ -1507,18 +1520,27 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->flags |= HFI1_HAS_SEND_DMA;
dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
dd->num_sdma = num_engines;
- if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
+ ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
+ if (ret < 0)
goto bail;
- if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
+ if (!tmp_sdma_rht) {
+ ret = -ENOMEM;
goto bail;
+ }
+
+ ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
+ if (ret < 0)
+ goto bail;
+ dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
bail:
sdma_clean(dd, num_engines);
- return -ENOMEM;
+ return ret;
}
/**
@@ -1604,7 +1626,6 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
- rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 21f1e28..64f10b8 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -966,34 +966,34 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | sde[n] -> eng n |
+ * |--------------------------| --/ | sde[n-1] -> eng n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
- * |--------------------------| +--------------------+
- * | map[1] |--- | mask |
- * |--------------------------| \---- |--------------------|
- * | * | \-- | sde[0] -> eng 1+n |
- * | * | \---- |--------------------|
- * | * | \->| sde[1] -> eng 2+n |
- * |--------------------------| |--------------------|
- * | map[vls - 1] |- | * |
- * +--------------------------+ \- |--------------------|
- * \- | sde[m] -> eng m+n |
- * \ +--------------------+
+ * |--------------------------| +---------------------+
+ * | map[1] |--- | mask |
+ * |--------------------------| \---- |---------------------|
+ * | * | \-- | sde[0] -> eng 1+n |
+ * | * | \---- |---------------------|
+ * | * | \->| sde[1] -> eng 2+n |
+ * |--------------------------| |---------------------|
+ * | map[vls - 1] |- | * |
+ * +--------------------------+ \- |---------------------|
+ * \- | sde[m-1] -> eng m+n |
+ * \ +---------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | sde[0] -> eng 1+m+n|
- * \- |--------------------|
- * >| sde[1] -> eng 2+m+n|
- * |--------------------|
- * | * |
- * |--------------------|
- * | sde[o] -> eng o+m+n|
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | sde[0] -> eng 1+m+n |
+ * \- |----------------------|
+ * >| sde[1] -> eng 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | sde[o-1] -> eng o+m+n|
+ * +----------------------+
*
*/
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 919a547..50d140d 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -542,7 +542,7 @@ static ssize_t show_nctxts(struct device *device,
* give a more accurate picture of total contexts available.
*/
return scnprintf(buf, PAGE_SIZE, "%u\n",
- min(dd->num_rcv_contexts - dd->first_user_ctxt,
+ min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt,
(u32)dd->sc_sizes[SC_USER].count));
}
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index e86798a..eafae48 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -51,13 +51,12 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
u8 opcode;
- u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- if (lnh == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
return hdr_len_by_opcode[opcode] == 0 ?
0 : hdr_len_by_opcode[opcode] - (12 + 8);
}
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 382fcda..090f6b5 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -139,11 +139,11 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
__entry->pkey =
be32_to_cpu(ohdr->bth[0]) & 0xffff;
__entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
- HFI1_FECN_MASK;
+ (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) &
+ IB_FECN_MASK;
__entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
- HFI1_BECN_MASK;
+ (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) &
+ IB_BECN_MASK;
__entry->qpn =
be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
__entry->a =
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index d308454..deac77d 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src)
);
+#ifdef CONFIG_FAULT_INJECTION
+TRACE_EVENT(hfi1_fault_opcode,
+ TP_PROTO(struct rvt_qp *qp, u8 opcode),
+ TP_ARGS(qp, opcode),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u8, opcode)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->opcode = opcode;
+ ),
+ TP_printk("[%s] qpn 0x%x opcode 0x%x",
+ __get_str(dev), __entry->qpn, __entry->opcode)
+);
+
+TRACE_EVENT(hfi1_fault_packet,
+ TP_PROTO(struct hfi1_packet *packet),
+ TP_ARGS(packet),
+ TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd)
+ __field(u64, eflags)
+ __field(u32, ctxt)
+ __field(u32, hlen)
+ __field(u32, tlen)
+ __field(u32, updegr)
+ __field(u32, etail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd);
+ __entry->eflags = rhf_err_flags(packet->rhf);
+ __entry->ctxt = packet->rcd->ctxt;
+ __entry->hlen = packet->hlen;
+ __entry->tlen = packet->tlen;
+ __entry->updegr = packet->updegr;
+ __entry->etail = rhf_egr_index(packet->rhf);
+ ),
+ TP_printk(
+ "[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->eflags,
+ __entry->hlen,
+ __entry->tlen,
+ __entry->updegr,
+ __entry->etail
+ )
+);
+#endif
+
#endif /* __HFI1_TRACE_MISC_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h
index 5ea5005..8ce4765 100644
--- a/drivers/infiniband/hw/hfi1/trace_rc.h
+++ b/drivers/infiniband/hw/hfi1/trace_rc.h
@@ -1,5 +1,5 @@
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015, 2016, 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -104,11 +104,6 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack,
TP_ARGS(qp, psn)
);
-DEFINE_EVENT(hfi1_rc_template, hfi1_timeout,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
-);
-
DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 415d6be..2c9ac57 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -633,6 +633,49 @@ DEFINE_EVENT(hfi1_bct_template, bct_get,
TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
TP_ARGS(dd, bc));
+TRACE_EVENT(
+ hfi1_qp_send_completion,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
+ TP_ARGS(qp, wqe, idx),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(struct rvt_swqe *, wqe)
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, qpt)
+ __field(u32, length)
+ __field(u32, idx)
+ __field(u32, ssn)
+ __field(enum ib_wr_opcode, opcode)
+ __field(int, send_flags)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ __entry->wqe = wqe;
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
+ __entry->length = wqe->length;
+ __entry->idx = idx;
+ __entry->ssn = wqe->ssn;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->send_flags = wqe->wr.send_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->qpt,
+ __entry->wqe,
+ __entry->idx,
+ __entry->wr_id,
+ __entry->length,
+ __entry->ssn,
+ __entry->opcode,
+ __entry->send_flags
+ )
+);
+
#endif /* __HFI1_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 4b2a840..5da1e45 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -94,7 +94,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
/* Get the next send request. */
@@ -320,7 +320,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
process_ecn(qp, packet, true);
psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ opcode = ib_bth_get_opcode(ohdr);
/* Compare the PSN verses the expected PSN. */
if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
@@ -433,7 +433,7 @@ no_immediate_data:
wc.wc_flags = 0;
send_last:
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -451,7 +451,7 @@ last_imm:
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
@@ -463,7 +463,7 @@ last_imm:
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
- wc.sl = qp->remote_ah_attr.sl;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
@@ -528,7 +528,7 @@ rdma_last_imm:
wc.wc_flags = IB_WC_WITH_IMM;
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -555,7 +555,7 @@ rdma_last_imm:
case OP(RDMA_WRITE_LAST):
rdma_last:
/* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 13ea4eb..6a4e95c 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -68,7 +68,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
struct hfi1_pportdata *ppd;
struct rvt_qp *qp;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
unsigned long flags;
struct rvt_sge_state ssge;
struct rvt_sge *sge;
@@ -103,17 +103,17 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_num > 1) {
u16 pkey;
u16 slid;
- u8 sc5 = ibp->sl_to_sc[ah_attr->sl];
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index);
- slid = ppd->lid | (ah_attr->src_path_bits &
+ slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
qp->s_pkey_index, slid))) {
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
- slid, ah_attr->dlid);
+ slid, rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -131,13 +131,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (unlikely(qkey != qp->qkey)) {
u16 lid;
- lid = ppd->lid | (ah_attr->src_path_bits &
+ lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
- ah_attr->sl,
+ rdma_ah_get_sl(ah_attr),
sqp->ibqp.qp_num, qp->ibqp.qp_num,
lid,
- ah_attr->dlid);
+ rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -183,11 +183,11 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto bail_unlock;
}
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
struct ib_grh grh;
- struct ib_global_route grd = ah_attr->grh;
+ const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr);
- hfi1_make_grh(ibp, &grh, &grd, 0, 0);
+ hfi1_make_grh(ibp, &grh, grd, 0, 0);
hfi1_copy_sge(&qp->r_sge, &grh,
sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
@@ -243,12 +243,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
} else {
wc.pkey_index = 0;
}
- wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
+ wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1));
/* Check for loopback when the port lid is not set */
if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
- wc.sl = ah_attr->sl;
- wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
+ wc.sl = rdma_ah_get_sl(ah_attr);
+ wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
@@ -272,7 +273,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct ib_other_headers *ohdr;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
struct rvt_swqe *wqe;
@@ -319,9 +320,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
- if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
- lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
+ if (rdma_ah_get_dlid(ah_attr) < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1);
if (unlikely(!loopback &&
(lid == ppd->lid ||
(lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
@@ -356,7 +357,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_hdrwords = 7;
ps->s_txreq->s_cur_size = wqe->length;
ps->s_txreq->ss = &qp->s_sge;
- qp->s_srate = ah_attr->static_rate;
+ qp->s_srate = rdma_ah_get_static_rate(ah_attr);
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
@@ -364,11 +365,11 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
/* Header size in 32-bit words. */
qp->s_hdrwords += hfi1_make_grh(ibp,
&ps->s_txreq->phdr.hdr.u.l.grh,
- &ah_attr->grh,
+ rdma_ah_read_grh(ah_attr),
qp->s_hdrwords, nwords);
lrh0 = HFI1_LRH_GRH;
ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
@@ -388,8 +389,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
} else {
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
}
- sc5 = ibp->sl_to_sc[ah_attr->sl];
- lrh0 |= (ah_attr->sl & 0xf) << 4;
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
+ lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI) {
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
priv->s_sc = 0xf;
@@ -402,15 +403,17 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
ps->s_txreq->psc = priv->s_sendcontext;
ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
- ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);
+ ps->s_txreq->phdr.hdr.lrh[1] =
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr));
ps->s_txreq->phdr.hdr.lrh[2] =
cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ if (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) {
ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
} else {
lid = ppd->lid;
if (lid) {
- lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid);
} else {
ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
@@ -537,7 +540,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
bth0 = pkey | (IB_OPCODE_CNP << 24);
ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT));
+ ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
ohdr->bth[2] = 0; /* PSN 0 */
hdr.lrh[0] = cpu_to_be16(lrh0);
@@ -680,7 +683,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
- u8 sc5 = hdr2sc(hdr, packet->rhf);
+ u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
u32 bth1;
u8 sl_from_sc, sl;
u16 slid;
@@ -688,18 +691,16 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
- dlid = be16_to_cpu(hdr->lrh[1]);
+ dlid = ib_get_dlid(hdr);
bth1 = be32_to_cpu(ohdr->bth[1]);
- slid = be16_to_cpu(hdr->lrh[3]);
- pkey = (u16)be32_to_cpu(ohdr->bth[0]);
- sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ slid = ib_get_slid(hdr);
+ pkey = ib_bth_get_pkey(ohdr);
+ opcode = ib_bth_get_opcode(ohdr);
+ sl = ib_get_sl(hdr);
+ extra_bytes = ib_bth_get_pad(ohdr);
extra_bytes += (SIZE_OF_CRC << 2);
sl_from_sc = ibp->sc_to_sl[sc5];
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- opcode &= 0xff;
-
process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
/*
* Get the number of bytes the message was padded by
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 4a82953..35c6e7e 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -200,8 +200,9 @@ int hfi1_user_exp_rcv_init(struct file *fp)
if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) {
fd->invalid_tid_idx = 0;
- fd->invalid_tids = kzalloc(uctxt->expected_count *
- sizeof(u32), GFP_KERNEL);
+ fd->invalid_tids = kcalloc(uctxt->expected_count,
+ sizeof(*fd->invalid_tids),
+ GFP_KERNEL);
if (!fd->invalid_tids) {
ret = -ENOMEM;
goto done;
@@ -578,6 +579,9 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
u32 *tidinfo;
unsigned tididx;
+ if (unlikely(tinfo->tidcnt > fd->tid_used))
+ return -EINVAL;
+
tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist,
sizeof(tidinfo[0]) * tinfo->tidcnt);
if (IS_ERR(tidinfo))
@@ -607,7 +611,7 @@ int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
unsigned long *ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+ (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
u32 *array;
int ret = 0;
@@ -1011,8 +1015,8 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
* process in question.
*/
ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+ (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
}
fdata->invalid_tid_idx++;
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 68295a1..e341e6d 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015-2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -73,7 +73,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
{
unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
size = (cache_size * (1UL << 20)); /* convert to bytes */
- unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt;
+ unsigned int usr_ctxts =
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
bool can_lock = capable(CAP_IPC_LOCK);
/*
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index e6811c4e..0749689 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -376,7 +376,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
{
struct hfi1_filedata *fd;
int ret = 0;
- unsigned memsize;
char buf[64];
struct hfi1_devdata *dd;
struct hfi1_user_sdma_comp_q *cq;
@@ -401,13 +400,15 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
if (!pq)
goto pq_nomem;
- memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
- pq->reqs = kzalloc(memsize, GFP_KERNEL);
+ pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
+ sizeof(*pq->reqs),
+ GFP_KERNEL);
if (!pq->reqs)
goto pq_reqs_nomem;
- memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long);
- pq->req_in_use = kzalloc(memsize, GFP_KERNEL);
+ pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
+ sizeof(*pq->req_in_use),
+ GFP_KERNEL);
if (!pq->req_in_use)
goto pq_reqs_no_in_use;
@@ -442,8 +443,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
if (!cq)
goto cq_nomem;
- memsize = PAGE_ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size);
- cq->comps = vmalloc_user(memsize);
+ cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
+ * hfi1_sdma_comp_ring_size));
if (!cq->comps)
goto cq_comps_nomem;
@@ -704,7 +705,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
/* Save all the IO vector structures */
for (i = 0; i < req->data_iovs; i++) {
INIT_LIST_HEAD(&req->iovs[i].list);
- memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
+ memcpy(&req->iovs[i].iov,
+ iovec + idx++,
+ sizeof(req->iovs[i].iov));
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->status = ret;
@@ -1615,9 +1618,10 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
{
hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
- cq->comps[idx].status = state;
if (state == ERROR)
cq->comps[idx].errcode = -ret;
+ smp_wmb(); /* make sure errcode is visible first */
+ cq->comps[idx].status = state;
trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
idx, state, ret);
}
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 222315f..90e7b77 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -60,6 +60,8 @@
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"
+#include "debugfs.h"
+#include "vnic.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -297,6 +299,22 @@ static inline bool wss_exceeds_threshold(void)
}
/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
+};
+
+/*
* Length of header by opcode, 0 --> not supported
*/
const u8 hdr_len_by_opcode[256] = {
@@ -501,6 +519,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
return NULL;
}
+static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
+{
+#ifdef CONFIG_FAULT_INJECTION
+ if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
+ /*
+ * In order to drop non-IB traffic we
+ * set PbcInsertHrc to NONE (0x2).
+ * The packet will still be delivered
+ * to the receiving node but a
+ * KHdrHCRCErr (KDETH packet with a bad
+ * HCRC) will be triggered and the
+ * packet will not be delivered to the
+ * correct context.
+ */
+ pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
+ else
+ /*
+ * In order to drop regular verbs
+ * traffic we set the PbcTestEbp
+ * flag. The packet will still be
+ * delivered to the receiving node but
+ * a 'late ebp error' will be
+ * triggered and will be dropped.
+ */
+ pbc |= PBC_TEST_EBP;
+#endif
+ return pbc;
+}
+
/**
* hfi1_ib_rcv - process an incoming packet
* @packet: data packet information
@@ -525,7 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
u16 lid;
/* Check for GRH */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
@@ -544,12 +591,12 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
trace_input_ibhdr(rcd->dd, hdr);
- opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
+ opcode = ib_bth_get_opcode(packet->ohdr);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* Get the destination QP number. */
qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
- lid = be16_to_cpu(hdr->lrh[1]);
+ lid = ib_get_dlid(hdr);
if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
struct rvt_mcast *mcast;
@@ -557,7 +604,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
if (lnh != HFI1_LRH_GRH)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+ mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
if (!mcast)
goto drop;
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
@@ -583,6 +630,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock();
goto drop;
}
+ if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
+ true))) {
+ rcu_read_unlock();
+ goto drop;
+ }
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(opcode, packet);
if (likely(packet_handler))
@@ -781,7 +833,6 @@ static int build_verbs_tx_desc(
if (ret)
goto bail_txadd;
}
-
/* add the ulp payload - if any. tx->ss can be NULL for acks */
if (tx->ss)
ret = build_verbs_ulp_payload(sde, length, tx);
@@ -800,7 +851,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
struct verbs_txreq *tx;
- u64 pbc_flags = 0;
u8 sc5 = priv->s_sc;
int ret;
@@ -809,12 +859,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (!sdma_txreq_built(&tx->txreq)) {
if (likely(pbc == 0)) {
u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd,
- pbc_flags,
+ pbc,
qp->srate_mbps,
vl,
plen);
@@ -917,7 +971,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
- u64 pbc_flags = 0;
u8 sc5;
unsigned long flags = 0;
struct send_context *sc;
@@ -942,9 +995,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (likely(pbc == 0)) {
u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+ struct verbs_txreq *tx = ps->s_txreq;
+ u8 opcode = get_opcode(&tx->phdr.hdr);
+
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
- pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+ pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+ pbc = hfi1_fault_tx(qp, opcode, pbc);
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
@@ -1173,7 +1231,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hdr = &ps->s_txreq->phdr.hdr;
/* locate the pkey within the headers */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_GRH)
ohdr = &hdr->u.l.oth;
else
@@ -1220,17 +1278,20 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- u16 ver = dd->dc8051_ver;
+ u32 ver = dd->dc8051_ver;
memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
- rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
- (u64)dc8051_ver_min(ver);
+ rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) |
+ ((u64)(dc8051_ver_min(ver)) << 16) |
+ (u64)dc8051_ver_patch(ver);
+
rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
+ IB_DEVICE_MEM_MGT_EXTENSIONS |
+ IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
@@ -1398,14 +1459,14 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
/*
* convert ah port,sl to sc
*/
-u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah)
+u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah)
{
- struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num);
+ struct hfi1_ibport *ibp = to_iport(ibdev, rdma_ah_get_port_num(ah));
- return ibp->sl_to_sc[ah->sl];
+ return ibp->sl_to_sc[rdma_ah_get_sl(ah)];
}
-static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
{
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
@@ -1413,9 +1474,9 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
u8 sc5;
/* test the mapping for validity */
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[ah_attr->sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
dd = dd_from_ppd(ppd);
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
return -EINVAL;
@@ -1423,7 +1484,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
}
static void hfi1_notify_new_ah(struct ib_device *ibdev,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct rvt_ah *ah)
{
struct hfi1_ibport *ibp;
@@ -1436,9 +1497,9 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev,
* done being setup. We can however modify things which we need to set.
*/
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[ah->attr.sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)];
dd = dd_from_ppd(ppd);
ah->vl = sc_to_vlt(dd, sc5);
if (ah->vl < num_vls || ah->vl == 15)
@@ -1447,17 +1508,21 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev,
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
struct ib_ah *ah = ERR_PTR(-EINVAL);
struct rvt_qp *qp0;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct hfi1_devdata *dd = dd_from_ppd(ppd);
+ u8 port_num = ppd->port;
memset(&attr, 0, sizeof(attr));
- attr.dlid = dlid;
- attr.port_num = ppd_from_ibp(ibp)->port;
+ attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
+ rdma_ah_set_dlid(&attr, dlid);
+ rdma_ah_set_port_num(&attr, ppd_from_ibp(ibp)->port);
rcu_read_lock();
qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
- ah = ib_create_ah(qp0->ibqp.pd, &attr);
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr);
rcu_read_unlock();
return ah;
}
@@ -1504,10 +1569,10 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
struct hfi1_ibdev *dev = dev_from_rdi(rdi);
- u16 ver = dd_from_dev(dev)->dc8051_ver;
+ u32 ver = dd_from_dev(dev)->dc8051_ver;
- snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver),
- dc8051_ver_min(ver));
+ snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver),
+ dc8051_ver_min(ver), dc8051_ver_patch(ver));
}
static const char * const driver_cntr_names[] = {
@@ -1524,6 +1589,7 @@ static const char * const driver_cntr_names[] = {
"DRIVER_EgrHdrFull"
};
+static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */
static const char **dev_cntr_names;
static const char **port_cntr_names;
static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
@@ -1578,6 +1644,7 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
{
int i, err;
+ mutex_lock(&cntr_names_lock);
if (!cntr_names_initialized) {
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1586,8 +1653,10 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
num_driver_cntrs,
&num_dev_cntrs,
&dev_cntr_names);
- if (err)
+ if (err) {
+ mutex_unlock(&cntr_names_lock);
return NULL;
+ }
for (i = 0; i < num_driver_cntrs; i++)
dev_cntr_names[num_dev_cntrs + i] =
@@ -1601,10 +1670,12 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
if (err) {
kfree(dev_cntr_names);
dev_cntr_names = NULL;
+ mutex_unlock(&cntr_names_lock);
return NULL;
}
cntr_names_initialized = 1;
}
+ mutex_unlock(&cntr_names_lock);
if (!port_num)
return rdma_alloc_hw_stats_struct(
@@ -1707,6 +1778,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ibdev->modify_device = modify_device;
ibdev->alloc_hw_stats = alloc_hw_stats;
ibdev->get_hw_stats = get_hw_stats;
+ ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn;
+ ibdev->free_rdma_netdev = hfi1_vnic_free_rn;
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
@@ -1751,7 +1824,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
- dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
+ dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt;
dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
@@ -1823,9 +1896,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
+ mutex_lock(&cntr_names_lock);
kfree(dev_cntr_names);
kfree(port_cntr_names);
+ dev_cntr_names = NULL;
+ port_cntr_names = NULL;
cntr_names_initialized = 0;
+ mutex_unlock(&cntr_names_lock);
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
@@ -1840,12 +1917,12 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
switch (packet->qp->ibqp.qp_type) {
case IB_QPT_UC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
case IB_QPT_RC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_RC;
break;
@@ -1859,7 +1936,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc5 = hdr2sc(hdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 3a0b589..52ff275 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -195,6 +195,11 @@ struct hfi1_ibdev {
struct dentry *hfi1_ibdev_dbg;
/* per HFI symlinks to above */
struct dentry *hfi1_ibdev_link;
+#ifdef CONFIG_FAULT_INJECTION
+ struct fault_opcode *fault_opcode;
+ struct fault_packet *fault_packet;
+ bool fault_suppress_err;
+#endif
#endif
};
@@ -303,7 +308,7 @@ void hfi1_rc_hdrerr(
u32 rcv_flags,
struct rvt_qp *qp);
-u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);
@@ -342,7 +347,7 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords);
+ const struct ib_global_route *grh, u32 hwords, u32 nwords);
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
@@ -350,7 +355,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
void _hfi1_do_send(struct work_struct *work);
-void hfi1_do_send(struct rvt_qp *qp);
+void hfi1_do_send_from_rvt(struct rvt_qp *qp);
+
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
new file mode 100644
index 0000000..e2c4552
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -0,0 +1,184 @@
+#ifndef _HFI1_VNIC_H
+#define _HFI1_VNIC_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/opa_vnic.h>
+#include "hfi.h"
+#include "sdma.h"
+
+#define HFI1_VNIC_MAX_TXQ 16
+#define HFI1_VNIC_MAX_PAD 12
+
+/* L2 header definitions */
+#define HFI1_L2_TYPE_OFFSET 0x7
+#define HFI1_L2_TYPE_SHFT 0x5
+#define HFI1_L2_TYPE_MASK 0x3
+
+#define HFI1_GET_L2_TYPE(hdr) \
+ ((*((u8 *)(hdr) + HFI1_L2_TYPE_OFFSET) >> HFI1_L2_TYPE_SHFT) & \
+ HFI1_L2_TYPE_MASK)
+
+/* L4 type definitions */
+#define HFI1_L4_TYPE_OFFSET 8
+
+#define HFI1_GET_L4_TYPE(data) \
+ (*((u8 *)(data) + HFI1_L4_TYPE_OFFSET))
+
+/* L4 header definitions */
+#define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN
+
+#define HFI1_VNIC_GET_L4_HDR(data) \
+ (*((u16 *)((u8 *)(data) + HFI1_VNIC_L4_HDR_OFFSET)))
+
+#define HFI1_VNIC_GET_VESWID(data) \
+ (HFI1_VNIC_GET_L4_HDR(data) & 0xFFF)
+
+/* Service class */
+#define HFI1_VNIC_SC_OFFSET_LOW 6
+#define HFI1_VNIC_SC_OFFSET_HI 7
+#define HFI1_VNIC_SC_SHIFT 4
+
+#define HFI1_VNIC_MAX_QUEUE 16
+
+/**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd - device data pointer
+ * @sde - sdma engine
+ * @vinfo - vnic info pointer
+ * @wait - iowait structure
+ * @stx - sdma tx request
+ * @state - vnic Tx ring SDMA state
+ * @q_idx - vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+ struct hfi1_devdata *dd;
+ struct sdma_engine *sde;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct iowait wait;
+ struct sdma_txreq stx;
+ unsigned int state;
+ u8 q_idx;
+};
+
+/**
+ * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue
+ * @idx: queue index
+ * @vinfo: pointer to vport information
+ * @netdev: network device
+ * @napi: netdev napi structure
+ * @skbq: queue of received socket buffers
+ */
+struct hfi1_vnic_rx_queue {
+ u8 idx;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct napi_struct napi;
+ struct sk_buff_head skbq;
+};
+
+/**
+ * struct hfi1_vnic_vport_info - HFI1 VNIC virtual port information
+ * @dd: device data pointer
+ * @netdev: net device pointer
+ * @flags: state flags
+ * @lock: vport lock
+ * @num_tx_q: number of transmit queues
+ * @num_rx_q: number of receive queues
+ * @vesw_id: virtual switch id
+ * @rxq: Array of receive queues
+ * @stats: per queue stats
+ * @sdma: VNIC SDMA structure per TXQ
+ */
+struct hfi1_vnic_vport_info {
+ struct hfi1_devdata *dd;
+ struct net_device *netdev;
+ unsigned long flags;
+
+ /* Lock used around state updates */
+ struct mutex lock;
+
+ u8 num_tx_q;
+ u8 num_rx_q;
+ u16 vesw_id;
+ struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT];
+
+ struct opa_vnic_stats stats[HFI1_VNIC_MAX_QUEUE];
+ struct hfi1_vnic_sdma sdma[HFI1_VNIC_MAX_TXQ];
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(vinfo->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(vinfo->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(vinfo->netdev, format, ## arg)
+
+/* vnic hfi1 internal functions */
+void hfi1_vnic_setup(struct hfi1_devdata *dd);
+void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx);
+
+/* vnic rdma netdev operations */
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
+void hfi1_vnic_free_rn(struct net_device *netdev);
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
+
+#endif /* _HFI1_VNIC_H */
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
new file mode 100644
index 0000000..392f4d5
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -0,0 +1,907 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for VNIC functionality
+ */
+
+#include <linux/io.h>
+#include <linux/if_vlan.h>
+
+#include "vnic.h"
+
+#define HFI_TX_TIMEOUT_MS 1000
+
+#define HFI1_VNIC_RCV_Q_SIZE 1024
+
+#define HFI1_VNIC_UP 0
+
+static DEFINE_SPINLOCK(vport_cntr_lock);
+
+static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
+{
+ unsigned int rcvctrl_ops = 0;
+ int ret;
+
+ ret = hfi1_init_ctxt(uctxt->sc);
+ if (ret)
+ goto done;
+
+ uctxt->do_interrupt = &handle_receive_interrupt;
+
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ goto done;
+
+ ret = hfi1_setup_eagerbufs(uctxt);
+ if (ret)
+ goto done;
+
+ set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
+
+ if (uctxt->rcvhdrtail_kvaddr)
+ clear_rcvhdrtail(uctxt);
+
+ rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+ rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
+
+ if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
+
+ uctxt->is_vnic = true;
+done:
+ return ret;
+}
+
+static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **vnic_ctxt)
+{
+ struct hfi1_ctxtdata *uctxt;
+ unsigned int ctxt;
+ int ret;
+
+ if (dd->flags & HFI1_FROZEN)
+ return -EIO;
+
+ for (ctxt = dd->first_dyn_alloc_ctxt;
+ ctxt < dd->num_rcv_contexts; ctxt++)
+ if (!dd->rcd[ctxt])
+ break;
+
+ if (ctxt == dd->num_rcv_contexts)
+ return -EBUSY;
+
+ uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node);
+ if (!uctxt) {
+ dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
+ return -ENOMEM;
+ }
+
+ uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+ HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+ HFI1_CAP_KGET(NODROP_EGR_FULL) |
+ HFI1_CAP_KGET(DMA_RTAIL);
+ uctxt->seq_cnt = 1;
+
+ /* Allocate and enable a PIO send context */
+ uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
+ uctxt->numa_id);
+
+ ret = uctxt->sc ? 0 : -ENOMEM;
+ if (ret)
+ goto bail;
+
+ dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
+ uctxt->sc->sw_index, uctxt->sc->hw_context);
+ ret = sc_enable(uctxt->sc);
+ if (ret)
+ goto bail;
+
+ if (dd->num_msix_entries)
+ hfi1_set_vnic_msix_info(uctxt);
+
+ hfi1_stats.sps_ctxts++;
+ dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
+ *vnic_ctxt = uctxt;
+
+ return ret;
+bail:
+ /*
+ * hfi1_free_ctxtdata() also releases send_context
+ * structure if uctxt->sc is not null
+ */
+ dd->rcd[uctxt->ctxt] = NULL;
+ hfi1_free_ctxtdata(dd, uctxt);
+ dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
+ return ret;
+}
+
+static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ unsigned long flags;
+
+ dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
+ flush_wc();
+
+ if (dd->num_msix_entries)
+ hfi1_reset_vnic_msix_info(uctxt);
+
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ /*
+ * Disable receive context and interrupt available, reset all
+ * RcvCtxtCtrl bits to default values.
+ */
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_TIDFLOW_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
+ HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
+ /*
+ * VNIC contexts are allocated from user context pool.
+ * Release them back to user context pool.
+ *
+ * Reset context integrity checks to default.
+ * (writes to CSRs probably belong in chip.c)
+ */
+ write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
+ hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
+ sc_disable(uctxt->sc);
+
+ dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+
+ dd->rcd[uctxt->ctxt] = NULL;
+ uctxt->event_flags = 0;
+
+ hfi1_clear_tids(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+
+ hfi1_stats.sps_ctxts--;
+ hfi1_free_ctxtdata(dd, uctxt);
+}
+
+void hfi1_vnic_setup(struct hfi1_devdata *dd)
+{
+ idr_init(&dd->vnic.vesw_idr);
+}
+
+void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
+{
+ idr_destroy(&dd->vnic.vesw_idr);
+}
+
+#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \
+ u64 *src64, *dst64; \
+ for (src64 = &qstats->x_grp.unicast, \
+ dst64 = &stats->x_grp.unicast; \
+ dst64 <= &stats->x_grp.s_1519_max;) { \
+ *dst64++ += *src64++; \
+ } \
+ } while (0)
+
+/* hfi1_vnic_update_stats - update statistics */
+static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
+ struct opa_vnic_stats *stats)
+{
+ struct net_device *netdev = vinfo->netdev;
+ u8 i;
+
+ /* add tx counters on different queues */
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
+ stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
+ stats->tx_drop_state += qstats->tx_drop_state;
+ stats->tx_dlid_zero += qstats->tx_dlid_zero;
+
+ SUM_GRP_COUNTERS(stats, qstats, tx_grp);
+ stats->netstats.tx_packets += qnstats->tx_packets;
+ stats->netstats.tx_bytes += qnstats->tx_bytes;
+ }
+
+ /* add rx counters on different queues */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
+ stats->netstats.rx_nohandler += qnstats->rx_nohandler;
+ stats->rx_drop_state += qstats->rx_drop_state;
+ stats->rx_oversize += qstats->rx_oversize;
+ stats->rx_runt += qstats->rx_runt;
+
+ SUM_GRP_COUNTERS(stats, qstats, rx_grp);
+ stats->netstats.rx_packets += qnstats->rx_packets;
+ stats->netstats.rx_bytes += qnstats->rx_bytes;
+ }
+
+ stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
+ stats->netstats.tx_carrier_errors +
+ stats->tx_drop_state + stats->tx_dlid_zero;
+ stats->netstats.tx_dropped = stats->netstats.tx_errors;
+
+ stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
+ stats->netstats.rx_nohandler +
+ stats->rx_drop_state + stats->rx_oversize +
+ stats->rx_runt;
+ stats->netstats.rx_dropped = stats->netstats.rx_errors;
+
+ netdev->stats.tx_packets = stats->netstats.tx_packets;
+ netdev->stats.tx_bytes = stats->netstats.tx_bytes;
+ netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
+ netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
+ netdev->stats.tx_errors = stats->netstats.tx_errors;
+ netdev->stats.tx_dropped = stats->netstats.tx_dropped;
+
+ netdev->stats.rx_packets = stats->netstats.rx_packets;
+ netdev->stats.rx_bytes = stats->netstats.rx_bytes;
+ netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
+ netdev->stats.multicast = stats->rx_grp.mcastbcast;
+ netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
+ netdev->stats.rx_errors = stats->netstats.rx_errors;
+ netdev->stats.rx_dropped = stats->netstats.rx_dropped;
+}
+
+/* update_len_counters - update pkt's len histogram counters */
+static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
+ int len)
+{
+ /* account for 4 byte FCS */
+ if (len >= 1515)
+ grp->s_1519_max++;
+ else if (len >= 1020)
+ grp->s_1024_1518++;
+ else if (len >= 508)
+ grp->s_512_1023++;
+ else if (len >= 252)
+ grp->s_256_511++;
+ else if (len >= 124)
+ grp->s_128_255++;
+ else if (len >= 61)
+ grp->s_65_127++;
+ else
+ grp->s_64++;
+}
+
+/* hfi1_vnic_update_tx_counters - update transmit counters */
+static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.tx_packets++;
+ stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(tx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ tx_grp->mcastbcast++;
+ else
+ tx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ tx_grp->vlan++;
+ else
+ tx_grp->untagged++;
+}
+
+/* hfi1_vnic_update_rx_counters - update receive counters */
+static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.rx_packets++;
+ stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(rx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ rx_grp->mcastbcast++;
+ else
+ rx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ rx_grp->vlan++;
+ else
+ rx_grp->untagged++;
+}
+
+/* This function is overloaded for opa_vnic specific implementation */
+static void hfi1_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_update_stats(vinfo, vstats);
+}
+
+static u64 create_bypass_pbc(u32 vl, u32 dw_len)
+{
+ u64 pbc;
+
+ pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
+ | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
+ | PBC_PACKET_BYPASS
+ | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
+ | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;
+
+ return pbc;
+}
+
+/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
+static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ netif_stop_subqueue(vinfo->netdev, q_idx);
+ if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
+ return;
+
+ netif_start_subqueue(vinfo->netdev, q_idx);
+}
+
+static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ u8 pad_len, q_idx = skb->queue_mapping;
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct opa_vnic_skb_mdata *mdata;
+ u32 pkt_len, total_len;
+ int err = -EINVAL;
+ u64 pbc;
+
+ v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
+ if (unlikely(!netif_oper_up(netdev))) {
+ vinfo->stats[q_idx].tx_drop_state++;
+ goto tx_finish;
+ }
+
+ /* take out meta data */
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ skb_pull(skb, sizeof(*mdata));
+ if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
+ vinfo->stats[q_idx].tx_dlid_zero++;
+ goto tx_finish;
+ }
+
+ /* add tail padding (for 8 bytes size alignment) and icrc */
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
+ /*
+ * pkt_len is how much data we have to write, includes header and data.
+ * total_len is length of the packet in Dwords plus the PBC should not
+ * include the CRC.
+ */
+ pkt_len = (skb->len + pad_len) >> 2;
+ total_len = pkt_len + 2; /* PBC + packet */
+
+ pbc = create_bypass_pbc(mdata->vl, total_len);
+
+ skb_get(skb);
+ v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
+ err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
+ if (unlikely(err)) {
+ if (err == -ENOMEM)
+ vinfo->stats[q_idx].netstats.tx_fifo_errors++;
+ else if (err != -EBUSY)
+ vinfo->stats[q_idx].netstats.tx_carrier_errors++;
+ }
+ /* remove the header before updating tx counters */
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ if (unlikely(err == -EBUSY)) {
+ hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_BUSY;
+ }
+
+tx_finish:
+ /* update tx counters */
+ hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
+
+static u16 hfi1_vnic_select_queue(struct net_device *netdev,
+ struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ struct sdma_engine *sde;
+
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
+ return sde->this_idx;
+}
+
+/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
+static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
+ struct sk_buff *skb)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
+ int rc = -EFAULT;
+
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ /* Validate Packet length */
+ if (unlikely(skb->len > max_len))
+ vinfo->stats[rxq->idx].rx_oversize++;
+ else if (unlikely(skb->len < ETH_ZLEN))
+ vinfo->stats[rxq->idx].rx_runt++;
+ else
+ rc = 0;
+ return rc;
+}
+
+static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
+{
+ unsigned char *pad_info;
+ struct sk_buff *skb;
+
+ skb = skb_dequeue(&rxq->skbq);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* remove tail padding and icrc */
+ pad_info = skb->data + skb->len - 1;
+ skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
+ ((*pad_info) & 0x7)));
+
+ return skb;
+}
+
+/* hfi1_vnic_handle_rx - handle skb receive */
+static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
+ int *work_done, int work_to_do)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ struct sk_buff *skb;
+ int rc;
+
+ while (1) {
+ if (*work_done >= work_to_do)
+ break;
+
+ skb = hfi1_vnic_get_skb(rxq);
+ if (unlikely(!skb))
+ break;
+
+ rc = hfi1_vnic_decap_skb(rxq, skb);
+ /* update rx counters */
+ hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
+ if (unlikely(rc)) {
+ dev_kfree_skb_any(skb);
+ continue;
+ }
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+ napi_gro_receive(&rxq->napi, skb);
+ (*work_done)++;
+ }
+}
+
+/* hfi1_vnic_napi - napi receive polling callback function */
+static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
+{
+ struct hfi1_vnic_rx_queue *rxq = container_of(napi,
+ struct hfi1_vnic_rx_queue, napi);
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int work_done = 0;
+
+ v_dbg("napi %d budget %d\n", rxq->idx, budget);
+ hfi1_vnic_handle_rx(rxq, &work_done, budget);
+
+ v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
+ if (work_done < budget)
+ napi_complete(napi);
+
+ return work_done;
+}
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_devdata *dd = packet->rcd->dd;
+ struct hfi1_vnic_vport_info *vinfo = NULL;
+ struct hfi1_vnic_rx_queue *rxq;
+ struct sk_buff *skb;
+ int l4_type, vesw_id = -1;
+ u8 q_idx;
+
+ l4_type = HFI1_GET_L4_TYPE(packet->ebuf);
+ if (likely(l4_type == OPA_VNIC_L4_ETHR)) {
+ vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
+ vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);
+
+ /*
+ * In case of invalid vesw id, count the error on
+ * the first available vport.
+ */
+ if (unlikely(!vinfo)) {
+ struct hfi1_vnic_vport_info *vinfo_tmp;
+ int id_tmp = 0;
+
+ vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
+ if (vinfo_tmp) {
+ spin_lock(&vport_cntr_lock);
+ vinfo_tmp->stats[0].netstats.rx_nohandler++;
+ spin_unlock(&vport_cntr_lock);
+ }
+ }
+ }
+
+ if (unlikely(!vinfo)) {
+ dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
+ l4_type, vesw_id, packet->rcd->ctxt);
+ return;
+ }
+
+ q_idx = packet->rcd->vnic_q_idx;
+ rxq = &vinfo->rxq[q_idx];
+ if (unlikely(!netif_oper_up(vinfo->netdev))) {
+ vinfo->stats[q_idx].rx_drop_state++;
+ skb_queue_purge(&rxq->skbq);
+ return;
+ }
+
+ if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
+ if (unlikely(!skb)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ memcpy(skb->data, packet->ebuf, packet->tlen);
+ skb_put(skb, packet->tlen);
+ skb_queue_tail(&rxq->skbq, skb);
+
+ if (napi_schedule_prep(&rxq->napi)) {
+ v_dbg("napi %d scheduling\n", q_idx);
+ __napi_schedule(&rxq->napi);
+ }
+}
+
+static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct net_device *netdev = vinfo->netdev;
+ int i, rc;
+
+ /* ensure virtual eth switch id is valid */
+ if (!vinfo->vesw_id)
+ return -EINVAL;
+
+ rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
+ vinfo->vesw_id + 1, GFP_NOWAIT);
+ if (rc < 0)
+ return rc;
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ skb_queue_head_init(&rxq->skbq);
+ napi_enable(&rxq->napi);
+ }
+
+ netif_carrier_on(netdev);
+ netif_tx_start_all_queues(netdev);
+ set_bit(HFI1_VNIC_UP, &vinfo->flags);
+
+ return 0;
+}
+
+static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ u8 i;
+
+ clear_bit(HFI1_VNIC_UP, &vinfo->flags);
+ netif_carrier_off(vinfo->netdev);
+ netif_tx_disable(vinfo->netdev);
+ idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
+
+ /* ensure irqs see the change */
+ hfi1_vnic_synchronize_irq(dd);
+
+ /* remove unread skbs */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ napi_disable(&rxq->napi);
+ skb_queue_purge(&rxq->skbq);
+ }
+}
+
+static int hfi1_netdev_open(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ int rc;
+
+ mutex_lock(&vinfo->lock);
+ rc = hfi1_vnic_up(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return rc;
+}
+
+static int hfi1_netdev_close(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
+ hfi1_vnic_down(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return 0;
+}
+
+static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **vnic_ctxt)
+{
+ int rc;
+
+ rc = allocate_vnic_ctxt(dd, vnic_ctxt);
+ if (rc) {
+ dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
+ return rc;
+ }
+
+ rc = setup_vnic_ctxt(dd, *vnic_ctxt);
+ if (rc) {
+ dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
+ deallocate_vnic_ctxt(dd, *vnic_ctxt);
+ *vnic_ctxt = NULL;
+ }
+
+ return rc;
+}
+
+static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int i, rc = 0;
+
+ mutex_lock(&hfi1_mutex);
+ if (!dd->vnic.num_vports) {
+ rc = hfi1_vnic_txreq_init(dd);
+ if (rc)
+ goto txreq_fail;
+
+ dd->vnic.msix_idx = dd->first_dyn_msix_idx;
+ }
+
+ for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
+ rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
+ if (rc)
+ break;
+ dd->vnic.ctxt[i]->vnic_q_idx = i;
+ }
+
+ if (i < vinfo->num_rx_q) {
+ /*
+ * If required amount of contexts is not
+ * allocated successfully then remaining contexts
+ * are released.
+ */
+ while (i-- > dd->vnic.num_ctxt) {
+ deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ dd->vnic.ctxt[i] = NULL;
+ }
+ goto alloc_fail;
+ }
+
+ if (dd->vnic.num_ctxt != i) {
+ dd->vnic.num_ctxt = i;
+ hfi1_init_vnic_rsm(dd);
+ }
+
+ dd->vnic.num_vports++;
+ hfi1_vnic_sdma_init(vinfo);
+alloc_fail:
+ if (!dd->vnic.num_vports)
+ hfi1_vnic_txreq_deinit(dd);
+txreq_fail:
+ mutex_unlock(&hfi1_mutex);
+ return rc;
+}
+
+static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int i;
+
+ mutex_lock(&hfi1_mutex);
+ if (--dd->vnic.num_vports == 0) {
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ dd->vnic.ctxt[i] = NULL;
+ }
+ hfi1_deinit_vnic_rsm(dd);
+ dd->vnic.num_ctxt = 0;
+ hfi1_vnic_txreq_deinit(dd);
+ }
+ mutex_unlock(&hfi1_mutex);
+}
+
+static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ bool reopen = false;
+
+ /*
+ * If vesw_id is being changed, and if the vnic port is up,
+ * reset the vnic port to ensure new vesw_id gets picked up
+ */
+ if (id != vinfo->vesw_id) {
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
+ hfi1_vnic_down(vinfo);
+ reopen = true;
+ }
+
+ vinfo->vesw_id = id;
+ if (reopen)
+ hfi1_vnic_up(vinfo);
+
+ mutex_unlock(&vinfo->lock);
+ }
+}
+
+/* netdev ops */
+static const struct net_device_ops hfi1_netdev_ops = {
+ .ndo_open = hfi1_netdev_open,
+ .ndo_stop = hfi1_netdev_close,
+ .ndo_start_xmit = hfi1_netdev_start_xmit,
+ .ndo_select_queue = hfi1_vnic_select_queue,
+ .ndo_get_stats64 = hfi1_vnic_get_stats64,
+};
+
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u8 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int i, size, rc;
+
+ if (!port_num || (port_num > dd->num_pports))
+ return ERR_PTR(-EINVAL);
+
+ if (type != RDMA_NETDEV_OPA_VNIC)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
+ netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
+ dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+
+ rn = netdev_priv(netdev);
+ vinfo = opa_vnic_dev_priv(netdev);
+ vinfo->dd = dd;
+ vinfo->num_tx_q = dd->chip_sdma_engines;
+ vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
+ vinfo->netdev = netdev;
+ rn->set_id = hfi1_vnic_set_vesw_id;
+
+ netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
+ netdev->hw_features = netdev->features;
+ netdev->vlan_features = netdev->features;
+ netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
+ netdev->netdev_ops = &hfi1_netdev_ops;
+ mutex_init(&vinfo->lock);
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ rxq->idx = i;
+ rxq->vinfo = vinfo;
+ rxq->netdev = netdev;
+ netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
+ }
+
+ rc = hfi1_vnic_init(vinfo);
+ if (rc)
+ goto init_fail;
+
+ return netdev;
+init_fail:
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+ return ERR_PTR(rc);
+}
+
+void hfi1_vnic_free_rn(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_deinit(vinfo);
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+}
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
new file mode 100644
index 0000000..51a817d
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for VNIC SDMA functionality
+ */
+
+#include "sdma.h"
+#include "vnic.h"
+
+#define HFI1_VNIC_SDMA_Q_ACTIVE BIT(0)
+#define HFI1_VNIC_SDMA_Q_DEFERRED BIT(1)
+
+#define HFI1_VNIC_TXREQ_NAME_LEN 32
+#define HFI1_VNIC_SDMA_DESC_WTRMRK 64
+#define HFI1_VNIC_SDMA_RETRY_COUNT 1
+
+/*
+ * struct vnic_txreq - VNIC transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma: vnic sdma pointer
+ * @skb: skb to send
+ * @pad: pad buffer
+ * @plen: pad length
+ * @pbc_val: pbc value
+ * @retry_count: tx retry count
+ */
+struct vnic_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_vnic_sdma *sdma;
+
+ struct sk_buff *skb;
+ unsigned char pad[HFI1_VNIC_MAX_PAD];
+ u16 plen;
+ __le64 pbc_val;
+
+ u32 retry_count;
+};
+
+static void vnic_sdma_complete(struct sdma_txreq *txreq,
+ int status)
+{
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+ struct hfi1_vnic_sdma *vnic_sdma = tx->sdma;
+
+ sdma_txclean(vnic_sdma->dd, txreq);
+ dev_kfree_skb_any(tx->skb);
+ kmem_cache_free(vnic_sdma->dd->vnic.txreq_cache, tx);
+}
+
+static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
+ struct vnic_txreq *tx)
+{
+ int i, ret = 0;
+
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ tx->skb->data,
+ skb_headlen(tx->skb));
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) {
+ struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i];
+
+ /* combine physically continuous fragments later? */
+ ret = sdma_txadd_page(sde->dd,
+ &tx->txreq,
+ skb_frag_page(frag),
+ frag->page_offset,
+ skb_frag_size(frag));
+ if (unlikely(ret))
+ goto bail_txadd;
+ }
+
+ if (tx->plen)
+ ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
+ tx->pad + HFI1_VNIC_MAX_PAD - tx->plen,
+ tx->plen);
+
+bail_txadd:
+ return ret;
+}
+
+static int build_vnic_tx_desc(struct sdma_engine *sde,
+ struct vnic_txreq *tx,
+ u64 pbc)
+{
+ int ret = 0;
+ u16 hdrbytes = 2 << 2; /* PBC */
+
+ ret = sdma_txinit_ahg(
+ &tx->txreq,
+ 0,
+ hdrbytes + tx->skb->len + tx->plen,
+ 0,
+ 0,
+ NULL,
+ 0,
+ vnic_sdma_complete);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add pbc */
+ tx->pbc_val = cpu_to_le64(pbc);
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ &tx->pbc_val,
+ hdrbytes);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add the ulp payload */
+ ret = build_vnic_ulp_payload(sde, tx);
+bail_txadd:
+ return ret;
+}
+
+/* setup the last plen bypes of pad */
+static inline void hfi1_vnic_update_pad(unsigned char *pad, u8 plen)
+{
+ pad[HFI1_VNIC_MAX_PAD - 1] = plen - OPA_VNIC_ICRC_TAIL_LEN;
+}
+
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+ struct sdma_engine *sde = vnic_sdma->sde;
+ struct vnic_txreq *tx;
+ int ret = -ECOMM;
+
+ if (unlikely(READ_ONCE(vnic_sdma->state) != HFI1_VNIC_SDMA_Q_ACTIVE))
+ goto tx_err;
+
+ if (unlikely(!sde || !sdma_running(sde)))
+ goto tx_err;
+
+ tx = kmem_cache_alloc(dd->vnic.txreq_cache, GFP_ATOMIC);
+ if (unlikely(!tx)) {
+ ret = -ENOMEM;
+ goto tx_err;
+ }
+
+ tx->sdma = vnic_sdma;
+ tx->skb = skb;
+ hfi1_vnic_update_pad(tx->pad, plen);
+ tx->plen = plen;
+ ret = build_vnic_tx_desc(sde, tx, pbc);
+ if (unlikely(ret))
+ goto free_desc;
+ tx->retry_count = 0;
+
+ ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq);
+ /* When -ECOMM, sdma callback will be called with ABORT status */
+ if (unlikely(ret && unlikely(ret != -ECOMM)))
+ goto free_desc;
+
+ return ret;
+
+free_desc:
+ sdma_txclean(dd, &tx->txreq);
+ kmem_cache_free(dd->vnic.txreq_cache, tx);
+tx_err:
+ if (ret != -EBUSY)
+ dev_kfree_skb_any(skb);
+ return ret;
+}
+
+/*
+ * hfi1_vnic_sdma_sleep - vnic sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds Tx queue's wait
+ * structure to sdma engine's dmawait list to be woken up when descriptors
+ * become available.
+ */
+static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
+ struct iowait *wait,
+ struct sdma_txreq *txreq,
+ unsigned int seq)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+
+ if (sdma_progress(sde, seq, txreq))
+ if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT)
+ return -EAGAIN;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
+ write_seqlock(&dev->iowait_lock);
+ if (list_empty(&vnic_sdma->wait.list))
+ list_add_tail(&vnic_sdma->wait.list, &sde->dmawait);
+ write_sequnlock(&dev->iowait_lock);
+ return -EBUSY;
+}
+
+/*
+ * hfi1_vnic_sdma_wakeup - vnic sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors becomes available and Tx
+ * queue's wait structure was previously added to sdma engine's dmawait list.
+ * It notifies the upper driver about Tx queue wakeup.
+ */
+static void hfi1_vnic_sdma_wakeup(struct iowait *wait, int reason)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_vnic_vport_info *vinfo = vnic_sdma->vinfo;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+ if (__netif_subqueue_stopped(vinfo->netdev, vnic_sdma->q_idx))
+ netif_wake_subqueue(vinfo->netdev, vnic_sdma->q_idx);
+};
+
+inline bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+
+ return (READ_ONCE(vnic_sdma->state) == HFI1_VNIC_SDMA_Q_ACTIVE);
+}
+
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ int i;
+
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
+
+ iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
+ hfi1_vnic_sdma_wakeup, NULL);
+ vnic_sdma->sde = &vinfo->dd->per_sdma[i];
+ vnic_sdma->dd = vinfo->dd;
+ vnic_sdma->vinfo = vinfo;
+ vnic_sdma->q_idx = i;
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+
+ /* Add a free descriptor watermark for wakeups */
+ if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+ INIT_LIST_HEAD(&vnic_sdma->stx.list);
+ vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
+ list_add_tail(&vnic_sdma->stx.list,
+ &vnic_sdma->wait.tx_head);
+ }
+ }
+}
+
+static void hfi1_vnic_txreq_kmem_cache_ctor(void *obj)
+{
+ struct vnic_txreq *tx = (struct vnic_txreq *)obj;
+
+ memset(tx, 0, sizeof(*tx));
+}
+
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd)
+{
+ char buf[HFI1_VNIC_TXREQ_NAME_LEN];
+
+ snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit);
+ dd->vnic.txreq_cache = kmem_cache_create(buf,
+ sizeof(struct vnic_txreq),
+ 0, SLAB_HWCACHE_ALIGN,
+ hfi1_vnic_txreq_kmem_cache_ctor);
+ if (!dd->vnic.txreq_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd)
+{
+ kmem_cache_destroy(dd->vnic.txreq_cache);
+ dd->vnic.txreq_cache = NULL;
+}
OpenPOWER on IntegriCloud