Diffstat (limited to 'drivers/infiniband/hw/hfi1')
38 files changed, 3006 insertions, 570 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 0cf97a0..88085f6 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ - verbs_txreq.o + verbs_txreq.o vnic_main.o vnic_sdma.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o CFLAGS_trace.o = -I$(src) diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 0d58fe3..794e681 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long data) spin_unlock_irqrestore(&rcd->aspm_lock, flags); } -/* Disable interrupt processing for verbs contexts when PSM contexts are open */ +/* + * Disable interrupt processing for verbs contexts when PSM or VNIC contexts + * are open. + */ static inline void aspm_disable_all(struct hfi1_devdata *dd) { struct hfi1_ctxtdata *rcd; unsigned long flags; unsigned i; - for (i = 0; i < dd->first_user_ctxt; i++) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = dd->rcd[i]; del_timer_sync(&rcd->aspm_timer); spin_lock_irqsave(&rcd->aspm_lock, flags); @@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd) if (aspm_mode != ASPM_MODE_DYNAMIC) return; - for (i = 0; i < dd->first_user_ctxt; i++) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = dd->rcd[i]; spin_lock_irqsave(&rcd->aspm_lock, flags); rcd->aspm_intr_enable = true; @@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) (unsigned long)rcd); rcd->aspm_intr_supported = rcd->dd->aspm_supported && aspm_mode == ASPM_MODE_DYNAMIC && - rcd->ctxt < rcd->dd->first_user_ctxt; + rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt; } static inline void aspm_init(struct hfi1_devdata *dd) @@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd) spin_lock_init(&dd->aspm_lock); dd->aspm_supported = aspm_hw_l1_supported(dd); - for (i = 0; i < dd->first_user_ctxt; i++) + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) aspm_ctx_init(dd->rcd[i]); /* Start with ASPM disabled */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 121a4c9..0f6916d 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -64,6 +64,7 @@ #include "platform.h" #include "aspm.h" #include "affinity.h" +#include "debugfs.h" #define NUM_IB_PORTS 1 @@ -125,9 +126,16 @@ struct flag_table { #define DEFAULT_KRCVQS 2 #define MIN_KERNEL_KCTXTS 2 #define FIRST_KERNEL_KCTXT 1 -/* sizes for both the QP and RSM map tables */ -#define NUM_MAP_ENTRIES 256 -#define NUM_MAP_REGS 32 + +/* + * RSM instance allocation + * 0 - Verbs + * 1 - User Fecn Handling + * 2 - Vnic + */ +#define RSM_INS_VERBS 0 +#define RSM_INS_FECN 1 +#define RSM_INS_VNIC 2 /* Bit offset into the GUID which carries HFI id information */ #define GUID_HFI_INDEX_SHIFT 39 @@ -138,8 +146,7 @@ struct flag_table { #define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3) #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4) -/* RSM fields */ - +/* RSM fields for Verbs */ /* packet type */ #define IB_PACKET_TYPE 2ull #define QW_SHIFT 6ull @@ -169,6 +176,28 @@ struct flag_table { /* QPN[m+n:1] QW 1, OFFSET 1 */ #define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull)) +/* RSM fields for Vnic */ +/* L2_TYPE: QW 0, OFFSET 61 - for match */ +#define L2_TYPE_QW 0ull +#define L2_TYPE_BIT_OFFSET 61ull +#define L2_TYPE_OFFSET(off) ((L2_TYPE_QW << QW_SHIFT) | (off)) +#define L2_TYPE_MATCH_OFFSET L2_TYPE_OFFSET(L2_TYPE_BIT_OFFSET) +#define L2_TYPE_MASK 3ull +#define L2_16B_VALUE 2ull + +/* L4_TYPE QW 1, OFFSET 0 - for match */ +#define L4_TYPE_QW 1ull +#define L4_TYPE_BIT_OFFSET 0ull +#define L4_TYPE_OFFSET(off) ((L4_TYPE_QW << QW_SHIFT) | (off)) +#define L4_TYPE_MATCH_OFFSET L4_TYPE_OFFSET(L4_TYPE_BIT_OFFSET) +#define L4_16B_TYPE_MASK 0xFFull +#define L4_16B_ETH_VALUE 0x78ull + +/* 16B VESWID - for select */ +#define L4_16B_HDR_VESWID_OFFSET ((2 << QW_SHIFT) | (16ull)) +/* 16B ENTROPY - for select */ +#define L2_16B_ENTROPY_OFFSET ((1 << QW_SHIFT) | (32ull)) + /* defines to build power on SC2VL table */ #define SC2VL_VAL( \ num, \ @@ -1045,6 +1074,8 @@ static void dc_start(struct hfi1_devdata *); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); +static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms); +static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index); /* * Error interrupt table entry. This is used as input to the interrupt @@ -6379,18 +6410,17 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort) * * The expectation is that the caller of this routine would have taken * care of properly transitioning the link into the correct state. + * NOTE: the caller needs to acquire the dd->dc8051_lock lock + * before calling this function. */ -static void dc_shutdown(struct hfi1_devdata *dd) +static void _dc_shutdown(struct hfi1_devdata *dd) { - unsigned long flags; + lockdep_assert_held(&dd->dc8051_lock); - spin_lock_irqsave(&dd->dc8051_lock, flags); - if (dd->dc_shutdown) { - spin_unlock_irqrestore(&dd->dc8051_lock, flags); + if (dd->dc_shutdown) return; - } + dd->dc_shutdown = 1; - spin_unlock_irqrestore(&dd->dc8051_lock, flags); /* Shutdown the LCB */ lcb_shutdown(dd, 1); /* @@ -6401,35 +6431,45 @@ static void dc_shutdown(struct hfi1_devdata *dd) write_csr(dd, DC_DC8051_CFG_RST, 0x1); } +static void dc_shutdown(struct hfi1_devdata *dd) +{ + mutex_lock(&dd->dc8051_lock); + _dc_shutdown(dd); + mutex_unlock(&dd->dc8051_lock); +} + /* * Calling this after the DC has been brought out of reset should not * do any damage. + * NOTE: the caller needs to acquire the dd->dc8051_lock lock + * before calling this function. 
*/ -static void dc_start(struct hfi1_devdata *dd) +static void _dc_start(struct hfi1_devdata *dd) { - unsigned long flags; - int ret; + lockdep_assert_held(&dd->dc8051_lock); - spin_lock_irqsave(&dd->dc8051_lock, flags); if (!dd->dc_shutdown) - goto done; - spin_unlock_irqrestore(&dd->dc8051_lock, flags); + return; + /* Take the 8051 out of reset */ write_csr(dd, DC_DC8051_CFG_RST, 0ull); /* Wait until 8051 is ready */ - ret = wait_fm_ready(dd, TIMEOUT_8051_START); - if (ret) { + if (wait_fm_ready(dd, TIMEOUT_8051_START)) dd_dev_err(dd, "%s: timeout starting 8051 firmware\n", __func__); - } + /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ write_csr(dd, DCC_CFG_RESET, 0x10); /* lcb_shutdown() with abort=1 does not restore these */ write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en); - spin_lock_irqsave(&dd->dc8051_lock, flags); dd->dc_shutdown = 0; -done: - spin_unlock_irqrestore(&dd->dc8051_lock, flags); +} + +static void dc_start(struct hfi1_devdata *dd) +{ + mutex_lock(&dd->dc8051_lock); + _dc_start(dd); + mutex_unlock(&dd->dc8051_lock); } /* @@ -6701,7 +6741,13 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) int i; /* enable all kernel contexts */ - for (i = 0; i < dd->n_krcv_queues; i++) { + for (i = 0; i < dd->num_rcv_contexts; i++) { + struct hfi1_ctxtdata *rcd = dd->rcd[i]; + + /* Ensure all non-user contexts(including vnic) are enabled */ + if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) + continue; + rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? @@ -7077,7 +7123,7 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; - /* Sanity check - ppd->pkeys[2] should be 0, or already initalized */ + /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */ if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY))) dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n", __func__, ppd->pkeys[2], FULL_MGMT_P_KEY); @@ -7165,7 +7211,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, * set the max_rate field in handle_verify_cap until v0.19. 
*/ if ((dd->icode == ICODE_RTL_SILICON) && - (dd->dc8051_ver < dc8051_ver(0, 19))) { + (dd->dc8051_ver < dc8051_ver(0, 19, 0))) { /* max_rate: 0 = 12.5G, 1 = 25G */ switch (max_rate) { case 0: @@ -7277,15 +7323,6 @@ void handle_verify_cap(struct work_struct *work) lcb_shutdown(dd, 0); adjust_lcb_for_fpga_serdes(dd); - /* - * These are now valid: - * remote VerifyCap fields in the general LNI config - * CSR DC8051_STS_REMOTE_GUID - * CSR DC8051_STS_REMOTE_NODE_TYPE - * CSR DC8051_STS_REMOTE_FM_SECURITY - * CSR DC8051_STS_REMOTE_PORT_NO - */ - read_vc_remote_phy(dd, &power_management, &continious); read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf, &partner_supported_crc); @@ -7350,7 +7387,7 @@ void handle_verify_cap(struct work_struct *work) } ppd->link_speed_active = 0; /* invalid value */ - if (dd->dc8051_ver < dc8051_ver(0, 20)) { + if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) { /* remote_tx_rate: 0 = 12.5G, 1 = 25G */ switch (remote_tx_rate) { case 0: @@ -7416,20 +7453,6 @@ void handle_verify_cap(struct work_struct *work) write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */ set_8051_lcb_access(dd); - ppd->neighbor_guid = - read_csr(dd, DC_DC8051_STS_REMOTE_GUID); - ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) & - DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; - ppd->neighbor_type = - read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) & - DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK; - ppd->neighbor_fm_security = - read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) & - DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK; - dd_dev_info(dd, - "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n", - ppd->neighbor_guid, ppd->neighbor_type, - ppd->mgmt_allowed, ppd->neighbor_fm_security); if (ppd->mgmt_allowed) add_full_mgmt_pkey(ppd); @@ -7897,6 +7920,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK; } + if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev))) + reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK; + /* report any remaining errors */ if (reg) dd_dev_info_ratelimited(dd, "DCC Error: %s\n", @@ -7995,7 +8021,9 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = dd->rcd[source]; if (rcd) { - if (source < dd->first_user_ctxt) + /* Check for non-user contexts, including vnic */ + if ((source < dd->first_dyn_alloc_ctxt) || + (rcd->sc && (rcd->sc->type == SC_KERNEL))) rcd->do_interrupt(rcd, 0); else handle_user_interrupt(rcd); @@ -8023,7 +8051,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) rcd = dd->rcd[source]; if (rcd) { /* only pay attention to user urgent interrupts */ - if (source >= dd->first_user_ctxt) + if ((source >= dd->first_dyn_alloc_ctxt) && + (!rcd->sc || (rcd->sc->type == SC_USER))) handle_user_interrupt(rcd); return; /* OK */ } @@ -8156,10 +8185,10 @@ static irqreturn_t sdma_interrupt(int irq, void *data) /* handle the interrupt(s) */ sdma_engine_interrupt(sde, status); - } else + } else { dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n", sde->this_idx); - + } return IRQ_HANDLED; } @@ -8344,6 +8373,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data) } /* + * Provide a cache for some of the LCB registers in case the LCB is + * unavailable. + * (The LCB is unavailable in certain link states, for example.) 
+ */ +struct lcb_datum { + u32 off; + u64 val; +}; + +static struct lcb_datum lcb_cache[] = { + { DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0}, + { DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 }, + { DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 }, +}; + +static void update_lcb_cache(struct hfi1_devdata *dd) +{ + int i; + int ret; + u64 val; + + for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) { + ret = read_lcb_csr(dd, lcb_cache[i].off, &val); + + /* Update if we get good data */ + if (likely(ret != -EBUSY)) + lcb_cache[i].val = val; + } +} + +static int read_lcb_cache(u32 off, u64 *val) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) { + if (lcb_cache[i].off == off) { + *val = lcb_cache[i].val; + return 0; + } + } + + pr_warn("%s bad offset 0x%x\n", __func__, off); + return -1; +} + +/* * Read an LCB CSR. Access may not be in host control, so check. * Return 0 on success, -EBUSY on failure. */ @@ -8354,9 +8429,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data) /* if up, go through the 8051 for the value */ if (ppd->host_link_state & HLS_UP) return read_lcb_via_8051(dd, addr, data); - /* if going up or down, no access */ - if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) - return -EBUSY; + /* if going up or down, check the cache, otherwise, no access */ + if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) { + if (read_lcb_cache(addr, data)) + return -EBUSY; + return 0; + } + /* otherwise, host has access */ *data = read_csr(dd, addr); return 0; @@ -8371,7 +8450,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data) int ret; if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || - (dd->dc8051_ver < dc8051_ver(0, 20))) { + (dd->dc8051_ver < dc8051_ver(0, 20, 0))) { if (acquire_lcb_access(dd, 0) == 0) { write_csr(dd, addr, data); release_lcb_access(dd, 0); @@ -8420,16 +8499,11 @@ static int do_8051_command( { u64 reg, completed; int return_code; - unsigned long flags; unsigned long timeout; hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); - /* - * Alternative to holding the lock for a long time: - * - keep busy wait - have other users bounce off - */ - spin_lock_irqsave(&dd->dc8051_lock, flags); + mutex_lock(&dd->dc8051_lock); /* We can't send any commands to the 8051 if it's in reset */ if (dd->dc_shutdown) { @@ -8455,10 +8529,8 @@ static int do_8051_command( return_code = -ENXIO; goto fail; } - spin_unlock_irqrestore(&dd->dc8051_lock, flags); - dc_shutdown(dd); - dc_start(dd); - spin_lock_irqsave(&dd->dc8051_lock, flags); + _dc_shutdown(dd); + _dc_start(dd); } /* @@ -8539,8 +8611,7 @@ static int do_8051_command( write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); fail: - spin_unlock_irqrestore(&dd->dc8051_lock, flags); - + mutex_unlock(&dd->dc8051_lock); return return_code; } @@ -8677,13 +8748,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, & REMOTE_DEVICE_REV_MASK; } -void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b) +void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, + u8 *ver_patch) { u32 frame; read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame); - *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK; - *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK; + *ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) & + STS_FM_VERSION_MAJOR_MASK; + *ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) & + STS_FM_VERSION_MINOR_MASK; + + read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame); + *ver_patch = (frame >> 
STS_FM_VERSION_PATCH_SHIFT) & + STS_FM_VERSION_PATCH_MASK; } static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management, @@ -8891,8 +8969,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message) */ static int do_quick_linkup(struct hfi1_devdata *dd) { - u64 reg; - unsigned long timeout; int ret; lcb_shutdown(dd, 0); @@ -8915,19 +8991,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd) write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT); - /* watch LCB_STS_LINK_TRANSFER_ACTIVE */ - timeout = jiffies + msecs_to_jiffies(10); - while (1) { - reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE); - if (reg) - break; - if (time_after(jiffies, timeout)) { - dd_dev_err(dd, - "timeout waiting for LINK_TRANSFER_ACTIVE\n"); - return -ETIMEDOUT; - } - udelay(2); - } + ret = wait_link_transfer_active(dd, 10); + if (ret) + return ret; write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT); @@ -9091,7 +9157,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) if (ret) goto set_local_link_attributes_fail; - if (dd->dc8051_ver < dc8051_ver(0, 20)) { + if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) { /* set the tx rate to the fastest enabled */ if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G) ppd->local_tx_rate = 1; @@ -9274,7 +9340,7 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) || (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING)) - dd_dev_info(dd, "%s: QSFP cable on fire\n", + dd_dev_info(dd, "%s: QSFP cable temperature too high\n", __func__); if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) || @@ -9494,8 +9560,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd) int ret; u8 status; - /* report success if not a QSFP */ - if (ppd->port_type != PORT_TYPE_QSFP) + /* + * Report success if not a QSFP or, if it is a QSFP, but the cable is + * not present + */ + if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd)) return 0; /* read byte 2, the status byte */ @@ -10082,6 +10151,64 @@ static void check_lni_states(struct hfi1_pportdata *ppd) decode_state_complete(ppd, last_remote_state, "received"); } +/* wait for wait_ms for LINK_TRANSFER_ACTIVE to go to 1 */ +static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms) +{ + u64 reg; + unsigned long timeout; + + /* watch LCB_STS_LINK_TRANSFER_ACTIVE */ + timeout = jiffies + msecs_to_jiffies(wait_ms); + while (1) { + reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE); + if (reg) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(dd, + "timeout waiting for LINK_TRANSFER_ACTIVE\n"); + return -ETIMEDOUT; + } + udelay(2); + } + return 0; +} + +/* called when the logical link state is not down as it should be */ +static void force_logical_link_state_down(struct hfi1_pportdata *ppd) +{ + struct hfi1_devdata *dd = ppd->dd; + + /* + * Bring link up in LCB loopback + */ + write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1); + write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, + DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK); + + write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0); + write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0); + write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110); + write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2); + + write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0); + (void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET); + udelay(3); + write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1); + write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT); + + wait_link_transfer_active(dd, 100); + + /* + * Bring the link 
down again. + */ + write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1); + write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0); + write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0); + + /* call again to adjust ppd->statusp, if needed */ + get_logical_state(ppd); +} + /* * Helper for set_link_state(). Do not call except from that routine. * Expects ppd->hls_mutex to be held. @@ -10098,6 +10225,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) int do_transition; int do_wait; + update_lcb_cache(dd); + previous_state = ppd->host_link_state; ppd->host_link_state = HLS_GOING_OFFLINE; pstate = read_physical_state(dd); @@ -10135,15 +10264,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) return ret; } - /* make sure the logical state is also down */ - wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000); - /* * Now in charge of LCB - must be after the physical state is * offline.quiet and before host_link_state is changed. */ set_host_lcb_access(dd); write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */ + + /* make sure the logical state is also down */ + ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000); + if (ret) + force_logical_link_state_down(ppd); + ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */ if (ppd->port_type == PORT_TYPE_QSFP && @@ -10380,11 +10512,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) goto unexpected; } - ppd->host_link_state = HLS_UP_INIT; ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000); if (ret) { - /* logical state didn't change, stay at going_up */ - ppd->host_link_state = HLS_GOING_UP; dd_dev_err(dd, "%s: logical state did not change to INIT\n", __func__); @@ -10398,6 +10527,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); handle_linkup_change(dd, 1); + ppd->host_link_state = HLS_UP_INIT; } break; case HLS_UP_ARMED: @@ -11853,6 +11983,10 @@ static void free_cntrs(struct hfi1_devdata *dd) dd->scntrs = NULL; kfree(dd->cntrnames); dd->cntrnames = NULL; + if (dd->update_cntr_wq) { + destroy_workqueue(dd->update_cntr_wq); + dd->update_cntr_wq = NULL; + } } static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry, @@ -12008,7 +12142,7 @@ u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data) return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data); } -static void update_synth_timer(unsigned long opaque) +static void do_update_synth_timer(struct work_struct *work) { u64 cur_tx; u64 cur_rx; @@ -12017,8 +12151,8 @@ static void update_synth_timer(unsigned long opaque) int i, j, vl; struct hfi1_pportdata *ppd; struct cntr_entry *entry; - - struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; + struct hfi1_devdata *dd = container_of(work, struct hfi1_devdata, + update_cntr_work); /* * Rather than keep beating on the CSRs pick a minimal set that we can @@ -12101,7 +12235,13 @@ static void update_synth_timer(unsigned long opaque) } else { hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit); } +} +static void update_synth_timer(unsigned long opaque) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; + + queue_work(dd->update_cntr_wq, &dd->update_cntr_work); mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); } @@ -12337,6 +12477,13 @@ static int init_cntrs(struct hfi1_devdata *dd) if (init_cpu_counters(dd)) goto bail; + dd->update_cntr_wq = alloc_ordered_workqueue("hfi1_update_cntr_%d", + WQ_MEM_RECLAIM, dd->unit); + if (!dd->update_cntr_wq) + goto bail; + + 
INIT_WORK(&dd->update_cntr_work, do_update_synth_timer); + mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); return 0; bail: @@ -12726,7 +12873,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd) first_sdma = last_general; last_sdma = first_sdma + dd->num_sdma; first_rx = last_sdma; - last_rx = first_rx + dd->n_krcv_queues; + last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; + + /* VNIC MSIx interrupts get mapped when VNIC contexts are created */ + dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues; /* * Sanity check - the code expects all SDMA chip source @@ -12740,7 +12890,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) const char *err_info; irq_handler_t handler; irq_handler_t thread = NULL; - void *arg; + void *arg = NULL; int idx; struct hfi1_ctxtdata *rcd = NULL; struct sdma_engine *sde = NULL; @@ -12767,24 +12917,25 @@ static int request_msix_irqs(struct hfi1_devdata *dd) } else if (first_rx <= i && i < last_rx) { idx = i - first_rx; rcd = dd->rcd[idx]; - /* no interrupt if no rcd */ - if (!rcd) - continue; - /* - * Set the interrupt register and mask for this - * context's interrupt. - */ - rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; - rcd->imask = ((u64)1) << - ((IS_RCVAVAIL_START + idx) % 64); - handler = receive_context_interrupt; - thread = receive_context_thread; - arg = rcd; - snprintf(me->name, sizeof(me->name), - DRIVER_NAME "_%d kctxt%d", dd->unit, idx); - err_info = "receive context"; - remap_intr(dd, IS_RCVAVAIL_START + idx, i); - me->type = IRQ_RCVCTXT; + if (rcd) { + /* + * Set the interrupt register and mask for this + * context's interrupt. + */ + rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; + rcd->imask = ((u64)1) << + ((IS_RCVAVAIL_START + idx) % 64); + handler = receive_context_interrupt; + thread = receive_context_thread; + arg = rcd; + snprintf(me->name, sizeof(me->name), + DRIVER_NAME "_%d kctxt%d", + dd->unit, idx); + err_info = "receive context"; + remap_intr(dd, IS_RCVAVAIL_START + idx, i); + me->type = IRQ_RCVCTXT; + rcd->msix_intr = i; + } } else { /* not in our expected range - complain, then * ignore it @@ -12822,6 +12973,84 @@ static int request_msix_irqs(struct hfi1_devdata *dd) return ret; } +void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) +{ + int i; + + if (!dd->num_msix_entries) { + synchronize_irq(dd->pcidev->irq); + return; + } + + for (i = 0; i < dd->vnic.num_ctxt; i++) { + struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; + struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; + + synchronize_irq(me->msix.vector); + } +} + +void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; + + if (!me->arg) /* => no irq, no affinity */ + return; + + hfi1_put_irq_affinity(dd, me); + free_irq(me->msix.vector, me->arg); + + me->arg = NULL; +} + +void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + struct hfi1_msix_entry *me; + int idx = rcd->ctxt; + void *arg = rcd; + int ret; + + rcd->msix_intr = dd->vnic.msix_idx++; + me = &dd->msix_entries[rcd->msix_intr]; + + /* + * Set the interrupt register and mask for this + * context's interrupt. 
+ */ + rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; + rcd->imask = ((u64)1) << + ((IS_RCVAVAIL_START + idx) % 64); + + snprintf(me->name, sizeof(me->name), + DRIVER_NAME "_%d kctxt%d", dd->unit, idx); + me->name[sizeof(me->name) - 1] = 0; + me->type = IRQ_RCVCTXT; + + remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); + + ret = request_threaded_irq(me->msix.vector, receive_context_interrupt, + receive_context_thread, 0, me->name, arg); + if (ret) { + dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n", + me->msix.vector, idx, ret); + return; + } + /* + * assign arg after request_irq call, so it will be + * cleaned up + */ + me->arg = arg; + + ret = hfi1_get_irq_affinity(dd, me); + if (ret) { + dd_dev_err(dd, + "unable to pin IRQ %d\n", ret); + free_irq(me->msix.vector, me->arg); + } +} + /* * Set the general handler to accept all interrupts, remap all * chip interrupts back to MSI-X 0. @@ -12853,7 +13082,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd) * N interrupts - one per used SDMA engine * M interrupt - one per kernel receive context */ - total = 1 + dd->num_sdma + dd->n_krcv_queues; + total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; entries = kcalloc(total, sizeof(*entries), GFP_KERNEL); if (!entries) { @@ -12918,7 +13147,8 @@ fail: * * num_rcv_contexts - number of contexts being used * n_krcv_queues - number of kernel contexts - * first_user_ctxt - first non-kernel context in array of contexts + * first_dyn_alloc_ctxt - first dynamically allocated context + * in array of contexts * freectxts - number of free user contexts * num_send_contexts - number of PIO send contexts being used */ @@ -12995,10 +13225,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd) total_contexts = num_kernel_contexts + num_user_contexts; } - /* the first N are kernel contexts, the rest are user contexts */ + /* Accommodate VNIC contexts */ + if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts) + total_contexts += HFI1_NUM_VNIC_CTXT; + + /* the first N are kernel contexts, the rest are user/vnic contexts */ dd->num_rcv_contexts = total_contexts; dd->n_krcv_queues = num_kernel_contexts; - dd->first_user_ctxt = num_kernel_contexts; + dd->first_dyn_alloc_ctxt = num_kernel_contexts; dd->num_user_contexts = num_user_contexts; dd->freectxts = num_user_contexts; dd_dev_info(dd, @@ -13454,11 +13688,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd) write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0); for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++) write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0); - for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) { - write_csr(dd, RCV_RSM_CFG + (8 * i), 0); - write_csr(dd, RCV_RSM_SELECT + (8 * i), 0); - write_csr(dd, RCV_RSM_MATCH + (8 * i), 0); - } + for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) + clear_rsm_rule(dd, i); for (i = 0; i < 32; i++) write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0); @@ -13817,6 +14048,16 @@ static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index, (u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT); } +/* + * Clear a receive side mapping rule. 
+ */ +static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index) +{ + write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 0); + write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 0); + write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 0); +} + /* return the number of RSM map table entries that will be used for QOS */ static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np) @@ -13932,7 +14173,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt) rrd.value2 = LRH_SC_VALUE; /* add rule 0 */ - add_rsm_rule(dd, 0, &rrd); + add_rsm_rule(dd, RSM_INS_VERBS, &rrd); /* mark RSM map entries as used */ rmt->used += rmt_entries; @@ -13962,7 +14203,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, /* * RSM will extract the destination context as an index into the * map table. The destination contexts are a sequential block - * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive). + * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive). * Map entries are accessed as offset + extracted value. Adjust * the added offset so this sequence can be placed anywhere in * the table - as long as the entries themselves do not wrap. @@ -13970,9 +14211,9 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, * start with that to allow for a "negative" offset. */ offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used - - (int)dd->first_user_ctxt); + (int)dd->first_dyn_alloc_ctxt); - for (i = dd->first_user_ctxt, idx = rmt->used; + for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used; i < dd->num_rcv_contexts; i++, idx++) { /* replace with identity mapping */ regoff = (idx % 8) * 8; @@ -14006,11 +14247,84 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, rrd.value2 = 1; /* add rule 1 */ - add_rsm_rule(dd, 1, &rrd); + add_rsm_rule(dd, RSM_INS_FECN, &rrd); rmt->used += dd->num_user_contexts; } +/* Initialize RSM for VNIC */ +void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) +{ + u8 i, j; + u8 ctx_id = 0; + u64 reg; + u32 regoff; + struct rsm_rule_data rrd; + + if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) { + dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n", + dd->vnic.rmt_start); + return; + } + + dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n", + dd->vnic.rmt_start, + dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES); + + /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */ + regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8; + reg = read_csr(dd, regoff); + for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) { + /* Update map register with vnic context */ + j = (dd->vnic.rmt_start + i) % 8; + reg &= ~(0xffllu << (j * 8)); + reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8); + /* Wrap up vnic ctx index */ + ctx_id %= dd->vnic.num_ctxt; + /* Write back map register */ + if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) { + dev_dbg(&(dd)->pcidev->dev, + "Vnic rsm map reg[%d] =0x%llx\n", + regoff - RCV_RSM_MAP_TABLE, reg); + + write_csr(dd, regoff, reg); + regoff += 8; + if (i < (NUM_VNIC_MAP_ENTRIES - 1)) + reg = read_csr(dd, regoff); + } + } + + /* Add rule for vnic */ + rrd.offset = dd->vnic.rmt_start; + rrd.pkt_type = 4; + /* Match 16B packets */ + rrd.field1_off = L2_TYPE_MATCH_OFFSET; + rrd.mask1 = L2_TYPE_MASK; + rrd.value1 = L2_16B_VALUE; + /* Match ETH L4 packets */ + rrd.field2_off = L4_TYPE_MATCH_OFFSET; + rrd.mask2 = L4_16B_TYPE_MASK; + rrd.value2 = L4_16B_ETH_VALUE; + /* Calc context from veswid and entropy */ + rrd.index1_off = L4_16B_HDR_VESWID_OFFSET; + 
rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES); + rrd.index2_off = L2_16B_ENTROPY_OFFSET; + rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES); + add_rsm_rule(dd, RSM_INS_VNIC, &rrd); + + /* Enable RSM if not already enabled */ + add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); +} + +void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd) +{ + clear_rsm_rule(dd, RSM_INS_VNIC); + + /* Disable RSM if used only by vnic */ + if (dd->vnic.rmt_start == 0) + clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); +} + static void init_rxe(struct hfi1_devdata *dd) { struct rsm_map_table *rmt; @@ -14023,6 +14337,8 @@ static void init_rxe(struct hfi1_devdata *dd) init_qos(dd, rmt); init_user_fecn_handling(dd, rmt); complete_rsm_map_table(dd, rmt); + /* record number of used rsm map entries for vnic */ + dd->vnic.rmt_start = rmt->used; kfree(rmt); /* diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 043fd21..b9dbf16 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1,7 +1,7 @@ #ifndef _CHIP_H #define _CHIP_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -394,7 +394,8 @@ #define LAST_REMOTE_STATE_COMPLETE 0x13 #define LINK_QUALITY_INFO 0x14 #define REMOTE_DEVICE_ID 0x15 -#define LINK_DOWN_REASON 0x16 +#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */ +#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */ /* 8051 lane specific register field IDs */ #define TX_EQ_SETTINGS 0x00 @@ -524,10 +525,12 @@ enum { #define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B) /* misc status version fields */ -#define STS_FM_VERSION_A_SHIFT 16 -#define STS_FM_VERSION_A_MASK 0xff -#define STS_FM_VERSION_B_SHIFT 24 -#define STS_FM_VERSION_B_MASK 0xff +#define STS_FM_VERSION_MINOR_SHIFT 16 +#define STS_FM_VERSION_MINOR_MASK 0xff +#define STS_FM_VERSION_MAJOR_SHIFT 24 +#define STS_FM_VERSION_MAJOR_MASK 0xff +#define STS_FM_VERSION_PATCH_SHIFT 24 +#define STS_FM_VERSION_PATCH_MASK 0xff /* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */ #define LCB_CRC_16B 0x0 /* 16b CRC */ @@ -698,7 +701,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd); int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result); /* chip.c */ -void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b); +void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, + u8 *ver_patch); void read_guid(struct hfi1_devdata *dd); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, @@ -1358,6 +1362,8 @@ int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt); int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey); int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); +void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); +void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd); /* * Interrupt source table. 
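(Editorial aside, not part of the patch: the new VNIC match/select fields added to chip.c encode a header location as a quadword index in the bits above QW_SHIFT (6) plus a bit offset within that quadword in the low six bits. Below is a minimal standalone sketch of that packing; RSM_OFFSET is a hypothetical helper that mirrors the patch's L2_TYPE_OFFSET/L4_TYPE_OFFSET macros, and the values in the comments follow from the quadword/bit pairs defined in the patch.)

#include <stdio.h>

#define QW_SHIFT 6ull
/* quadword index in the upper bits, bit offset within that quadword in the low 6 bits */
#define RSM_OFFSET(qw, bit) (((qw) << QW_SHIFT) | (bit))

int main(void)
{
	/* same quadword/bit pairs as the new VNIC defines in chip.c */
	unsigned long long l2_type_match  = RSM_OFFSET(0ull, 61ull); /* L2_TYPE_MATCH_OFFSET    -> 61  */
	unsigned long long l4_type_match  = RSM_OFFSET(1ull, 0ull);  /* L4_TYPE_MATCH_OFFSET    -> 64  */
	unsigned long long veswid_select  = RSM_OFFSET(2ull, 16ull); /* L4_16B_HDR_VESWID_OFFSET -> 144 */
	unsigned long long entropy_select = RSM_OFFSET(1ull, 32ull); /* L2_16B_ENTROPY_OFFSET   -> 96  */

	printf("%llu %llu %llu %llu\n",
	       l2_type_match, l4_type_match, veswid_select, entropy_select);
	return 0;
}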
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 1b783bb..995d62c 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -331,12 +331,15 @@ struct diag_pkt { #define FULL_MGMT_P_KEY 0xFFFF #define DEFAULT_P_KEY LIM_MGMT_P_KEY -#define HFI1_FECN_SHIFT 31 -#define HFI1_FECN_MASK 1 -#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT) -#define HFI1_BECN_SHIFT 30 -#define HFI1_BECN_MASK 1 -#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT) + +/** + * 0xF8 - 4 bits of multicast range and 1 bit for collective range + * Example: For 24 bit LID space, + * Multicast range: 0xF00000 to 0xF7FFFF + * Collective range: 0xF80000 to 0xFFFFFE + */ +#define HFI1_MCAST_NR 0x4 /* Number of top bits set */ +#define HFI1_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */ #define HFI1_PSM_IOC_BASE_SEQ 0x0 diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 7fe9dd8..e9fa3c2 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1,6 +1,6 @@ #ifdef CONFIG_DEBUG_FS /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -51,8 +51,12 @@ #include <linux/export.h> #include <linux/module.h> #include <linux/string.h> +#include <linux/types.h> +#include <linux/ratelimit.h> +#include <linux/fault-inject.h> #include "hfi.h" +#include "trace.h" #include "debugfs.h" #include "device.h" #include "qp.h" @@ -170,7 +174,7 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v) struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; struct hfi1_devdata *dd = dd_from_dev(ibd); - for (j = 0; j < dd->first_user_ctxt; j++) { + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { if (!dd->rcd[j]) continue; n_packets += dd->rcd[j]->opstats->stats[i].n_packets; @@ -196,7 +200,7 @@ static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) if (!*pos) return SEQ_START_TOKEN; - if (*pos >= dd->first_user_ctxt) + if (*pos >= dd->first_dyn_alloc_ctxt) return NULL; return pos; } @@ -210,7 +214,7 @@ static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) return pos; ++*pos; - if (*pos >= dd->first_user_ctxt) + if (*pos >= dd->first_dyn_alloc_ctxt) return NULL; return pos; } @@ -1063,6 +1067,222 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list); DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) DEBUGFS_FILE_OPS(sdma_cpu_list); +#ifdef CONFIG_FAULT_INJECTION +static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void _fault_stats_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _fault_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { + if (!dd->rcd[j]) + continue; + n_packets += dd->rcd[j]->opstats->stats[i].n_packets; + n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return 
SEQ_SKIP; + if (!ibd->fault_opcode->n_rxfaults[i] && + !ibd->fault_opcode->n_txfaults[i]) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i, + (unsigned long long)n_packets, + (unsigned long long)n_bytes, + (unsigned long long)ibd->fault_opcode->n_rxfaults[i], + (unsigned long long)ibd->fault_opcode->n_txfaults[i]); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(fault_stats); +DEBUGFS_SEQ_FILE_OPEN(fault_stats); +DEBUGFS_FILE_OPS(fault_stats); + +static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd) +{ + debugfs_remove_recursive(ibd->fault_opcode->dir); + kfree(ibd->fault_opcode); + ibd->fault_opcode = NULL; +} + +static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd) +{ + struct dentry *parent = ibd->hfi1_ibdev_dbg; + + ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL); + if (!ibd->fault_opcode) + return -ENOMEM; + + ibd->fault_opcode->attr.interval = 1; + ibd->fault_opcode->attr.require_end = ULONG_MAX; + ibd->fault_opcode->attr.stacktrace_depth = 32; + ibd->fault_opcode->attr.dname = NULL; + ibd->fault_opcode->attr.verbose = 0; + ibd->fault_opcode->fault_by_opcode = false; + ibd->fault_opcode->opcode = 0; + ibd->fault_opcode->mask = 0xff; + + ibd->fault_opcode->dir = + fault_create_debugfs_attr("fault_opcode", + parent, + &ibd->fault_opcode->attr); + if (IS_ERR(ibd->fault_opcode->dir)) { + kfree(ibd->fault_opcode); + return -ENOENT; + } + + DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd); + if (!debugfs_create_bool("fault_by_opcode", 0600, + ibd->fault_opcode->dir, + &ibd->fault_opcode->fault_by_opcode)) + goto fail; + if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir, + &ibd->fault_opcode->opcode)) + goto fail; + if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir, + &ibd->fault_opcode->mask)) + goto fail; + + return 0; +fail: + fault_exit_opcode_debugfs(ibd); + return -ENOMEM; +} + +static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd) +{ + debugfs_remove_recursive(ibd->fault_packet->dir); + kfree(ibd->fault_packet); + ibd->fault_packet = NULL; +} + +static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd) +{ + struct dentry *parent = ibd->hfi1_ibdev_dbg; + + ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL); + if (!ibd->fault_packet) + return -ENOMEM; + + ibd->fault_packet->attr.interval = 1; + ibd->fault_packet->attr.require_end = ULONG_MAX; + ibd->fault_packet->attr.stacktrace_depth = 32; + ibd->fault_packet->attr.dname = NULL; + ibd->fault_packet->attr.verbose = 0; + ibd->fault_packet->fault_by_packet = false; + + ibd->fault_packet->dir = + fault_create_debugfs_attr("fault_packet", + parent, + &ibd->fault_opcode->attr); + if (IS_ERR(ibd->fault_packet->dir)) { + kfree(ibd->fault_packet); + return -ENOENT; + } + + if (!debugfs_create_bool("fault_by_packet", 0600, + ibd->fault_packet->dir, + &ibd->fault_packet->fault_by_packet)) + goto fail; + if (!debugfs_create_u64("fault_stats", 0400, + ibd->fault_packet->dir, + &ibd->fault_packet->n_faults)) + goto fail; + + return 0; +fail: + fault_exit_packet_debugfs(ibd); + return -ENOMEM; +} + +static void fault_exit_debugfs(struct hfi1_ibdev *ibd) +{ + fault_exit_opcode_debugfs(ibd); + fault_exit_packet_debugfs(ibd); +} + +static int fault_init_debugfs(struct hfi1_ibdev *ibd) +{ + int ret = 0; + + ret = fault_init_opcode_debugfs(ibd); + if (ret) + return ret; + + ret = fault_init_packet_debugfs(ibd); + if (ret) + fault_exit_opcode_debugfs(ibd); + + return ret; +} + +bool hfi1_dbg_fault_suppress_err(struct 
hfi1_ibdev *ibd) +{ + return ibd->fault_suppress_err; +} + +bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx) +{ + bool ret = false; + struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device); + + if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode) + return false; + if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask)) + return false; + ret = should_fail(&ibd->fault_opcode->attr, 1); + if (ret) { + trace_hfi1_fault_opcode(qp, opcode); + if (rx) + ibd->fault_opcode->n_rxfaults[opcode]++; + else + ibd->fault_opcode->n_txfaults[opcode]++; + } + return ret; +} + +bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) +{ + struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi; + struct hfi1_ibdev *ibd = dev_from_rdi(rdi); + bool ret = false; + + if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet) + return false; + + ret = should_fail(&ibd->fault_packet->attr, 1); + if (ret) { + ++ibd->fault_packet->n_faults; + trace_hfi1_fault_packet(packet); + } + return ret; +} +#endif + void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { char name[sizeof("port0counters") + 1]; @@ -1112,12 +1332,22 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) !port_cntr_ops[i].ops.write ? S_IRUGO : S_IRUGO | S_IWUSR); } + +#ifdef CONFIG_FAULT_INJECTION + debugfs_create_bool("fault_suppress_err", 0600, + ibd->hfi1_ibdev_dbg, + &ibd->fault_suppress_err); + fault_init_debugfs(ibd); +#endif } void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) { if (!hfi1_dbg_root) goto out; +#ifdef CONFIG_FAULT_INJECTION + fault_exit_debugfs(ibd); +#endif debugfs_remove(ibd->hfi1_ibdev_link); debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); out: diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index b6fb681..38c38a9 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h @@ -53,23 +53,79 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd); void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd); void hfi1_dbg_init(void); void hfi1_dbg_exit(void); + +#ifdef CONFIG_FAULT_INJECTION +#include <linux/fault-inject.h> +struct fault_opcode { + struct fault_attr attr; + struct dentry *dir; + bool fault_by_opcode; + u64 n_rxfaults[256]; + u64 n_txfaults[256]; + u8 opcode; + u8 mask; +}; + +struct fault_packet { + struct fault_attr attr; + struct dentry *dir; + bool fault_by_packet; + u64 n_faults; +}; + +bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx); +bool hfi1_dbg_fault_packet(struct hfi1_packet *packet); +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd); +#else +static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) +{ + return false; +} + +static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, + u32 opcode, bool rx) +{ + return false; +} + +static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return false; +} +#endif + #else static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { } -void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) +static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) +{ +} + +static inline void hfi1_dbg_init(void) { } -void hfi1_dbg_init(void) +static inline void hfi1_dbg_exit(void) { } -void hfi1_dbg_exit(void) +static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) { + return false; } +static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, + u32 opcode, bool rx) +{ + return false; +} + +static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return false; +} #endif #endif /* 
_HFI1_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 3881c95..5278954 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -59,6 +59,8 @@ #include "trace.h" #include "qp.h" #include "sdma.h" +#include "debugfs.h" +#include "vnic.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -283,7 +285,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, { struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); - int lnh = be16_to_cpu(rhdr->lrh[0]) & 3; + int lnh = ib_get_lnh(rhdr); struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -295,7 +297,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, /* For TIDERR and RC QPs preemptively schedule a NAK */ struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = be16_to_cpu(rhdr->lrh[1]); + u16 lid = ib_get_dlid(rhdr); u32 qp_num; u32 rcv_flags = 0; @@ -396,7 +398,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, u16 rlid; u8 svc_type, sl, sc5; - sc5 = hdr2sc(rhdr, packet->rhf); + sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; @@ -414,7 +416,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, svc_type = IB_CC_SVCTYPE_UD; break; case IB_QPT_UC: - rlid = be16_to_cpu(rhdr->lrh[3]); + rlid = ib_get_slid(rhdr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_UC; break; @@ -460,7 +462,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = NULL; u32 rqpn = 0, bth1; - u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]); + u16 rlid, dlid = ib_get_dlid(hdr); u8 sc, svc_type; bool is_mcast = false; @@ -471,19 +473,19 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: - rlid = be16_to_cpu(hdr->lrh[3]); + rlid = ib_get_slid(hdr); rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; svc_type = IB_CC_SVCTYPE_UD; is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); break; case IB_QPT_UC: - rlid = qp->remote_ah_attr.dlid; + rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_UC; break; case IB_QPT_RC: - rlid = qp->remote_ah_attr.dlid; + rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_RC; break; @@ -491,16 +493,16 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, return; } - sc = hdr2sc(hdr, pkt->rhf); + sc = hfi1_9B_get_sc5(hdr, pkt->rhf); bth1 = be32_to_cpu(ohdr->bth[1]); - if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { + if (do_cnp && (bth1 & IB_FECN_SMASK)) { u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); } - if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) { + if (!is_mcast && (bth1 & IB_BECN_SMASK)) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 lqpn = bth1 & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; @@ -621,8 +623,7 @@ static 
void __prescan_rxq(struct hfi1_packet *packet) packet->hdr = hfi1_get_msgheader(dd, rhf_addr); hdr = packet->hdr; - - lnh = be16_to_cpu(hdr->lrh[0]) & 3; + lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; @@ -634,7 +635,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) } bth1 = be32_to_cpu(packet->ohdr->bth[1]); - is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK)); + is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK)); if (!is_ecn) goto next; @@ -652,7 +653,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) rcu_read_unlock(); /* turn off BECN, FECN */ - bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK); + bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK); packet->ohdr->bth[1] = cpu_to_be32(bth1); next: update_ps_mdata(&mdata, rcd); @@ -872,20 +873,42 @@ bail: return last; } -static inline void set_all_nodma_rtail(struct hfi1_devdata *dd) +static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt) { int i; - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) + /* + * For dynamically allocated kernel contexts (like vnic) switch + * interrupt handler only for that context. Otherwise, switch + * interrupt handler for all statically allocated kernel contexts. + */ + if (ctxt >= dd->first_dyn_alloc_ctxt) { + dd->rcd[ctxt]->do_interrupt = + &handle_receive_interrupt_nodma_rtail; + return; + } + + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) dd->rcd[i]->do_interrupt = &handle_receive_interrupt_nodma_rtail; } -static inline void set_all_dma_rtail(struct hfi1_devdata *dd) +static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt) { int i; - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) + /* + * For dynamically allocated kernel contexts (like vnic) switch + * interrupt handler only for that context. Otherwise, switch + * interrupt handler for all statically allocated kernel contexts. 
+ */ + if (ctxt >= dd->first_dyn_alloc_ctxt) { + dd->rcd[ctxt]->do_interrupt = + &handle_receive_interrupt_dma_rtail; + return; + } + + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) dd->rcd[i]->do_interrupt = &handle_receive_interrupt_dma_rtail; } @@ -895,8 +918,13 @@ void set_all_slowpath(struct hfi1_devdata *dd) int i; /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */ - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) - dd->rcd[i]->do_interrupt = &handle_receive_interrupt; + for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { + struct hfi1_ctxtdata *rcd = dd->rcd[i]; + + if ((i < dd->first_dyn_alloc_ctxt) || + (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL))) + rcd->do_interrupt = &handle_receive_interrupt; + } } static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, @@ -908,7 +936,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, packet->rhf_addr); u8 etype = rhf_rcv_type(packet->rhf); - if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB && + hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -1006,7 +1035,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) last = RCV_PKT_DONE; if (needset) { dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n"); - set_all_nodma_rtail(dd); + set_nodma_rtail(dd, rcd->ctxt); needset = 0; } } else { @@ -1028,7 +1057,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) if (needset) { dd_dev_info(dd, "Switching to DMA_RTAIL\n"); - set_all_dma_rtail(dd); + set_dma_rtail(dd, rcd->ctxt); needset = 0; } } @@ -1077,10 +1106,10 @@ void receive_interrupt_work(struct work_struct *work) set_link_state(ppd, HLS_UP_ACTIVE); /* - * Interrupt all kernel contexts that could have had an - * interrupt during auto activation. + * Interrupt all statically allocated kernel contexts that could + * have had an interrupt during auto activation. 
*/ - for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++) + for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) force_recv_intr(dd->rcd[i]); } @@ -1294,7 +1323,8 @@ int hfi1_reset_device(int unit) spin_lock_irqsave(&dd->uctxt_lock, flags); if (dd->rcd) - for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) { + for (i = dd->first_dyn_alloc_ctxt; + i < dd->num_rcv_contexts; i++) { if (!dd->rcd[i] || !dd->rcd[i]->cnt) continue; spin_unlock_irqrestore(&dd->uctxt_lock, flags); @@ -1354,6 +1384,9 @@ void handle_eflags(struct hfi1_packet *packet) */ int process_receive_ib(struct hfi1_packet *packet) { + if (unlikely(hfi1_dbg_fault_packet(packet))) + return RHF_RCV_CONTINUE; + trace_hfi1_rcvhdr(packet->rcd->ppd->dd, packet->rcd->ctxt, rhf_err_flags(packet->rhf), @@ -1363,6 +1396,11 @@ int process_receive_ib(struct hfi1_packet *packet) packet->updegr, rhf_egr_index(packet->rhf)); + if (unlikely( + (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && + (packet->rhf & RHF_DC_ERR)))) + return RHF_RCV_CONTINUE; + if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); return RHF_RCV_CONTINUE; @@ -1372,15 +1410,31 @@ int process_receive_ib(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } +static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) +{ + /* Packet received in VNIC context via RSM */ + if (packet->rcd->is_vnic) + return true; + + if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) && + (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR)) + return true; + + return false; +} + int process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; - if (unlikely(rhf_err_flags(packet->rhf))) + if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); + } else if (hfi1_is_vnic_packet(packet)) { + hfi1_vnic_bypass_rcv(packet); + return RHF_RCV_CONTINUE; + } - dd_dev_err(dd, - "Bypass packets are not supported in normal operation. Dropping\n"); + dd_dev_err(dd, "Unsupported bypass packet. Dropping\n"); incr_cntr64(&dd->sw_rcv_bypass_packet_errors); if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) { u64 *flits = packet->ebuf; @@ -1398,6 +1452,12 @@ int process_receive_bypass(struct hfi1_packet *packet) int process_receive_error(struct hfi1_packet *packet) { + /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ + if (unlikely( + hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && + rhf_rcv_type_err(packet->rhf) == 3)) + return RHF_RCV_CONTINUE; + handle_eflags(packet); if (unlikely(rhf_err_flags(packet->rhf))) @@ -1409,6 +1469,8 @@ int process_receive_error(struct hfi1_packet *packet) int kdeth_process_expected(struct hfi1_packet *packet) { + if (unlikely(hfi1_dbg_fault_packet(packet))) + return RHF_RCV_CONTINUE; if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); @@ -1421,6 +1483,8 @@ int kdeth_process_eager(struct hfi1_packet *packet) { if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); + if (unlikely(hfi1_dbg_fault_packet(packet))) + return RHF_RCV_CONTINUE; dd_dev_err(packet->rcd->dd, "Unhandled eager packet received. Dropping.\n"); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f78c739..3d9bce4 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. @@ -586,8 +586,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) * knows where it's own bitmap is within the page. */ memaddr = (unsigned long)(dd->events + - ((uctxt->ctxt - dd->first_user_ctxt) * - HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * + HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; memlen = PAGE_SIZE; /* * v3.7 removes VM_RESERVED but the effect is kept by @@ -597,6 +597,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vmf = 1; break; case STATUS: + if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) { + ret = -EPERM; + goto done; + } memaddr = kvirt_to_phys((void *)dd->status); memlen = PAGE_SIZE; flags |= VM_IO | VM_DONTEXPAND; @@ -756,7 +760,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) * Clear any left over, unhandled events so the next process that * gets this context doesn't get confused. */ - ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) * + ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; *ev = 0; @@ -909,12 +913,18 @@ static int find_shared_ctxt(struct file *fp, if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase)) continue; - for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) { + for (i = dd->first_dyn_alloc_ctxt; + i < dd->num_rcv_contexts; i++) { struct hfi1_ctxtdata *uctxt = dd->rcd[i]; /* Skip ctxts which are not yet open */ if (!uctxt || !uctxt->cnt) continue; + + /* Skip dynamically allocted kernel contexts */ + if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) + continue; + /* Skip ctxt if it doesn't match the requested one */ if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) || @@ -960,7 +970,8 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, return -EIO; } - for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) + for (ctxt = dd->first_dyn_alloc_ctxt; + ctxt < dd->num_rcv_contexts; ctxt++) if (!dd->rcd[ctxt]) break; @@ -1306,7 +1317,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len) */ binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt, fd->subctxt, 0); - offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) * + offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + fd->subctxt) * sizeof(*dd->events)); binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt, @@ -1400,12 +1411,12 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) } spin_lock_irqsave(&dd->uctxt_lock, flags); - for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; + for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) { uctxt = dd->rcd[ctxt]; if (uctxt) { unsigned long *evs = dd->events + - (uctxt->ctxt - dd->first_user_ctxt) * + (uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS; int i; /* @@ -1477,7 +1488,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, if (!dd->events) return 0; - evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) * + evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + subctxt; for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) { diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 0dd50cd..4042c11 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 
Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd, { u64 reg; int ret; - u8 ver_a, ver_b; + u8 ver_major; + u8 ver_minor; + u8 ver_patch; /* * DC Reset sequence @@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd, return -ETIMEDOUT; } - read_misc_status(dd, &ver_a, &ver_b); - dd_dev_info(dd, "8051 firmware version %d.%d\n", - (int)ver_b, (int)ver_a); - dd->dc8051_ver = dc8051_ver(ver_b, ver_a); + read_misc_status(dd, &ver_major, &ver_minor, &ver_patch); + dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", + (int)ver_major, (int)ver_minor, (int)ver_patch); + dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 0808e3c3..f066743 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,7 +1,7 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -54,6 +54,7 @@ #include <linux/list.h> #include <linux/scatterlist.h> #include <linux/slab.h> +#include <linux/idr.h> #include <linux/io.h> #include <linux/fs.h> #include <linux/completion.h> @@ -66,6 +67,7 @@ #include <linux/i2c-algo-bit.h> #include <rdma/ib_hdrs.h> #include <linux/rhashtable.h> +#include <linux/netdevice.h> #include <rdma/rdma_vt.h> #include "chip_registers.h" @@ -278,6 +280,8 @@ struct hfi1_ctxtdata { struct hfi1_devdata *dd; /* so functions that need physical port can get it easily */ struct hfi1_pportdata *ppd; + /* associated msix interrupt */ + u32 msix_intr; /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ void *subctxt_uregbase; /* An array of pages for the eager receive buffers * N */ @@ -337,6 +341,12 @@ struct hfi1_ctxtdata { * packets with the wrong interrupt handler. */ int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); + + /* Indicates that this is vnic context */ + bool is_vnic; + + /* vnic queue index this context is mapped to */ + u8 vnic_q_idx; }; /* @@ -474,7 +484,7 @@ struct rvt_sge_state; #define HFI1_PART_ENFORCE_OUT 0x2 /* how often we check for synthetic counter wrap around */ -#define SYNTH_CNT_TIME 2 +#define SYNTH_CNT_TIME 3 /* Counter flags */ #define CNTR_NORMAL 0x0 /* Normal counters, just read register */ @@ -808,6 +818,32 @@ struct hfi1_asic_data { struct hfi1_i2c_bus *i2c_bus1; }; +/* sizes for both the QP and RSM map tables */ +#define NUM_MAP_ENTRIES 256 +#define NUM_MAP_REGS 32 + +/* + * Number of VNIC contexts used. Ensure it is less than or equal to + * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE). + */ +#define HFI1_NUM_VNIC_CTXT 8 + +/* Number of VNIC RSM entries */ +#define NUM_VNIC_MAP_ENTRIES 8 + +/* Virtual NIC information */ +struct hfi1_vnic_data { + struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; + struct kmem_cache *txreq_cache; + u8 num_vports; + struct idr vesw_idr; + u8 rmt_start; + u8 num_ctxt; + u32 msix_idx; +}; + +struct hfi1_vnic_vport_info; + /* device data struct now contains only "general per-device" info. * fields related to a physical IB port are in a hfi1_pportdata struct. 
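struct hfi1_vnic_data above carries the per-device VNIC state: the dynamically allocated receive contexts, the txreq cache, and an IDR (vesw_idr) that, by its name, maps a virtual Ethernet switch id to its vport. A minimal lookup sketch, assuming the structure is embedded in hfi1_devdata as dd->vnic (it is added there further below) and with the vesw_id variable and the call site invented purely for illustration:

	struct hfi1_vnic_vport_info *vinfo;

	rcu_read_lock();
	vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);
	rcu_read_unlock();
	if (!vinfo)
		return;	/* no vport registered for this VESW id */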
*/ @@ -926,8 +962,9 @@ struct hfi1_devdata { spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */ spinlock_t uctxt_lock; /* rcd and user context changes */ - /* exclusive access to 8051 */ - spinlock_t dc8051_lock; + struct mutex dc8051_lock; /* exclusive access to 8051 */ + struct workqueue_struct *update_cntr_wq; + struct work_struct update_cntr_work; /* exclusive access to 8051 memory */ spinlock_t dc8051_memlock; int dc8051_timed_out; /* remember if the 8051 timed out */ @@ -1020,7 +1057,7 @@ struct hfi1_devdata { u8 qos_shift; u16 irev; /* implementation revision */ - u16 dc8051_ver; /* 8051 firmware version */ + u32 dc8051_ver; /* 8051 firmware version */ spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ struct platform_config platform_config; @@ -1031,6 +1068,7 @@ struct hfi1_devdata { /* MSI-X information */ struct hfi1_msix_entry *msix_entries; u32 num_msix_entries; + u32 first_dyn_msix_idx; /* INTx information */ u32 requested_intx_irq; /* did we request one? */ @@ -1115,6 +1153,9 @@ struct hfi1_devdata { send_routine process_dma_send; void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); + int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx, + struct hfi1_vnic_vport_info *vinfo, + struct sk_buff *skb, u64 pbc, u8 plen); /* hfi1_pportdata, points to array of (physical) port-specific * data structs, indexed by pidx (0..n-1) */ @@ -1126,8 +1167,8 @@ struct hfi1_devdata { u16 flags; /* Number of physical ports available */ u8 num_pports; - /* Lowest context number which can be used by user processes */ - u8 first_user_ctxt; + /* Lowest context number which can be used by user processes or VNIC */ + u8 first_dyn_alloc_ctxt; /* adding a new field here would make it part of this cacheline */ /* seqlock for sc2vl */ @@ -1167,15 +1208,24 @@ struct hfi1_devdata { bool eprom_available; /* true if EPROM is available for this device */ bool aspm_supported; /* Does HW support ASPM */ bool aspm_enabled; /* ASPM state: enabled/disabled */ - struct rhashtable sdma_rht; + struct rhashtable *sdma_rht; struct kobject kobj; + + /* vnic data */ + struct hfi1_vnic_data vnic; }; +static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) +{ + return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES; +} + /* 8051 firmware version helper */ -#define dc8051_ver(a, b) ((a) << 8 | (b)) -#define dc8051_ver_maj(a) ((a & 0xff00) >> 8) -#define dc8051_ver_min(a) (a & 0x00ff) +#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c)) +#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16) +#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8) +#define dc8051_ver_patch(a) ((a) & 0x0000ff) /* f_put_tid types */ #define PT_EXPECTED 0 @@ -1235,6 +1285,9 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *, int); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); void set_all_slowpath(struct hfi1_devdata *dd); +void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd); +void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd); +void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd); extern const struct pci_device_id hfi1_pci_tbl[]; @@ -1254,16 +1307,24 @@ int hfi1_reset_device(int); /* return the driver's idea of the logical OPA port state */ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) { - return ppd->lstate; /* use the cached value */ + /* + * The driver 
does some processing from the time the logical + * link state is at INIT to the time the SM can be notified + * as such. Return IB_PORT_DOWN until the software state + * is ready. + */ + if (ppd->lstate == IB_PORT_INIT && !(ppd->host_link_state & HLS_UP)) + return IB_PORT_DOWN; + else + return ppd->lstate; } void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ -static inline int hdr2sc(struct ib_header *hdr, u64 rhf) +static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf) { - return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | - ((!!(rhf_dc_info(rhf))) << 4); + return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4); } #define HFI1_JKEY_WIDTH 16 @@ -1597,9 +1658,9 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, u32 bth1; bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { + if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) { hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return bth1 & HFI1_FECN_SMASK; + return !!(bth1 & IB_FECN_SMASK); } return false; } diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f40864e..4d6b9f8 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -65,6 +65,7 @@ #include "verbs.h" #include "aspm.h" #include "affinity.h" +#include "vnic.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -139,7 +140,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) goto nomem; /* create one or more kernel contexts */ - for (i = 0; i < dd->first_user_ctxt; ++i) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { struct hfi1_pportdata *ppd; struct hfi1_ctxtdata *rcd; @@ -214,9 +215,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, u32 base; if (dd->rcv_entries.nctxt_extra > - dd->num_rcv_contexts - dd->first_user_ctxt) + dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt) kctxt_ngroups = (dd->rcv_entries.nctxt_extra - - (dd->num_rcv_contexts - dd->first_user_ctxt)); + (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)); rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa); if (rcd) { u32 rcvtids, max_entries; @@ -238,27 +239,29 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, * Calculate the context's RcvArray entry starting point. * We do this here because we have to take into account all * the RcvArray entries that previous context would have - * taken and we have to account for any extra groups - * assigned to the kernel or user contexts. + * taken and we have to account for any extra groups assigned + * to the static (kernel) or dynamic (vnic/user) contexts. 
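To make the bookkeeping described above concrete, here is a worked example with purely illustrative numbers (none of them come from this patch): take ngroups = 8 and group_size = 8 RcvArray entries, two statically allocated kernel contexts (n_krcv_queues = first_dyn_alloc_ctxt = 2), three dynamically allocated contexts, and nctxt_extra = 4, so kctxt_ngroups above works out to 1. The calculation just below then gives base = 0 for kernel context 0 (which spans ngroups + 1 = 9 groups), base = 1 + 1 * 8 = 9 for kernel context 1, and base = 2 * 8 + 1 + ct * 9 = 17, 26 and 35 for the three dynamic contexts; each context's eager_base is then base * group_size entries into the RcvArray.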
*/ - if (ctxt < dd->first_user_ctxt) { + if (ctxt < dd->first_dyn_alloc_ctxt) { if (ctxt < kctxt_ngroups) { base = ctxt * (dd->rcv_entries.ngroups + 1); rcd->rcv_array_groups++; - } else + } else { base = kctxt_ngroups + (ctxt * dd->rcv_entries.ngroups); + } } else { - u16 ct = ctxt - dd->first_user_ctxt; + u16 ct = ctxt - dd->first_dyn_alloc_ctxt; base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) + kctxt_ngroups); if (ct < dd->rcv_entries.nctxt_extra) { base += ct * (dd->rcv_entries.ngroups + 1); rcd->rcv_array_groups++; - } else + } else { base += dd->rcv_entries.nctxt_extra + (ct * dd->rcv_entries.ngroups); + } } rcd->eager_base = base * dd->rcv_entries.group_size; @@ -322,7 +325,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, } rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; - if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ + /* Applicable only for statically created kernel contexts */ + if (ctxt < dd->first_dyn_alloc_ctxt) { rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), GFP_KERNEL, numa); if (!rcd->opstats) @@ -482,6 +486,9 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, default_pkey_idx = 1; ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY; + ppd->part_enforce |= HFI1_PART_ENFORCE_IN; + ppd->part_enforce |= HFI1_PART_ENFORCE_OUT; + if (loopback) { hfi1_early_err(&pdev->dev, "Faking data partition 0x8001 in idx %u\n", @@ -585,7 +592,7 @@ static void enable_chip(struct hfi1_devdata *dd) * Enable kernel ctxts' receive and receive interrupt. * Other ctxts done as user opens and initializes them. */ - for (i = 0; i < dd->first_user_ctxt; ++i) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB; rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; @@ -679,6 +686,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) dd->process_pio_send = hfi1_verbs_send_pio; dd->process_dma_send = hfi1_verbs_send_dma; dd->pio_inline_send = pio_copy; + dd->process_vnic_dma_send = hfi1_vnic_send_dma; if (is_ax(dd)) { atomic_set(&dd->drop_packet, DROP_PACKET_ON); @@ -714,7 +722,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) } /* dd->rcd can be NULL if early initialization failed */ - for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) { + for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) { /* * Set up the (kernel) rcvhdr queue and egr TIDs. 
If doing * re-init, the simplest way to handle this is to free @@ -1078,11 +1086,11 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) spin_lock_init(&dd->uctxt_lock); spin_lock_init(&dd->hfi1_diag_trans_lock); spin_lock_init(&dd->sc_init_lock); - spin_lock_init(&dd->dc8051_lock); spin_lock_init(&dd->dc8051_memlock); seqlock_init(&dd->sc2vl_lock); spin_lock_init(&dd->sde_map_lock); spin_lock_init(&dd->pio_map_lock); + mutex_init(&dd->dc8051_lock); init_waitqueue_head(&dd->event_queue); dd->int_counter = alloc_percpu(u64); @@ -1425,6 +1433,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* First, lock the non-writable module parameters */ HFI1_CAP_LOCK(); + /* Validate dev ids */ + if (!(ent->device == PCI_DEVICE_ID_INTEL0 || + ent->device == PCI_DEVICE_ID_INTEL1)) { + hfi1_early_err(&pdev->dev, + "Failing on unknown Intel deviceid 0x%x\n", + ent->device); + ret = -ENODEV; + goto bail; + } + /* Validate some global module parameters */ ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt); if (ret) @@ -1470,15 +1488,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto bail; - if (!(ent->device == PCI_DEVICE_ID_INTEL0 || - ent->device == PCI_DEVICE_ID_INTEL1)) { - hfi1_early_err(&pdev->dev, - "Failing on unknown Intel deviceid 0x%x\n", - ent->device); - ret = -ENODEV; - goto clean_bail; - } - /* * Do device-specific initialization, function table setup, dd * allocation, etc. @@ -1497,6 +1506,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* do the generic initialization */ initfail = hfi1_init(dd, 0); + /* setup vnic */ + hfi1_vnic_setup(dd); + ret = hfi1_register_ib_device(dd); /* @@ -1530,6 +1542,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) hfi1_device_remove(dd); if (!ret) hfi1_unregister_ib_device(dd); + hfi1_vnic_cleanup(dd); postinit_cleanup(dd); if (initfail) ret = initfail; @@ -1574,6 +1587,9 @@ static void remove_one(struct pci_dev *pdev) /* unregister from IB core */ hfi1_unregister_ib_device(dd); + /* cleanup vnic */ + hfi1_vnic_cleanup(dd); + /* * Disable the IB link, disable interrupts on the device, * clear dma engines, etc. @@ -1613,8 +1629,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize * sizeof(u32)); - gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? 
- GFP_USER : GFP_KERNEL; + if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) || + (rcd->sc && (rcd->sc->type == SC_KERNEL))) + gfp_flags = GFP_KERNEL; + else + gfp_flags = GFP_USER; rcd->rcvhdrq = dma_zalloc_coherent( &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma, gfp_flags | __GFP_COMP); diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 65348d1..232014d 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -131,19 +131,24 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { set_up_vl15(dd, dd->vau, dd->vl15_init); assign_remote_cm_au_table(dd, dd->vcu); - ppd->neighbor_guid = - read_csr(dd, DC_DC8051_STS_REMOTE_GUID); - ppd->neighbor_type = - read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) & - DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK; - ppd->neighbor_port_number = - read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) & - DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; - dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n", - ppd->neighbor_guid, - ppd->neighbor_type); } + ppd->neighbor_guid = + read_csr(dd, DC_DC8051_STS_REMOTE_GUID); + ppd->neighbor_type = + read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) & + DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK; + ppd->neighbor_port_number = + read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) & + DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; + ppd->neighbor_fm_security = + read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) & + DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK; + dd_dev_info(dd, + "Neighbor Guid %llx, Type %d, Port Num %d\n", + ppd->neighbor_guid, ppd->neighbor_type, + ppd->neighbor_port_number); + /* physical link went up */ ppd->linkup = 1; ppd->offline_disabled_reason = diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 09cda3c..5977673 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -53,6 +53,7 @@ #include "mad.h" #include "trace.h" #include "qp.h" +#include "vnic.h" /* the reset value from the FM is supposed to be 0xffff, handle both */ #define OPA_LINK_WIDTH_RESET_OLD 0x0fff @@ -650,9 +651,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0); pi->port_packet_format.supported = - cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B); + cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B | + OPA_PORT_PACKET_FORMAT_16B); pi->port_packet_format.enabled = - cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B); + cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B | + OPA_PORT_PACKET_FORMAT_16B); /* flit_control.interleave is (OPA V1, version .76): * bits use @@ -701,7 +704,13 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT; pi->buffer_units = cpu_to_be32(buffer_units); - pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported); + pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported | + OPA_CAP_MASK3_IsEthOnFabricSupported); + /* Driver does not support mcast/collective configuration */ + pi->opa_cap_mask &= + cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported); + pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7) + << 3 | (HFI1_MCAST_NR & 0x7)); /* HFI supports a replay buffer 128 LTPs in size */ pi->replay_depth.buffer = 0x80; @@ -1146,16 +1155,6 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ppd->linkinit_reason = (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON); - /* enable/disable SW pkey checking as per FM control */ - if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN) - ppd->part_enforce |= HFI1_PART_ENFORCE_IN; - else - ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN; - - if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT) - ppd->part_enforce |= HFI1_PART_ENFORCE_OUT; - else - ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT; /* Must be a valid unicast LID address. */ if ((smlid == 0 && ls_old > IB_PORT_INIT) || @@ -1167,9 +1166,9 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, spin_lock_irqsave(&ibp->rvp.lock, flags); if (ibp->rvp.sm_ah) { if (smlid != ibp->rvp.sm_lid) - ibp->rvp.sm_ah->attr.dlid = smlid; + rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr, smlid); if (msl != ibp->rvp.sm_sl) - ibp->rvp.sm_ah->attr.sl = msl; + rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl); } spin_unlock_irqrestore(&ibp->rvp.lock, flags); if (smlid != ibp->rvp.sm_lid) @@ -1465,25 +1464,15 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len); } -static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data) -{ - u64 *val = data; - - *val++ = read_csr(dd, SEND_SC2VLT0); - *val++ = read_csr(dd, SEND_SC2VLT1); - *val++ = read_csr(dd, SEND_SC2VLT2); - *val++ = read_csr(dd, SEND_SC2VLT3); - return 0; -} - #define ILLEGAL_VL 12 /* * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except * for SC15, which must map to VL15). If we don't remap things this * way it is possible for VL15 counters to increment when we try to * send on a SC which is mapped to an invalid VL. + * When getting the table convert ILLEGAL_VL back to VL15. 
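A minimal sketch of the round trip this symmetric filtering now provides (an illustrative fragment only, built around the reworked filter_sc2vlt() shown just below):

	u8 tbl[OPA_MAX_SCS];

	memset(tbl, 0x0f, sizeof(tbl));	/* FM programs VL15 on every SC */
	filter_sc2vlt(tbl, true);	/* data SCs are stored as ILLEGAL_VL (12) */
	filter_sc2vlt(tbl, false);	/* ...and read back as 0xf for the FM */

SC15 is skipped in both directions, so it keeps its mandatory VL15 mapping.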
*/ -static void filter_sc2vlt(void *data) +static void filter_sc2vlt(void *data, bool set) { int i; u8 *pd = data; @@ -1491,8 +1480,14 @@ static void filter_sc2vlt(void *data) for (i = 0; i < OPA_MAX_SCS; i++) { if (i == 15) continue; - if ((pd[i] & 0x1f) == 0xf) - pd[i] = ILLEGAL_VL; + + if (set) { + if ((pd[i] & 0x1f) == 0xf) + pd[i] = ILLEGAL_VL; + } else { + if ((pd[i] & 0x1f) == ILLEGAL_VL) + pd[i] = 0xf; + } } } @@ -1500,7 +1495,7 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data) { u64 *val = data; - filter_sc2vlt(data); + filter_sc2vlt(data, true); write_csr(dd, SEND_SC2VLT0, *val++); write_csr(dd, SEND_SC2VLT1, *val++); @@ -1512,6 +1507,19 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data) return 0; } +static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data) +{ + u64 *val = (u64 *)data; + + *val++ = read_csr(dd, SEND_SC2VLT0); + *val++ = read_csr(dd, SEND_SC2VLT1); + *val++ = read_csr(dd, SEND_SC2VLT2); + *val++ = read_csr(dd, SEND_SC2VLT3); + + filter_sc2vlt((u64 *)data, false); + return 0; +} + static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) @@ -1986,31 +1994,6 @@ struct opa_pma_mad { u8 data[2024]; } __packed; -struct opa_class_port_info { - u8 base_version; - u8 class_version; - __be16 cap_mask; - __be32 cap_mask2_resp_time; - - u8 redirect_gid[16]; - __be32 redirect_tc_fl; - __be32 redirect_lid; - __be32 redirect_sl_qp; - __be32 redirect_qkey; - - u8 trap_gid[16]; - __be32 trap_tc_fl; - __be32 trap_lid; - __be32 trap_hl_qp; - __be32 trap_qkey; - - __be16 trap_pkey; - __be16 redirect_pkey; - - u8 trap_sl_rsvd; - u8 reserved[3]; -} __packed; - struct opa_port_status_req { __u8 port_num; __u8 reserved[3]; diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 0829fce..e39e01b 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -583,7 +583,7 @@ pci_mmio_enabled(struct pci_dev *pdev) if (words == ~0ULL) ret = PCI_ERS_RESULT_NEED_RESET; dd_dev_info(dd, - "HFI1 mmio_enabled function called, read wordscntr %Lx, returning %d\n", + "HFI1 mmio_enabled function called, read wordscntr %llx, returning %d\n", words, ret); } return ret; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 615be68..ed72b5a 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -703,6 +703,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, { struct send_context_info *sci; struct send_context *sc = NULL; + int req_type = type; dma_addr_t dma; unsigned long flags; u64 reg; @@ -729,6 +730,13 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; } + /* + * VNIC contexts are dynamically allocated. + * Hence, pick a user context for VNIC. + */ + if (type == SC_VNIC) + type = SC_USER; + spin_lock_irqsave(&dd->sc_lock, flags); ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); if (ret) { @@ -738,6 +746,15 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; } + /* + * VNIC contexts are used by kernel driver. + * Hence, mark them as kernel contexts. 
+ */ + if (req_type == SC_VNIC) { + dd->send_contexts[sw_index].type = SC_KERNEL; + type = SC_KERNEL; + } + sci = &dd->send_contexts[sw_index]; sci->sc = sc; diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 867e5ff..99ca5ed 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -1,7 +1,7 @@ #ifndef _PIO_H #define _PIO_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -54,6 +54,12 @@ #define SC_USER 3 /* must be the last one: it may take all left */ #define SC_MAX 4 /* count of send context types */ +/* + * SC_VNIC types are allocated (dynamically) from the user context pool, + * (SC_USER) and used by kernel driver as kernel contexts (SC_KERNEL). + */ +#define SC_VNIC SC_MAX + /* invalid send context index */ #define INVALID_SCI 0xff @@ -195,7 +201,7 @@ struct sc_config_sizes { * | mask | --/ |--------------------| * |--------------------------| -/ | * | * | actual_vls (max 8) | -/ |--------------------| - * |--------------------------| --/ | ksc[n] -> sc n | + * |--------------------------| --/ | ksc[n-1] -> sc n | * | vls (max 8) | -/ +--------------------+ * |--------------------------| --/ * | map[0] |-/ @@ -208,21 +214,21 @@ struct sc_config_sizes { * |--------------------------| |--------------------| * | map[vls - 1] |- | * | * +--------------------------+ \- |--------------------| - * \- | ksc[m] -> sc m+n | + * \- | ksc[m-1] -> sc m+n | * \ +--------------------+ * \- * \ - * \- +--------------------+ - * \- | mask | - * \ |--------------------| - * \- | ksc[0] -> sc 1+m+n | - * \- |--------------------| - * >| ksc[1] -> sc 2+m+n | - * |--------------------| - * | * | - * |--------------------| - * | ksc[o] -> sc o+m+n | - * +--------------------+ + * \- +----------------------+ + * \- | mask | + * \ |----------------------| + * \- | ksc[0] -> sc 1+m+n | + * \- |----------------------| + * >| ksc[1] -> sc 2+m+n | + * |----------------------| + * | * | + * |----------------------| + * | ksc[o-1] -> sc o+m+n | + * +----------------------+ * */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index c4ebd09..4573e4c 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -294,7 +294,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, ah = ibah_to_rvtah(wqe->ud_wr.ah); if (wqe->length > (1 << ah->log_pmtu)) return -EINVAL; - if (ibp->sl_to_sc[ah->attr.sl] == 0xf) + if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) return -EINVAL; default: break; @@ -631,8 +631,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->s_tail, qp->s_head, qp->s_size, qp->s_avail, qp->remote_qpn, - qp->remote_ah_attr.dlid, - qp->remote_ah_attr.sl, + rdma_ah_get_dlid(&qp->remote_ah_attr), + rdma_ah_get_sl(&qp->remote_ah_attr), qp->pmtu, qp->s_retry, qp->s_retry_cnt, @@ -748,7 +748,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp) qp->s_mig_state = IB_MIG_MIGRATED; qp->remote_ah_attr = qp->alt_ah_attr; - qp->port_num = qp->alt_ah_attr.port_num; + qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr); qp->s_pkey_index = qp->s_alt_pkey_index; qp->s_flags |= RVT_S_AHG_CLEAR; priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); @@ -778,7 +778,7 @@ u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) u8 sc, vl; ibp = &dd->pport[qp->port_num - 1].ibport_data; - sc = 
ibp->sl_to_sc[qp->remote_ah_attr.sl]; + sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; vl = sc_to_vlt(dd, sc); mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu); @@ -861,7 +861,7 @@ void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl) if (qp->port_num == ppd->port && (qp->ibqp.qp_type == IB_QPT_UC || qp->ibqp.qp_type == IB_QPT_RC) && - qp->remote_ah_attr.sl == sl && + rdma_ah_get_sl(&qp->remote_ah_attr) == sl && (ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK)) { spin_lock_irq(&qp->r_lock); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 7382be1..75a729c 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -274,7 +274,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail_no_tx; ohdr = &ps->s_txreq->phdr.hdr.u.oth; - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; /* Sending responses has higher priority over sending requests. */ @@ -744,9 +744,10 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, /* Construct the header */ /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */ hwords = 6; - if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { hwords += hfi1_make_grh(ibp, &hdr.u.l.grh, - &qp->remote_ah_attr.grh, hwords, 0); + rdma_ah_read_grh(&qp->remote_ah_attr), + hwords, 0); ohdr = &hdr.u.l.oth; lrh0 = HFI1_LRH_GRH; } else { @@ -763,17 +764,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, IB_AETH_CREDIT_SHIFT)); else ohdr->u.aeth = rvt_compute_aeth(qp); - sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT); - lrh0 |= (sc5 & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4; + lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) + & 0xf) << 4; hdr.lrh[0] = cpu_to_be16(lrh0); - hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits); + hdr.lrh[3] = cpu_to_be16(ppd->lid | + rdma_ah_get_path_bits(&qp->remote_ah_attr)); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); - ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << HFI1_BECN_SHIFT); + ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT); ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn)); /* Don't try to send ACKs if the link isn't ACTIVE */ @@ -994,12 +997,12 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) return; /* Find out where the BTH is */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) + if (ib_get_lnh(hdr) == HFI1_LRH_BTH) ohdr = &hdr->u.oth; else ohdr = &hdr->u.l.oth; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + opcode = ib_bth_get_opcode(ohdr); if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) { WARN_ON(!qp->s_rdma_ack_cnt); @@ -1028,13 +1031,17 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; s_last = qp->s_last; + trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; /* see post_send() */ barrier(); rvt_put_swqe(wqe); - 
rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); + rvt_qp_swqe_complete(qp, + wqe, + ib_hfi1_wc_opcode[wqe->wr.opcode], + IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before re-sending, @@ -1076,12 +1083,16 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, rvt_put_swqe(wqe); s_last = qp->s_last; + trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; /* see post_send() */ barrier(); - rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); + rvt_qp_swqe_complete(qp, + wqe, + ib_hfi1_wc_opcode[wqe->wr.opcode], + IB_WC_SUCCESS); } else { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -1092,10 +1103,11 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, */ if (ppd->dd->flags & HFI1_HAS_SEND_DMA) { struct sdma_engine *engine; + u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr); u8 sc5; /* For now use sc to find engine */ - sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; + sc5 = ibp->sl_to_sc[sl]; engine = qp_to_sdma_engine(qp, sc5); sdma_engine_progress_schedule(engine); } @@ -1516,7 +1528,7 @@ read_middle: if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd)) goto ack_done; /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* * Check that the data size is >= 0 && <= pmtu. * Remember to account for ICRC (4). @@ -1540,7 +1552,7 @@ read_middle: if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* * Check that the data size is >= 1 && <= pmtu. * Remember to account for ICRC (4). @@ -1922,7 +1934,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) int diff; struct ib_reth *reth; unsigned long flags; - int ret, is_fecn = 0; + int ret; + bool is_fecn = false; bool copy_last = false; u32 rkey; @@ -1934,7 +1947,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) is_fecn = process_ecn(qp, packet, false); psn = be32_to_cpu(ohdr->bth[2]); - opcode = (bth0 >> 24) & 0xff; + opcode = ib_bth_get_opcode(ohdr); /* * Process responses (ACKs) before anything else. Note that the @@ -2065,7 +2078,7 @@ no_immediate_data: wc.ex.imm_data = 0; send_last: /* Get the number of bytes the message was padded by. */ - pad = (bth0 >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -2089,7 +2102,7 @@ send_last: wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); /* * It seems that IB mandates the presence of an SL in a * work completion only for the UD transport (see section @@ -2101,7 +2114,7 @@ send_last: * * See also OPA Vol. 1, section 9.7.6, and table 9-17. */ - wc.sl = qp->remote_ah_attr.sl; + wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); /* zero fields that are N/A */ wc.vendor_err = 0; wc.pkey_index = 0; @@ -2378,7 +2391,7 @@ void hfi1_rc_hdrerr( return; psn = be32_to_cpu(ohdr->bth[2]); - opcode = (bth0 >> 24) & 0xff; + opcode = ib_bth_get_opcode(ohdr); /* Only deal with RDMA Writes for now */ if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index aa15bcb..891ba0a 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. 
+ * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -219,72 +219,84 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, { __be64 guid; unsigned long flags; - u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; + u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { if (!has_grh) { - if (qp->alt_ah_attr.ah_flags & IB_AH_GRH) + if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) & + IB_AH_GRH) goto err; } else { - if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH)) + const struct ib_global_route *grh; + + if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) & + IB_AH_GRH)) goto err; - guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index); + grh = rdma_ah_read_grh(&qp->alt_ah_attr); + guid = get_sguid(ibp, grh->sgid_index); if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, guid)) goto err; if (!gid_ok( &hdr->u.l.grh.sgid, - qp->alt_ah_attr.grh.dgid.global.subnet_prefix, - qp->alt_ah_attr.grh.dgid.global.interface_id)) + grh->dgid.global.subnet_prefix, + grh->dgid.global.interface_id)) goto err; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, - sc5, be16_to_cpu(hdr->lrh[3])))) { + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, + ib_get_slid(hdr)))) { hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + ib_get_sl(hdr), 0, qp->ibqp.qp_num, - be16_to_cpu(hdr->lrh[3]), - be16_to_cpu(hdr->lrh[1])); + ib_get_slid(hdr), + ib_get_dlid(hdr)); goto err; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ - if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid || - ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num) + if (ib_get_slid(hdr) != + rdma_ah_get_dlid(&qp->alt_ah_attr) || + ppd_from_ibp(ibp)->port != + rdma_ah_get_port_num(&qp->alt_ah_attr)) goto err; spin_lock_irqsave(&qp->s_lock, flags); hfi1_migrate_qp(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } else { if (!has_grh) { - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & + IB_AH_GRH) goto err; } else { - if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) + const struct ib_global_route *grh; + + if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & + IB_AH_GRH)) goto err; - guid = get_sguid(ibp, - qp->remote_ah_attr.grh.sgid_index); + grh = rdma_ah_read_grh(&qp->remote_ah_attr); + guid = get_sguid(ibp, grh->sgid_index); if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, guid)) goto err; if (!gid_ok( &hdr->u.l.grh.sgid, - qp->remote_ah_attr.grh.dgid.global.subnet_prefix, - qp->remote_ah_attr.grh.dgid.global.interface_id)) + grh->dgid.global.subnet_prefix, + grh->dgid.global.interface_id)) goto err; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, - sc5, be16_to_cpu(hdr->lrh[3])))) { + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, + ib_get_slid(hdr)))) { hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + ib_get_sl(hdr), 0, qp->ibqp.qp_num, - be16_to_cpu(hdr->lrh[3]), - be16_to_cpu(hdr->lrh[1])); + ib_get_slid(hdr), + ib_get_dlid(hdr)); goto err; } /* Validate the SLID. See Ch. 
9.6.1.5 */ - if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid || + if (ib_get_slid(hdr) != + rdma_ah_get_dlid(&qp->remote_ah_attr) || ppd_from_ibp(ibp)->port != qp->port_num) goto err; if (qp->s_mig_state == IB_MIG_REARM && @@ -542,8 +554,8 @@ do_write: wc.byte_len = wqe->length; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); + wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, @@ -637,7 +649,7 @@ done: * Return the size of the header in 32 bit words. */ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, - struct ib_global_route *grh, u32 hwords, u32 nwords) + const struct ib_global_route *grh, u32 hwords, u32 nwords) { hdr->version_tclass_flow = cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) | @@ -731,15 +743,17 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, extra_bytes = -ps->s_txreq->s_cur_size & 3; nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2; lrh0 = HFI1_LRH_BTH; - if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += hfi1_make_grh(ibp, - &ps->s_txreq->phdr.hdr.u.l.grh, - &qp->remote_ah_attr.grh, - qp->s_hdrwords, nwords); + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { + qp->s_hdrwords += + hfi1_make_grh(ibp, + &ps->s_txreq->phdr.hdr.u.l.grh, + rdma_ah_read_grh(&qp->remote_ah_attr), + qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; middle = 0; } - lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4; + lrh0 |= (priv->s_sc & 0xf) << 12 | + (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; /* * reset s_ahg/AHG fields * @@ -763,11 +777,13 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, else qp->s_flags &= ~RVT_S_AHG_VALID; ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + ps->s_txreq->phdr.hdr.lrh[1] = + cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); ps->s_txreq->phdr.hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | - qp->remote_ah_attr.src_path_bits); + ps->s_txreq->phdr.hdr.lrh[3] = + cpu_to_be16(ppd_from_ibp(ibp)->lid | + rdma_ah_get_path_bits(&qp->remote_ah_attr)); bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0); @@ -775,7 +791,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, if (qp->s_flags & RVT_S_ECN) { qp->s_flags &= ~RVT_S_ECN; /* we recently received a FECN, so return a BECN */ - bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT); + bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); } ohdr->bth[1] = cpu_to_be32(bth1); ohdr->bth[2] = cpu_to_be32(bth2); @@ -784,23 +800,29 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, /* when sending, force a reschedule every one of these periods */ #define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */ +void hfi1_do_send_from_rvt(struct rvt_qp *qp) +{ + hfi1_do_send(qp, false); +} + void _hfi1_do_send(struct work_struct *work) { struct iowait *wait = container_of(work, struct iowait, iowork); struct rvt_qp *qp = iowait_to_qp(wait); - hfi1_do_send(qp); + hfi1_do_send(qp, true); } /** * hfi1_do_send - perform a send on a QP * @work: contains a pointer to the QP + * @in_thread: true if 
in a workqueue thread * * Process entries in the send work queue until credit or queue is * exhausted. Only allow one CPU to send a packet per QP. * Otherwise, two threads could send packets out of order. */ -void hfi1_do_send(struct rvt_qp *qp) +void hfi1_do_send(struct rvt_qp *qp, bool in_thread) { struct hfi1_pkt_state ps; struct hfi1_qp_priv *priv = qp->priv; @@ -815,9 +837,9 @@ void hfi1_do_send(struct rvt_qp *qp) switch (qp->ibqp.qp_type) { case IB_QPT_RC: - if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc - ) - 1)) == - ps.ppd->lid)) { + if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & + ~((1 << ps.ppd->lmc) - 1)) == + ps.ppd->lid)) { ruc_loopback(qp); return; } @@ -825,9 +847,9 @@ void hfi1_do_send(struct rvt_qp *qp) timeout_int = (qp->timeout_jiffies); break; case IB_QPT_UC: - if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc - ) - 1)) == - ps.ppd->lid)) { + if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & + ~((1 << ps.ppd->lmc) - 1)) == + ps.ppd->lid)) { ruc_loopback(qp); return; } @@ -868,8 +890,10 @@ void hfi1_do_send(struct rvt_qp *qp) qp->s_hdrwords = 0; /* allow other tasks to run */ if (unlikely(time_after(jiffies, timeout))) { - if (workqueue_congested(cpu, - ps.ppd->hfi1_wq)) { + if (!in_thread || + workqueue_congested( + cpu, + ps.ppd->hfi1_wq)) { spin_lock_irqsave( &qp->s_lock, ps.flags); @@ -882,11 +906,9 @@ void hfi1_do_send(struct rvt_qp *qp) *ps.ppd->dd->send_schedule); return; } - if (!irqs_disabled()) { - cond_resched(); - this_cpu_inc( - *ps.ppd->dd->send_schedule); - } + cond_resched(); + this_cpu_inc( + *ps.ppd->dd->send_schedule); timeout = jiffies + (timeout_int) / 8; } spin_lock_irqsave(&qp->s_lock, ps.flags); @@ -909,8 +931,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, last = qp->s_last; old_last = last; + trace_hfi1_qp_send_completion(qp, wqe, last); if (++last >= qp->s_size) last = 0; + trace_hfi1_qp_send_completion(qp, wqe, last); qp->s_last = last; /* See post_send() */ barrier(); @@ -920,7 +944,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - rvt_qp_swqe_complete(qp, wqe, status); + rvt_qp_swqe_complete(qp, + wqe, + ib_hfi1_wc_opcode[wqe->wr.opcode], + status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 5cde1ec..bfd0d51 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -868,7 +868,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, cpu_id = smp_processor_id(); rcu_read_lock(); - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id, + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id, sdma_rht_params); if (rht_node && rht_node->map[vl]) { @@ -962,7 +962,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, continue; } - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + if (vl >= ARRAY_SIZE(rht_node->map)) { + ret = -EINVAL; + goto out; + } + + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu, sdma_rht_params); if (!rht_node) { rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL); @@ -982,7 +987,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, rht_node->map[vl]->ctr = 1; rht_node->map[vl]->sde[0] = sde; - ret = rhashtable_insert_fast(&dd->sdma_rht, + ret = rhashtable_insert_fast(dd->sdma_rht, &rht_node->node, sdma_rht_params); if (ret) { @@ 
-1025,7 +1030,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, if (cpumask_test_cpu(cpu, mask)) continue; - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu, sdma_rht_params); if (rht_node) { bool empty = true; @@ -1049,7 +1054,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, } if (empty) { - ret = rhashtable_remove_fast(&dd->sdma_rht, + ret = rhashtable_remove_fast(dd->sdma_rht, &rht_node->node, sdma_rht_params); WARN_ON(ret); @@ -1108,7 +1113,7 @@ void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct sdma_rht_node *rht_node; int i, j; - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid, + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid, sdma_rht_params); if (!rht_node) return; @@ -1322,6 +1327,12 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) synchronize_rcu(); kfree(dd->per_sdma); dd->per_sdma = NULL; + + if (dd->sdma_rht) { + rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL); + kfree(dd->sdma_rht); + dd->sdma_rht = NULL; + } } /** @@ -1341,12 +1352,14 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) { unsigned this_idx; struct sdma_engine *sde; + struct rhashtable *tmp_sdma_rht; u16 descq_cnt; void *curr_head; struct hfi1_pportdata *ppd = dd->pport + port; u32 per_sdma_credits; uint idle_cnt = sdma_idle_cnt; size_t num_engines = dd->chip_sdma_engines; + int ret = -ENOMEM; if (!HFI1_CAP_IS_KSET(SDMA)) { HFI1_CAP_CLEAR(SDMA_AHG); @@ -1378,7 +1391,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) /* alloc memory for array of send engines */ dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL); if (!dd->per_sdma) - return -ENOMEM; + return ret; idle_cnt = ns_to_cclock(dd, idle_cnt); if (!sdma_desct_intr) @@ -1507,18 +1520,27 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) dd->flags |= HFI1_HAS_SEND_DMA; dd->flags |= idle_cnt ? 
HFI1_HAS_SDMA_TIMEOUT : 0; dd->num_sdma = num_engines; - if (sdma_map_init(dd, port, ppd->vls_operational, NULL)) + ret = sdma_map_init(dd, port, ppd->vls_operational, NULL); + if (ret < 0) goto bail; - if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params)) + tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL); + if (!tmp_sdma_rht) { + ret = -ENOMEM; goto bail; + } + + ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); + if (ret < 0) + goto bail; + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); return 0; bail: sdma_clean(dd, num_engines); - return -ENOMEM; + return ret; } /** @@ -1604,7 +1626,6 @@ void sdma_exit(struct hfi1_devdata *dd) sdma_finalput(&sde->state); } sdma_clean(dd, dd->num_sdma); - rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL); } /* diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 21f1e28..64f10b8 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -966,34 +966,34 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status); * | mask | --/ |--------------------| * |--------------------------| -/ | * | * | actual_vls (max 8) | -/ |--------------------| - * |--------------------------| --/ | sde[n] -> eng n | + * |--------------------------| --/ | sde[n-1] -> eng n | * | vls (max 8) | -/ +--------------------+ * |--------------------------| --/ * | map[0] |-/ - * |--------------------------| +--------------------+ - * | map[1] |--- | mask | - * |--------------------------| \---- |--------------------| - * | * | \-- | sde[0] -> eng 1+n | - * | * | \---- |--------------------| - * | * | \->| sde[1] -> eng 2+n | - * |--------------------------| |--------------------| - * | map[vls - 1] |- | * | - * +--------------------------+ \- |--------------------| - * \- | sde[m] -> eng m+n | - * \ +--------------------+ + * |--------------------------| +---------------------+ + * | map[1] |--- | mask | + * |--------------------------| \---- |---------------------| + * | * | \-- | sde[0] -> eng 1+n | + * | * | \---- |---------------------| + * | * | \->| sde[1] -> eng 2+n | + * |--------------------------| |---------------------| + * | map[vls - 1] |- | * | + * +--------------------------+ \- |---------------------| + * \- | sde[m-1] -> eng m+n | + * \ +---------------------+ * \- * \ - * \- +--------------------+ - * \- | mask | - * \ |--------------------| - * \- | sde[0] -> eng 1+m+n| - * \- |--------------------| - * >| sde[1] -> eng 2+m+n| - * |--------------------| - * | * | - * |--------------------| - * | sde[o] -> eng o+m+n| - * +--------------------+ + * \- +----------------------+ + * \- | mask | + * \ |----------------------| + * \- | sde[0] -> eng 1+m+n | + * \- |----------------------| + * >| sde[1] -> eng 2+m+n | + * |----------------------| + * | * | + * |----------------------| + * | sde[o-1] -> eng o+m+n| + * +----------------------+ * */ diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 919a547..50d140d 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -542,7 +542,7 @@ static ssize_t show_nctxts(struct device *device, * give a more accurate picture of total contexts available. 
*/ return scnprintf(buf, PAGE_SIZE, "%u\n", - min(dd->num_rcv_contexts - dd->first_user_ctxt, + min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt, (u32)dd->sc_sizes[SC_USER].count)); } diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index e86798a..eafae48 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -51,13 +51,12 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr) { struct ib_other_headers *ohdr; u8 opcode; - u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - if (lnh == HFI1_LRH_BTH) + if (ib_get_lnh(hdr) == HFI1_LRH_BTH) ohdr = &hdr->u.oth; else ohdr = &hdr->u.l.oth; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + opcode = ib_bth_get_opcode(ohdr); return hdr_len_by_opcode[opcode] == 0 ? 0 : hdr_len_by_opcode[opcode] - (12 + 8); } diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 382fcda..090f6b5 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -139,11 +139,11 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; __entry->f = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) & - HFI1_FECN_MASK; + (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & + IB_FECN_MASK; __entry->b = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & - HFI1_BECN_MASK; + (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & + IB_BECN_MASK; __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; __entry->a = diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h index d308454..deac77d 100644 --- a/drivers/infiniband/hw/hfi1/trace_misc.h +++ b/drivers/infiniband/hw/hfi1/trace_misc.h @@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt, __entry->src) ); +#ifdef CONFIG_FAULT_INJECTION +TRACE_EVENT(hfi1_fault_opcode, + TP_PROTO(struct rvt_qp *qp, u8 opcode), + TP_ARGS(qp, opcode), + TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(u8, opcode) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->opcode = opcode; + ), + TP_printk("[%s] qpn 0x%x opcode 0x%x", + __get_str(dev), __entry->qpn, __entry->opcode) +); + +TRACE_EVENT(hfi1_fault_packet, + TP_PROTO(struct hfi1_packet *packet), + TP_ARGS(packet), + TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd) + __field(u64, eflags) + __field(u32, ctxt) + __field(u32, hlen) + __field(u32, tlen) + __field(u32, updegr) + __field(u32, etail) + ), + TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd); + __entry->eflags = rhf_err_flags(packet->rhf); + __entry->ctxt = packet->rcd->ctxt; + __entry->hlen = packet->hlen; + __entry->tlen = packet->tlen; + __entry->updegr = packet->updegr; + __entry->etail = rhf_egr_index(packet->rhf); + ), + TP_printk( + "[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d", + __get_str(dev), + __entry->ctxt, + __entry->eflags, + __entry->hlen, + __entry->tlen, + __entry->updegr, + __entry->etail + ) +); +#endif + #endif /* __HFI1_TRACE_MISC_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h index 5ea5005..8ce4765 100644 --- a/drivers/infiniband/hw/hfi1/trace_rc.h +++ b/drivers/infiniband/hw/hfi1/trace_rc.h @@ -1,5 +1,5 @@ /* -* Copyright(c) 2015, 2016 Intel Corporation. +* Copyright(c) 2015, 2016, 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. @@ -104,11 +104,6 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack, TP_ARGS(qp, psn) ); -DEFINE_EVENT(hfi1_rc_template, hfi1_timeout, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error, TP_PROTO(struct rvt_qp *qp, u32 psn), TP_ARGS(qp, psn) diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index 415d6be..2c9ac57 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -633,6 +633,49 @@ DEFINE_EVENT(hfi1_bct_template, bct_get, TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), TP_ARGS(dd, bc)); +TRACE_EVENT( + hfi1_qp_send_completion, + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx), + TP_ARGS(qp, wqe, idx), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(struct rvt_swqe *, wqe) + __field(u64, wr_id) + __field(u32, qpn) + __field(u32, qpt) + __field(u32, length) + __field(u32, idx) + __field(u32, ssn) + __field(enum ib_wr_opcode, opcode) + __field(int, send_flags) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->wqe = wqe; + __entry->wr_id = wqe->wr.wr_id; + __entry->qpn = qp->ibqp.qp_num; + __entry->qpt = qp->ibqp.qp_type; + __entry->length = wqe->length; + __entry->idx = idx; + __entry->ssn = wqe->ssn; + __entry->opcode = wqe->wr.opcode; + __entry->send_flags = wqe->wr.send_flags; + ), + TP_printk( + "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x", + __get_str(dev), + __entry->qpn, + __entry->qpt, + __entry->wqe, + __entry->idx, + __entry->wr_id, + __entry->length, + __entry->ssn, + __entry->opcode, + __entry->send_flags + ) +); + #endif /* __HFI1_TRACE_TX_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 4b2a840..5da1e45 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -94,7 +94,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } ohdr = &ps->s_txreq->phdr.hdr.u.oth; - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; /* Get the next send request. */ @@ -320,7 +320,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) process_ecn(qp, packet, true); psn = be32_to_cpu(ohdr->bth[2]); - opcode = (bth0 >> 24) & 0xff; + opcode = ib_bth_get_opcode(ohdr); /* Compare the PSN verses the expected PSN. */ if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) { @@ -433,7 +433,7 @@ no_immediate_data: wc.wc_flags = 0; send_last: /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -451,7 +451,7 @@ last_imm: wc.status = IB_WC_SUCCESS; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); /* * It seems that IB mandates the presence of an SL in a * work completion only for the UD transport (see section @@ -463,7 +463,7 @@ last_imm: * * See also OPA Vol. 1, section 9.7.6, and table 9-17. 
*/ - wc.sl = qp->remote_ah_attr.sl; + wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); /* zero fields that are N/A */ wc.vendor_err = 0; wc.pkey_index = 0; @@ -528,7 +528,7 @@ rdma_last_imm: wc.wc_flags = IB_WC_WITH_IMM; /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -555,7 +555,7 @@ rdma_last_imm: case OP(RDMA_WRITE_LAST): rdma_last: /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 13ea4eb..6a4e95c 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -68,7 +68,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); struct hfi1_pportdata *ppd; struct rvt_qp *qp; - struct ib_ah_attr *ah_attr; + struct rdma_ah_attr *ah_attr; unsigned long flags; struct rvt_sge_state ssge; struct rvt_sge *sge; @@ -103,17 +103,17 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (qp->ibqp.qp_num > 1) { u16 pkey; u16 slid; - u8 sc5 = ibp->sl_to_sc[ah_attr->sl]; + u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index); - slid = ppd->lid | (ah_attr->src_path_bits & + slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); if (unlikely(ingress_pkey_check(ppd, pkey, sc5, qp->s_pkey_index, slid))) { hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey, - ah_attr->sl, + rdma_ah_get_sl(ah_attr), sqp->ibqp.qp_num, qp->ibqp.qp_num, - slid, ah_attr->dlid); + slid, rdma_ah_get_dlid(ah_attr)); goto drop; } } @@ -131,13 +131,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (unlikely(qkey != qp->qkey)) { u16 lid; - lid = ppd->lid | (ah_attr->src_path_bits & + lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, - ah_attr->sl, + rdma_ah_get_sl(ah_attr), sqp->ibqp.qp_num, qp->ibqp.qp_num, lid, - ah_attr->dlid); + rdma_ah_get_dlid(ah_attr)); goto drop; } } @@ -183,11 +183,11 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) goto bail_unlock; } - if (ah_attr->ah_flags & IB_AH_GRH) { + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { struct ib_grh grh; - struct ib_global_route grd = ah_attr->grh; + const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr); - hfi1_make_grh(ibp, &grh, &grd, 0, 0); + hfi1_make_grh(ibp, &grh, grd, 0, 0); hfi1_copy_sge(&qp->r_sge, &grh, sizeof(grh), true, false); wc.wc_flags |= IB_WC_GRH; @@ -243,12 +243,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } else { wc.pkey_index = 0; } - wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1)); + wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1)); /* Check for loopback when the port lid is not set */ if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI) wc.slid = be16_to_cpu(IB_LID_PERMISSIVE); - wc.sl = ah_attr->sl; - wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); + wc.sl = rdma_ah_get_sl(ah_attr); + wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion 
event if the solicited bit is set. */ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, @@ -272,7 +273,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct ib_other_headers *ohdr; - struct ib_ah_attr *ah_attr; + struct rdma_ah_attr *ah_attr; struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; struct rvt_swqe *wqe; @@ -319,9 +320,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; - if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) || - ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { - lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); + if (rdma_ah_get_dlid(ah_attr) < be16_to_cpu(IB_MULTICAST_LID_BASE) || + rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) { + lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1); if (unlikely(!loopback && (lid == ppd->lid || (lid == be16_to_cpu(IB_LID_PERMISSIVE) && @@ -356,7 +357,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_hdrwords = 7; ps->s_txreq->s_cur_size = wqe->length; ps->s_txreq->ss = &qp->s_sge; - qp->s_srate = ah_attr->static_rate; + qp->s_srate = rdma_ah_get_static_rate(ah_attr); qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); qp->s_wqe = wqe; qp->s_sge.sge = wqe->sg_list[0]; @@ -364,11 +365,11 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_sge.num_sge = wqe->wr.num_sge; qp->s_sge.total_len = wqe->length; - if (ah_attr->ah_flags & IB_AH_GRH) { + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { /* Header size in 32-bit words. */ qp->s_hdrwords += hfi1_make_grh(ibp, &ps->s_txreq->phdr.hdr.u.l.grh, - &ah_attr->grh, + rdma_ah_read_grh(ah_attr), qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; @@ -388,8 +389,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } else { bth0 = IB_OPCODE_UD_SEND_ONLY << 24; } - sc5 = ibp->sl_to_sc[ah_attr->sl]; - lrh0 |= (ah_attr->sl & 0xf) << 4; + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; + lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4; if (qp->ibqp.qp_type == IB_QPT_SMI) { lrh0 |= 0xF000; /* Set VL (see ch. 
13.5.3.1) */ priv->s_sc = 0xf; @@ -402,15 +403,17 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); ps->s_txreq->psc = priv->s_sendcontext; ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); + ps->s_txreq->phdr.hdr.lrh[1] = + cpu_to_be16(rdma_ah_get_dlid(ah_attr)); ps->s_txreq->phdr.hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { + if (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) { ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE; } else { lid = ppd->lid; if (lid) { - lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); + lid |= rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1); ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid); } else { ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE; @@ -537,7 +540,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, bth0 = pkey | (IB_OPCODE_CNP << 24); ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT)); + ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT)); ohdr->bth[2] = 0; /* PSN 0 */ hdr.lrh[0] = cpu_to_be16(lrh0); @@ -680,7 +683,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; bool has_grh = rcv_flags & HFI1_HAS_GRH; - u8 sc5 = hdr2sc(hdr, packet->rhf); + u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); u32 bth1; u8 sl_from_sc, sl; u16 slid; @@ -688,18 +691,16 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) qkey = be32_to_cpu(ohdr->u.ud.deth[0]); src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; - dlid = be16_to_cpu(hdr->lrh[1]); + dlid = ib_get_dlid(hdr); bth1 = be32_to_cpu(ohdr->bth[1]); - slid = be16_to_cpu(hdr->lrh[3]); - pkey = (u16)be32_to_cpu(ohdr->bth[0]); - sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + slid = ib_get_slid(hdr); + pkey = ib_bth_get_pkey(ohdr); + opcode = ib_bth_get_opcode(ohdr); + sl = ib_get_sl(hdr); + extra_bytes = ib_bth_get_pad(ohdr); extra_bytes += (SIZE_OF_CRC << 2); sl_from_sc = ibp->sc_to_sl[sc5]; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - opcode &= 0xff; - process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); /* * Get the number of bytes the message was padded by diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 4a82953..35c6e7e 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
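The uc.c, ud.c and trace hunks above replace open-coded BTH/LRH field extraction (for example "opcode = be32_to_cpu(ohdr->bth[0]) >> 24" and "pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3") with the ib_bth_get_*() and ib_get_*() accessors, so the shift and mask constants live in one place instead of being repeated in every receive path. The arithmetic those accessors wrap is shown below as a minimal stand-alone user-space sketch; the struct layout and the ntohl()/ntohs() byte-order conversion merely stand in for the kernel's ib_other_headers and be32_to_cpu()/be16_to_cpu(), and the demo_* names are invented for illustration.

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>  /* ntohl()/ntohs() model be32_to_cpu()/be16_to_cpu() */

/* Illustrative only: first BTH word and first LRH word, in network byte order. */
struct demo_hdr {
        uint32_t bth0;  /* opcode[31:24], pad count[21:20], pkey[15:0] */
        uint16_t lrh0;  /* SL[7:4], LNH[1:0] */
};

static uint8_t demo_bth_get_opcode(const struct demo_hdr *h)
{
        return ntohl(h->bth0) >> 24;            /* opcode is the top byte */
}

static uint8_t demo_bth_get_pad(const struct demo_hdr *h)
{
        return (ntohl(h->bth0) >> 20) & 3;      /* 2-bit pad count */
}

static uint16_t demo_bth_get_pkey(const struct demo_hdr *h)
{
        return ntohl(h->bth0) & 0xffff;         /* partition key, low 16 bits */
}

static uint8_t demo_get_sl(const struct demo_hdr *h)
{
        return (ntohs(h->lrh0) >> 4) & 0xf;     /* service level */
}

static uint8_t demo_get_lnh(const struct demo_hdr *h)
{
        return ntohs(h->lrh0) & 3;              /* link next header: 2 = BTH follows, 3 = GRH */
}

int main(void)
{
        /* RC SEND_ONLY-style opcode 0x04, pad 2, pkey 0xffff; SL 5, LNH 2 */
        struct demo_hdr h = {
                .bth0 = htonl((0x04u << 24) | (2u << 20) | 0xffff),
                .lrh0 = htons((5u << 4) | 2u),
        };

        printf("opcode 0x%x pad %u pkey 0x%x sl %u lnh %u\n",
               demo_bth_get_opcode(&h), demo_bth_get_pad(&h),
               demo_bth_get_pkey(&h), demo_get_sl(&h), demo_get_lnh(&h));
        return 0;
}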
@@ -200,8 +200,9 @@ int hfi1_user_exp_rcv_init(struct file *fp) if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) { fd->invalid_tid_idx = 0; - fd->invalid_tids = kzalloc(uctxt->expected_count * - sizeof(u32), GFP_KERNEL); + fd->invalid_tids = kcalloc(uctxt->expected_count, + sizeof(*fd->invalid_tids), + GFP_KERNEL); if (!fd->invalid_tids) { ret = -ENOMEM; goto done; @@ -578,6 +579,9 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) u32 *tidinfo; unsigned tididx; + if (unlikely(tinfo->tidcnt > fd->tid_used)) + return -EINVAL; + tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist, sizeof(tidinfo[0]) * tinfo->tidcnt); if (IS_ERR(tidinfo)) @@ -607,7 +611,7 @@ int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo) struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; unsigned long *ev = uctxt->dd->events + - (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * + (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + fd->subctxt); u32 *array; int ret = 0; @@ -1011,8 +1015,8 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) * process in question. */ ev = uctxt->dd->events + - (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); + (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * + HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); } fdata->invalid_tid_idx++; diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 68295a1..e341e6d 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
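The user_exp_rcv.c hunk above (and the user_sdma.c hunks further down) convert open-coded kzalloc(count * size) calls to kcalloc(count, size, ...). The point of kcalloc() is that the element-count multiplication is overflow-checked rather than silently wrapping, and it still returns zeroed memory, so the conversion is behaviour-preserving. A minimal user-space analogue of that check is sketched below; the zalloc_array() name is made up for illustration and is not part of the kernel API.

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/*
 * Illustrative only: allocate a zeroed array of @count elements of @size
 * bytes, refusing the request if count * size would overflow size_t.
 * This mirrors the guarantee kcalloc() gives over a bare kzalloc(n * size).
 */
static void *zalloc_array(size_t count, size_t size)
{
        if (size != 0 && count > SIZE_MAX / size)
                return NULL;            /* multiplication would wrap */
        return calloc(count, size);     /* calloc() zeroes the memory */
}

int main(void)
{
        uint32_t *tids = zalloc_array(2048, sizeof(*tids));

        if (!tids)
                return 1;
        printf("allocated %zu bytes\n", 2048 * sizeof(*tids));

        /* A bogus count that would overflow is rejected instead of wrapping. */
        if (!zalloc_array(SIZE_MAX / 2, sizeof(uint64_t)))
                printf("oversized request rejected\n");

        free(tids);
        return 0;
}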
@@ -73,7 +73,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm, { unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit, size = (cache_size * (1UL << 20)); /* convert to bytes */ - unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt; + unsigned int usr_ctxts = + dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; bool can_lock = capable(CAP_IPC_LOCK); /* diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index e6811c4e..0749689 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -376,7 +376,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) { struct hfi1_filedata *fd; int ret = 0; - unsigned memsize; char buf[64]; struct hfi1_devdata *dd; struct hfi1_user_sdma_comp_q *cq; @@ -401,13 +400,15 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) if (!pq) goto pq_nomem; - memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size; - pq->reqs = kzalloc(memsize, GFP_KERNEL); + pq->reqs = kcalloc(hfi1_sdma_comp_ring_size, + sizeof(*pq->reqs), + GFP_KERNEL); if (!pq->reqs) goto pq_reqs_nomem; - memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long); - pq->req_in_use = kzalloc(memsize, GFP_KERNEL); + pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size), + sizeof(*pq->req_in_use), + GFP_KERNEL); if (!pq->req_in_use) goto pq_reqs_no_in_use; @@ -442,8 +443,8 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) if (!cq) goto cq_nomem; - memsize = PAGE_ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size); - cq->comps = vmalloc_user(memsize); + cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps) + * hfi1_sdma_comp_ring_size)); if (!cq->comps) goto cq_comps_nomem; @@ -704,7 +705,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, /* Save all the IO vector structures */ for (i = 0; i < req->data_iovs; i++) { INIT_LIST_HEAD(&req->iovs[i].list); - memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec)); + memcpy(&req->iovs[i].iov, + iovec + idx++, + sizeof(req->iovs[i].iov)); ret = pin_vector_pages(req, &req->iovs[i]); if (ret) { req->status = ret; @@ -1615,9 +1618,10 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, { hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d", pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret); - cq->comps[idx].status = state; if (state == ERROR) cq->comps[idx].errcode = -ret; + smp_wmb(); /* make sure errcode is visible first */ + cq->comps[idx].status = state; trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt, idx, state, ret); } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 222315f..90e7b77 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -60,6 +60,8 @@ #include "trace.h" #include "qp.h" #include "verbs_txreq.h" +#include "debugfs.h" +#include "vnic.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -297,6 +299,22 @@ static inline bool wss_exceeds_threshold(void) } /* + * Translate ib_wr_opcode into ib_wc_opcode. 
+ */ +const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, + [IB_WR_REG_MR] = IB_WC_REG_MR +}; + +/* * Length of header by opcode, 0 --> not supported */ const u8 hdr_len_by_opcode[256] = { @@ -501,6 +519,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) return NULL; } +static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) +{ +#ifdef CONFIG_FAULT_INJECTION + if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) + /* + * In order to drop non-IB traffic we + * set PbcInsertHrc to NONE (0x2). + * The packet will still be delivered + * to the receiving node but a + * KHdrHCRCErr (KDETH packet with a bad + * HCRC) will be triggered and the + * packet will not be delivered to the + * correct context. + */ + pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT; + else + /* + * In order to drop regular verbs + * traffic we set the PbcTestEbp + * flag. The packet will still be + * delivered to the receiving node but + * a 'late ebp error' will be + * triggered and will be dropped. + */ + pbc |= PBC_TEST_EBP; +#endif + return pbc; +} + /** * hfi1_ib_rcv - process an incoming packet * @packet: data packet information @@ -525,7 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) u16 lid; /* Check for GRH */ - lnh = be16_to_cpu(hdr->lrh[0]) & 3; + lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; } else if (lnh == HFI1_LRH_GRH) { @@ -544,12 +591,12 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) trace_input_ibhdr(rcd->dd, hdr); - opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); + opcode = ib_bth_get_opcode(packet->ohdr); inc_opstats(tlen, &rcd->opstats->stats[opcode]); /* Get the destination QP number. */ qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK; - lid = be16_to_cpu(hdr->lrh[1]); + lid = ib_get_dlid(hdr); if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { struct rvt_mcast *mcast; @@ -557,7 +604,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) if (lnh != HFI1_LRH_GRH) goto drop; - mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid); + mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid); if (!mcast) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { @@ -583,6 +630,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) rcu_read_unlock(); goto drop; } + if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode, + true))) { + rcu_read_unlock(); + goto drop; + } spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(opcode, packet); if (likely(packet_handler)) @@ -781,7 +833,6 @@ static int build_verbs_tx_desc( if (ret) goto bail_txadd; } - /* add the ulp payload - if any. 
tx->ss can be NULL for acks */ if (tx->ss) ret = build_verbs_ulp_payload(sde, length, tx); @@ -800,7 +851,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_ibdev *dev = ps->dev; struct hfi1_pportdata *ppd = ps->ppd; struct verbs_txreq *tx; - u64 pbc_flags = 0; u8 sc5 = priv->s_sc; int ret; @@ -809,12 +859,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (!sdma_txreq_built(&tx->txreq)) { if (likely(pbc == 0)) { u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); + u8 opcode = get_opcode(&tx->phdr.hdr); + /* No vl15 here */ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) + pbc = hfi1_fault_tx(qp, opcode, pbc); pbc = create_pbc(ppd, - pbc_flags, + pbc, qp->srate_mbps, vl, plen); @@ -917,7 +971,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u32 plen = hdrwords + dwords + 2; /* includes pbc */ struct hfi1_pportdata *ppd = ps->ppd; u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; - u64 pbc_flags = 0; u8 sc5; unsigned long flags = 0; struct send_context *sc; @@ -942,9 +995,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (likely(pbc == 0)) { u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); + struct verbs_txreq *tx = ps->s_txreq; + u8 opcode = get_opcode(&tx->phdr.hdr); + /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; - pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); + pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) + pbc = hfi1_fault_tx(qp, opcode, pbc); + pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); } if (cb) iowait_pio_inc(&priv->s_iowait); @@ -1173,7 +1231,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) hdr = &ps->s_txreq->phdr.hdr; /* locate the pkey within the headers */ - lnh = be16_to_cpu(hdr->lrh[0]) & 3; + lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_GRH) ohdr = &hdr->u.l.oth; else @@ -1220,17 +1278,20 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) static void hfi1_fill_device_attr(struct hfi1_devdata *dd) { struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; - u16 ver = dd->dc8051_ver; + u32 ver = dd->dc8051_ver; memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); - rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) | - (u64)dc8051_ver_min(ver); + rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) | + ((u64)(dc8051_ver_min(ver)) << 16) | + (u64)dc8051_ver_patch(ver); + rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | - IB_DEVICE_MEM_MGT_EXTENSIONS; + IB_DEVICE_MEM_MGT_EXTENSIONS | + IB_DEVICE_RDMA_NETDEV_OPA_VNIC; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; @@ -1398,14 +1459,14 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, /* * convert ah port,sl to sc */ -u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah) +u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah) { - struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num); + struct hfi1_ibport *ibp = 
to_iport(ibdev, rdma_ah_get_port_num(ah)); - return ibp->sl_to_sc[ah->sl]; + return ibp->sl_to_sc[rdma_ah_get_sl(ah)]; } -static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) +static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) { struct hfi1_ibport *ibp; struct hfi1_pportdata *ppd; @@ -1413,9 +1474,9 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) u8 sc5; /* test the mapping for validity */ - ibp = to_iport(ibdev, ah_attr->port_num); + ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); - sc5 = ibp->sl_to_sc[ah_attr->sl]; + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; dd = dd_from_ppd(ppd); if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) return -EINVAL; @@ -1423,7 +1484,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) } static void hfi1_notify_new_ah(struct ib_device *ibdev, - struct ib_ah_attr *ah_attr, + struct rdma_ah_attr *ah_attr, struct rvt_ah *ah) { struct hfi1_ibport *ibp; @@ -1436,9 +1497,9 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, * done being setup. We can however modify things which we need to set. */ - ibp = to_iport(ibdev, ah_attr->port_num); + ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); - sc5 = ibp->sl_to_sc[ah->attr.sl]; + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)]; dd = dd_from_ppd(ppd); ah->vl = sc_to_vlt(dd, sc5); if (ah->vl < num_vls || ah->vl == 15) @@ -1447,17 +1508,21 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid) { - struct ib_ah_attr attr; + struct rdma_ah_attr attr; struct ib_ah *ah = ERR_PTR(-EINVAL); struct rvt_qp *qp0; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_devdata *dd = dd_from_ppd(ppd); + u8 port_num = ppd->port; memset(&attr, 0, sizeof(attr)); - attr.dlid = dlid; - attr.port_num = ppd_from_ibp(ibp)->port; + attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num); + rdma_ah_set_dlid(&attr, dlid); + rdma_ah_set_port_num(&attr, ppd_from_ibp(ibp)->port); rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = ib_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr); rcu_read_unlock(); return ah; } @@ -1504,10 +1569,10 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct hfi1_ibdev *dev = dev_from_rdi(rdi); - u16 ver = dd_from_dev(dev)->dc8051_ver; + u32 ver = dd_from_dev(dev)->dc8051_ver; - snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver), - dc8051_ver_min(ver)); + snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver), + dc8051_ver_min(ver), dc8051_ver_patch(ver)); } static const char * const driver_cntr_names[] = { @@ -1524,6 +1589,7 @@ static const char * const driver_cntr_names[] = { "DRIVER_EgrHdrFull" }; +static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */ static const char **dev_cntr_names; static const char **port_cntr_names; static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); @@ -1578,6 +1644,7 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, { int i, err; + mutex_lock(&cntr_names_lock); if (!cntr_names_initialized) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); @@ -1586,8 +1653,10 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, num_driver_cntrs, &num_dev_cntrs, &dev_cntr_names); - if (err) + if (err) { + 
mutex_unlock(&cntr_names_lock); return NULL; + } for (i = 0; i < num_driver_cntrs; i++) dev_cntr_names[num_dev_cntrs + i] = @@ -1601,10 +1670,12 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, if (err) { kfree(dev_cntr_names); dev_cntr_names = NULL; + mutex_unlock(&cntr_names_lock); return NULL; } cntr_names_initialized = 1; } + mutex_unlock(&cntr_names_lock); if (!port_num) return rdma_alloc_hw_stats_struct( @@ -1707,6 +1778,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->modify_device = modify_device; ibdev->alloc_hw_stats = alloc_hw_stats; ibdev->get_hw_stats = get_hw_stats; + ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn; + ibdev->free_rdma_netdev = hfi1_vnic_free_rn; /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; @@ -1751,7 +1824,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; - dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; + dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt; dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; @@ -1823,9 +1896,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) del_timer_sync(&dev->mem_timer); verbs_txreq_exit(dev); + mutex_lock(&cntr_names_lock); kfree(dev_cntr_names); kfree(port_cntr_names); + dev_cntr_names = NULL; + port_cntr_names = NULL; cntr_names_initialized = 0; + mutex_unlock(&cntr_names_lock); } void hfi1_cnp_rcv(struct hfi1_packet *packet) @@ -1840,12 +1917,12 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) switch (packet->qp->ibqp.qp_type) { case IB_QPT_UC: - rlid = qp->remote_ah_attr.dlid; + rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_UC; break; case IB_QPT_RC: - rlid = qp->remote_ah_attr.dlid; + rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_RC; break; @@ -1859,7 +1936,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) return; } - sc5 = hdr2sc(hdr, packet->rhf); + sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = qp->ibqp.qp_num; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 3a0b589..52ff275 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
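The verbs.c hunks above widen dc8051_ver from u16 to u32 and report a three-part firmware version: fw_ver now packs the major number at bit 32, the minor at bit 16 and the patch level in the low bits, and the same three components feed the "%u.%u.%u" string in hfi1_get_dev_fw_str(). A stand-alone sketch of that packing and the matching unpack is below; the helper names and the 16-bit field widths are assumptions made for the demo, only the shift positions come from the change.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: pack major.minor.patch the way the patch builds
 * rdi->dparms.props.fw_ver (major << 32 | minor << 16 | patch).
 */
static uint64_t pack_fw_ver(uint16_t maj, uint16_t min, uint16_t patch)
{
        return ((uint64_t)maj << 32) | ((uint64_t)min << 16) | (uint64_t)patch;
}

static void print_fw_ver(uint64_t fw_ver)
{
        printf("%u.%u.%u\n",
               (unsigned int)(fw_ver >> 32),
               (unsigned int)((fw_ver >> 16) & 0xffff),
               (unsigned int)(fw_ver & 0xffff));
}

int main(void)
{
        uint64_t v = pack_fw_ver(1, 27, 0);

        print_fw_ver(v);        /* prints "1.27.0" */
        return 0;
}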
@@ -195,6 +195,11 @@ struct hfi1_ibdev { struct dentry *hfi1_ibdev_dbg; /* per HFI symlinks to above */ struct dentry *hfi1_ibdev_link; +#ifdef CONFIG_FAULT_INJECTION + struct fault_opcode *fault_opcode; + struct fault_packet *fault_packet; + bool fault_suppress_err; +#endif #endif }; @@ -303,7 +308,7 @@ void hfi1_rc_hdrerr( u32 rcv_flags, struct rvt_qp *qp); -u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); @@ -342,7 +347,7 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, - struct ib_global_route *grh, u32 hwords, u32 nwords); + const struct ib_global_route *grh, u32 hwords, u32 nwords); void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, @@ -350,7 +355,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, void _hfi1_do_send(struct work_struct *work); -void hfi1_do_send(struct rvt_qp *qp); +void hfi1_do_send_from_rvt(struct rvt_qp *qp); + +void hfi1_do_send(struct rvt_qp *qp, bool in_thread); void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h new file mode 100644 index 0000000..e2c4552 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -0,0 +1,184 @@ +#ifndef _HFI1_VNIC_H +#define _HFI1_VNIC_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <rdma/opa_vnic.h> +#include "hfi.h" +#include "sdma.h" + +#define HFI1_VNIC_MAX_TXQ 16 +#define HFI1_VNIC_MAX_PAD 12 + +/* L2 header definitions */ +#define HFI1_L2_TYPE_OFFSET 0x7 +#define HFI1_L2_TYPE_SHFT 0x5 +#define HFI1_L2_TYPE_MASK 0x3 + +#define HFI1_GET_L2_TYPE(hdr) \ + ((*((u8 *)(hdr) + HFI1_L2_TYPE_OFFSET) >> HFI1_L2_TYPE_SHFT) & \ + HFI1_L2_TYPE_MASK) + +/* L4 type definitions */ +#define HFI1_L4_TYPE_OFFSET 8 + +#define HFI1_GET_L4_TYPE(data) \ + (*((u8 *)(data) + HFI1_L4_TYPE_OFFSET)) + +/* L4 header definitions */ +#define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN + +#define HFI1_VNIC_GET_L4_HDR(data) \ + (*((u16 *)((u8 *)(data) + HFI1_VNIC_L4_HDR_OFFSET))) + +#define HFI1_VNIC_GET_VESWID(data) \ + (HFI1_VNIC_GET_L4_HDR(data) & 0xFFF) + +/* Service class */ +#define HFI1_VNIC_SC_OFFSET_LOW 6 +#define HFI1_VNIC_SC_OFFSET_HI 7 +#define HFI1_VNIC_SC_SHIFT 4 + +#define HFI1_VNIC_MAX_QUEUE 16 + +/** + * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information + * @dd - device data pointer + * @sde - sdma engine + * @vinfo - vnic info pointer + * @wait - iowait structure + * @stx - sdma tx request + * @state - vnic Tx ring SDMA state + * @q_idx - vnic Tx queue index + */ +struct hfi1_vnic_sdma { + struct hfi1_devdata *dd; + struct sdma_engine *sde; + struct hfi1_vnic_vport_info *vinfo; + struct iowait wait; + struct sdma_txreq stx; + unsigned int state; + u8 q_idx; +}; + +/** + * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue + * @idx: queue index + * @vinfo: pointer to vport information + * @netdev: network device + * @napi: netdev napi structure + * @skbq: queue of received socket buffers + */ +struct hfi1_vnic_rx_queue { + u8 idx; + struct hfi1_vnic_vport_info *vinfo; + struct net_device *netdev; + struct napi_struct napi; + struct sk_buff_head skbq; +}; + +/** + * struct hfi1_vnic_vport_info - HFI1 VNIC virtual port information + * @dd: device data pointer + * @netdev: net device pointer + * @flags: state flags + * @lock: vport lock + * @num_tx_q: number of transmit queues + * @num_rx_q: number of receive queues + * @vesw_id: virtual switch id + * @rxq: Array of receive queues + * @stats: per queue stats + * @sdma: VNIC SDMA structure per TXQ + */ +struct hfi1_vnic_vport_info { + struct hfi1_devdata *dd; + struct net_device *netdev; + unsigned long flags; + + /* Lock used around state updates */ + struct mutex lock; + + u8 num_tx_q; + u8 num_rx_q; + u16 vesw_id; + struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT]; + + struct opa_vnic_stats stats[HFI1_VNIC_MAX_QUEUE]; + struct hfi1_vnic_sdma sdma[HFI1_VNIC_MAX_TXQ]; +}; + +#define v_dbg(format, arg...) \ + netdev_dbg(vinfo->netdev, format, ## arg) +#define v_err(format, arg...) \ + netdev_err(vinfo->netdev, format, ## arg) +#define v_info(format, arg...) 
\ + netdev_info(vinfo->netdev, format, ## arg) + +/* vnic hfi1 internal functions */ +void hfi1_vnic_setup(struct hfi1_devdata *dd); +void hfi1_vnic_cleanup(struct hfi1_devdata *dd); +int hfi1_vnic_txreq_init(struct hfi1_devdata *dd); +void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd); + +void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet); +void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo); +bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx); + +/* vnic rdma netdev operations */ +struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)); +void hfi1_vnic_free_rn(struct net_device *netdev); +int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, + struct hfi1_vnic_vport_info *vinfo, + struct sk_buff *skb, u64 pbc, u8 plen); + +#endif /* _HFI1_VNIC_H */ diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c new file mode 100644 index 0000000..392f4d5 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -0,0 +1,907 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +/* + * This file contains HFI1 support for VNIC functionality + */ + +#include <linux/io.h> +#include <linux/if_vlan.h> + +#include "vnic.h" + +#define HFI_TX_TIMEOUT_MS 1000 + +#define HFI1_VNIC_RCV_Q_SIZE 1024 + +#define HFI1_VNIC_UP 0 + +static DEFINE_SPINLOCK(vport_cntr_lock); + +static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) +{ + unsigned int rcvctrl_ops = 0; + int ret; + + ret = hfi1_init_ctxt(uctxt->sc); + if (ret) + goto done; + + uctxt->do_interrupt = &handle_receive_interrupt; + + /* Now allocate the RcvHdr queue and eager buffers. */ + ret = hfi1_create_rcvhdrq(dd, uctxt); + if (ret) + goto done; + + ret = hfi1_setup_eagerbufs(uctxt); + if (ret) + goto done; + + set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags); + + if (uctxt->rcvhdrtail_kvaddr) + clear_rcvhdrtail(uctxt); + + rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; + rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB; + + if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) + rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) + rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) + rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) + rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; + + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); + + uctxt->is_vnic = true; +done: + return ret; +} + +static int allocate_vnic_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata **vnic_ctxt) +{ + struct hfi1_ctxtdata *uctxt; + unsigned int ctxt; + int ret; + + if (dd->flags & HFI1_FROZEN) + return -EIO; + + for (ctxt = dd->first_dyn_alloc_ctxt; + ctxt < dd->num_rcv_contexts; ctxt++) + if (!dd->rcd[ctxt]) + break; + + if (ctxt == dd->num_rcv_contexts) + return -EBUSY; + + uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node); + if (!uctxt) { + dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); + return -ENOMEM; + } + + uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | + HFI1_CAP_KGET(NODROP_RHQ_FULL) | + HFI1_CAP_KGET(NODROP_EGR_FULL) | + HFI1_CAP_KGET(DMA_RTAIL); + uctxt->seq_cnt = 1; + + /* Allocate and enable a PIO send context */ + uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize, + uctxt->numa_id); + + ret = uctxt->sc ? 0 : -ENOMEM; + if (ret) + goto bail; + + dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n", + uctxt->sc->sw_index, uctxt->sc->hw_context); + ret = sc_enable(uctxt->sc); + if (ret) + goto bail; + + if (dd->num_msix_entries) + hfi1_set_vnic_msix_info(uctxt); + + hfi1_stats.sps_ctxts++; + dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); + *vnic_ctxt = uctxt; + + return ret; +bail: + /* + * hfi1_free_ctxtdata() also releases send_context + * structure if uctxt->sc is not null + */ + dd->rcd[uctxt->ctxt] = NULL; + hfi1_free_ctxtdata(dd, uctxt); + dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); + return ret; +} + +static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata *uctxt) +{ + unsigned long flags; + + dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); + flush_wc(); + + if (dd->num_msix_entries) + hfi1_reset_vnic_msix_info(uctxt); + + spin_lock_irqsave(&dd->uctxt_lock, flags); + /* + * Disable receive context and interrupt available, reset all + * RcvCtxtCtrl bits to default values. 
+ */ + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | + HFI1_RCVCTRL_TIDFLOW_DIS | + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_ONE_PKT_EGR_DIS | + HFI1_RCVCTRL_NO_RHQ_DROP_DIS | + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); + /* + * VNIC contexts are allocated from user context pool. + * Release them back to user context pool. + * + * Reset context integrity checks to default. + * (writes to CSRs probably belong in chip.c) + */ + write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, + hfi1_pkt_default_send_ctxt_mask(dd, SC_USER)); + sc_disable(uctxt->sc); + + dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + dd->rcd[uctxt->ctxt] = NULL; + uctxt->event_flags = 0; + + hfi1_clear_tids(uctxt); + hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); + + hfi1_stats.sps_ctxts--; + hfi1_free_ctxtdata(dd, uctxt); +} + +void hfi1_vnic_setup(struct hfi1_devdata *dd) +{ + idr_init(&dd->vnic.vesw_idr); +} + +void hfi1_vnic_cleanup(struct hfi1_devdata *dd) +{ + idr_destroy(&dd->vnic.vesw_idr); +} + +#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ + u64 *src64, *dst64; \ + for (src64 = &qstats->x_grp.unicast, \ + dst64 = &stats->x_grp.unicast; \ + dst64 <= &stats->x_grp.s_1519_max;) { \ + *dst64++ += *src64++; \ + } \ + } while (0) + +/* hfi1_vnic_update_stats - update statistics */ +static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo, + struct opa_vnic_stats *stats) +{ + struct net_device *netdev = vinfo->netdev; + u8 i; + + /* add tx counters on different queues */ + for (i = 0; i < vinfo->num_tx_q; i++) { + struct opa_vnic_stats *qstats = &vinfo->stats[i]; + struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; + + stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors; + stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors; + stats->tx_drop_state += qstats->tx_drop_state; + stats->tx_dlid_zero += qstats->tx_dlid_zero; + + SUM_GRP_COUNTERS(stats, qstats, tx_grp); + stats->netstats.tx_packets += qnstats->tx_packets; + stats->netstats.tx_bytes += qnstats->tx_bytes; + } + + /* add rx counters on different queues */ + for (i = 0; i < vinfo->num_rx_q; i++) { + struct opa_vnic_stats *qstats = &vinfo->stats[i]; + struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; + + stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors; + stats->netstats.rx_nohandler += qnstats->rx_nohandler; + stats->rx_drop_state += qstats->rx_drop_state; + stats->rx_oversize += qstats->rx_oversize; + stats->rx_runt += qstats->rx_runt; + + SUM_GRP_COUNTERS(stats, qstats, rx_grp); + stats->netstats.rx_packets += qnstats->rx_packets; + stats->netstats.rx_bytes += qnstats->rx_bytes; + } + + stats->netstats.tx_errors = stats->netstats.tx_fifo_errors + + stats->netstats.tx_carrier_errors + + stats->tx_drop_state + stats->tx_dlid_zero; + stats->netstats.tx_dropped = stats->netstats.tx_errors; + + stats->netstats.rx_errors = stats->netstats.rx_fifo_errors + + stats->netstats.rx_nohandler + + stats->rx_drop_state + stats->rx_oversize + + stats->rx_runt; + stats->netstats.rx_dropped = stats->netstats.rx_errors; + + netdev->stats.tx_packets = stats->netstats.tx_packets; + netdev->stats.tx_bytes = stats->netstats.tx_bytes; + netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors; + netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors; + netdev->stats.tx_errors = stats->netstats.tx_errors; + netdev->stats.tx_dropped = stats->netstats.tx_dropped; + + netdev->stats.rx_packets = stats->netstats.rx_packets; + 
netdev->stats.rx_bytes = stats->netstats.rx_bytes; + netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors; + netdev->stats.multicast = stats->rx_grp.mcastbcast; + netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt; + netdev->stats.rx_errors = stats->netstats.rx_errors; + netdev->stats.rx_dropped = stats->netstats.rx_dropped; +} + +/* update_len_counters - update pkt's len histogram counters */ +static inline void update_len_counters(struct opa_vnic_grp_stats *grp, + int len) +{ + /* account for 4 byte FCS */ + if (len >= 1515) + grp->s_1519_max++; + else if (len >= 1020) + grp->s_1024_1518++; + else if (len >= 508) + grp->s_512_1023++; + else if (len >= 252) + grp->s_256_511++; + else if (len >= 124) + grp->s_128_255++; + else if (len >= 61) + grp->s_65_127++; + else + grp->s_64++; +} + +/* hfi1_vnic_update_tx_counters - update transmit counters */ +static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx, struct sk_buff *skb, int err) +{ + struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); + struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; + struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp; + u16 vlan_tci; + + stats->netstats.tx_packets++; + stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN; + + update_len_counters(tx_grp, skb->len); + + /* rest of the counts are for good packets only */ + if (unlikely(err)) + return; + + if (is_multicast_ether_addr(mac_hdr->h_dest)) + tx_grp->mcastbcast++; + else + tx_grp->unicast++; + + if (!__vlan_get_tag(skb, &vlan_tci)) + tx_grp->vlan++; + else + tx_grp->untagged++; +} + +/* hfi1_vnic_update_rx_counters - update receive counters */ +static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx, struct sk_buff *skb, int err) +{ + struct ethhdr *mac_hdr = (struct ethhdr *)skb->data; + struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; + struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp; + u16 vlan_tci; + + stats->netstats.rx_packets++; + stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN; + + update_len_counters(rx_grp, skb->len); + + /* rest of the counts are for good packets only */ + if (unlikely(err)) + return; + + if (is_multicast_ether_addr(mac_hdr->h_dest)) + rx_grp->mcastbcast++; + else + rx_grp->unicast++; + + if (!__vlan_get_tag(skb, &vlan_tci)) + rx_grp->vlan++; + else + rx_grp->untagged++; +} + +/* This function is overloaded for opa_vnic specific implementation */ +static void hfi1_vnic_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats; + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + + hfi1_vnic_update_stats(vinfo, vstats); +} + +static u64 create_bypass_pbc(u32 vl, u32 dw_len) +{ + u64 pbc; + + pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT) + | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN + | PBC_PACKET_BYPASS + | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT) + | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT; + + return pbc; +} + +/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */ +static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx) +{ + netif_stop_subqueue(vinfo->netdev, q_idx); + if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx)) + return; + + netif_start_subqueue(vinfo->netdev, q_idx); +} + +static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + u8 pad_len, q_idx = 
skb->queue_mapping; + struct hfi1_devdata *dd = vinfo->dd; + struct opa_vnic_skb_mdata *mdata; + u32 pkt_len, total_len; + int err = -EINVAL; + u64 pbc; + + v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len); + if (unlikely(!netif_oper_up(netdev))) { + vinfo->stats[q_idx].tx_drop_state++; + goto tx_finish; + } + + /* take out meta data */ + mdata = (struct opa_vnic_skb_mdata *)skb->data; + skb_pull(skb, sizeof(*mdata)); + if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) { + vinfo->stats[q_idx].tx_dlid_zero++; + goto tx_finish; + } + + /* add tail padding (for 8 bytes size alignment) and icrc */ + pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7; + pad_len += OPA_VNIC_ICRC_TAIL_LEN; + + /* + * pkt_len is how much data we have to write, includes header and data. + * total_len is length of the packet in Dwords plus the PBC should not + * include the CRC. + */ + pkt_len = (skb->len + pad_len) >> 2; + total_len = pkt_len + 2; /* PBC + packet */ + + pbc = create_bypass_pbc(mdata->vl, total_len); + + skb_get(skb); + v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len); + err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len); + if (unlikely(err)) { + if (err == -ENOMEM) + vinfo->stats[q_idx].netstats.tx_fifo_errors++; + else if (err != -EBUSY) + vinfo->stats[q_idx].netstats.tx_carrier_errors++; + } + /* remove the header before updating tx counters */ + skb_pull(skb, OPA_VNIC_HDR_LEN); + + if (unlikely(err == -EBUSY)) { + hfi1_vnic_maybe_stop_tx(vinfo, q_idx); + dev_kfree_skb_any(skb); + return NETDEV_TX_BUSY; + } + +tx_finish: + /* update tx counters */ + hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static u16 hfi1_vnic_select_queue(struct net_device *netdev, + struct sk_buff *skb, + void *accel_priv, + select_queue_fallback_t fallback) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + struct opa_vnic_skb_mdata *mdata; + struct sdma_engine *sde; + + mdata = (struct opa_vnic_skb_mdata *)skb->data; + sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl); + return sde->this_idx; +} + +/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */ +static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq, + struct sk_buff *skb) +{ + struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; + int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN; + int rc = -EFAULT; + + skb_pull(skb, OPA_VNIC_HDR_LEN); + + /* Validate Packet length */ + if (unlikely(skb->len > max_len)) + vinfo->stats[rxq->idx].rx_oversize++; + else if (unlikely(skb->len < ETH_ZLEN)) + vinfo->stats[rxq->idx].rx_runt++; + else + rc = 0; + return rc; +} + +static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq) +{ + unsigned char *pad_info; + struct sk_buff *skb; + + skb = skb_dequeue(&rxq->skbq); + if (unlikely(!skb)) + return NULL; + + /* remove tail padding and icrc */ + pad_info = skb->data + skb->len - 1; + skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN - + ((*pad_info) & 0x7))); + + return skb; +} + +/* hfi1_vnic_handle_rx - handle skb receive */ +static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq, + int *work_done, int work_to_do) +{ + struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; + struct sk_buff *skb; + int rc; + + while (1) { + if (*work_done >= work_to_do) + break; + + skb = hfi1_vnic_get_skb(rxq); + if (unlikely(!skb)) + break; + + rc = hfi1_vnic_decap_skb(rxq, skb); + /* update rx counters */ + hfi1_vnic_update_rx_counters(vinfo, 
rxq->idx, skb, rc); + if (unlikely(rc)) { + dev_kfree_skb_any(skb); + continue; + } + + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, rxq->netdev); + + napi_gro_receive(&rxq->napi, skb); + (*work_done)++; + } +} + +/* hfi1_vnic_napi - napi receive polling callback function */ +static int hfi1_vnic_napi(struct napi_struct *napi, int budget) +{ + struct hfi1_vnic_rx_queue *rxq = container_of(napi, + struct hfi1_vnic_rx_queue, napi); + struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; + int work_done = 0; + + v_dbg("napi %d budget %d\n", rxq->idx, budget); + hfi1_vnic_handle_rx(rxq, &work_done, budget); + + v_dbg("napi %d work_done %d\n", rxq->idx, work_done); + if (work_done < budget) + napi_complete(napi); + + return work_done; +} + +void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) +{ + struct hfi1_devdata *dd = packet->rcd->dd; + struct hfi1_vnic_vport_info *vinfo = NULL; + struct hfi1_vnic_rx_queue *rxq; + struct sk_buff *skb; + int l4_type, vesw_id = -1; + u8 q_idx; + + l4_type = HFI1_GET_L4_TYPE(packet->ebuf); + if (likely(l4_type == OPA_VNIC_L4_ETHR)) { + vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); + vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); + + /* + * In case of invalid vesw id, count the error on + * the first available vport. + */ + if (unlikely(!vinfo)) { + struct hfi1_vnic_vport_info *vinfo_tmp; + int id_tmp = 0; + + vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp); + if (vinfo_tmp) { + spin_lock(&vport_cntr_lock); + vinfo_tmp->stats[0].netstats.rx_nohandler++; + spin_unlock(&vport_cntr_lock); + } + } + } + + if (unlikely(!vinfo)) { + dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n", + l4_type, vesw_id, packet->rcd->ctxt); + return; + } + + q_idx = packet->rcd->vnic_q_idx; + rxq = &vinfo->rxq[q_idx]; + if (unlikely(!netif_oper_up(vinfo->netdev))) { + vinfo->stats[q_idx].rx_drop_state++; + skb_queue_purge(&rxq->skbq); + return; + } + + if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) { + vinfo->stats[q_idx].netstats.rx_fifo_errors++; + return; + } + + skb = netdev_alloc_skb(vinfo->netdev, packet->tlen); + if (unlikely(!skb)) { + vinfo->stats[q_idx].netstats.rx_fifo_errors++; + return; + } + + memcpy(skb->data, packet->ebuf, packet->tlen); + skb_put(skb, packet->tlen); + skb_queue_tail(&rxq->skbq, skb); + + if (napi_schedule_prep(&rxq->napi)) { + v_dbg("napi %d scheduling\n", q_idx); + __napi_schedule(&rxq->napi); + } +} + +static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + struct net_device *netdev = vinfo->netdev; + int i, rc; + + /* ensure virtual eth switch id is valid */ + if (!vinfo->vesw_id) + return -EINVAL; + + rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id, + vinfo->vesw_id + 1, GFP_NOWAIT); + if (rc < 0) + return rc; + + for (i = 0; i < vinfo->num_rx_q; i++) { + struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; + + skb_queue_head_init(&rxq->skbq); + napi_enable(&rxq->napi); + } + + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); + set_bit(HFI1_VNIC_UP, &vinfo->flags); + + return 0; +} + +static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + u8 i; + + clear_bit(HFI1_VNIC_UP, &vinfo->flags); + netif_carrier_off(vinfo->netdev); + netif_tx_disable(vinfo->netdev); + idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); + + /* ensure irqs see the change */ + hfi1_vnic_synchronize_irq(dd); + + /* remove unread skbs */ + for (i = 0; i < vinfo->num_rx_q; i++) { + struct hfi1_vnic_rx_queue *rxq = 
&vinfo->rxq[i]; + + napi_disable(&rxq->napi); + skb_queue_purge(&rxq->skbq); + } +} + +static int hfi1_netdev_open(struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + int rc; + + mutex_lock(&vinfo->lock); + rc = hfi1_vnic_up(vinfo); + mutex_unlock(&vinfo->lock); + return rc; +} + +static int hfi1_netdev_close(struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + + mutex_lock(&vinfo->lock); + if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) + hfi1_vnic_down(vinfo); + mutex_unlock(&vinfo->lock); + return 0; +} + +static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata **vnic_ctxt) +{ + int rc; + + rc = allocate_vnic_ctxt(dd, vnic_ctxt); + if (rc) { + dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc); + return rc; + } + + rc = setup_vnic_ctxt(dd, *vnic_ctxt); + if (rc) { + dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc); + deallocate_vnic_ctxt(dd, *vnic_ctxt); + *vnic_ctxt = NULL; + } + + return rc; +} + +static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + int i, rc = 0; + + mutex_lock(&hfi1_mutex); + if (!dd->vnic.num_vports) { + rc = hfi1_vnic_txreq_init(dd); + if (rc) + goto txreq_fail; + + dd->vnic.msix_idx = dd->first_dyn_msix_idx; + } + + for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { + rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); + if (rc) + break; + dd->vnic.ctxt[i]->vnic_q_idx = i; + } + + if (i < vinfo->num_rx_q) { + /* + * If required amount of contexts is not + * allocated successfully then remaining contexts + * are released. + */ + while (i-- > dd->vnic.num_ctxt) { + deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + dd->vnic.ctxt[i] = NULL; + } + goto alloc_fail; + } + + if (dd->vnic.num_ctxt != i) { + dd->vnic.num_ctxt = i; + hfi1_init_vnic_rsm(dd); + } + + dd->vnic.num_vports++; + hfi1_vnic_sdma_init(vinfo); +alloc_fail: + if (!dd->vnic.num_vports) + hfi1_vnic_txreq_deinit(dd); +txreq_fail: + mutex_unlock(&hfi1_mutex); + return rc; +} + +static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + int i; + + mutex_lock(&hfi1_mutex); + if (--dd->vnic.num_vports == 0) { + for (i = 0; i < dd->vnic.num_ctxt; i++) { + deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + dd->vnic.ctxt[i] = NULL; + } + hfi1_deinit_vnic_rsm(dd); + dd->vnic.num_ctxt = 0; + hfi1_vnic_txreq_deinit(dd); + } + mutex_unlock(&hfi1_mutex); +} + +static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + bool reopen = false; + + /* + * If vesw_id is being changed, and if the vnic port is up, + * reset the vnic port to ensure new vesw_id gets picked up + */ + if (id != vinfo->vesw_id) { + mutex_lock(&vinfo->lock); + if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) { + hfi1_vnic_down(vinfo); + reopen = true; + } + + vinfo->vesw_id = id; + if (reopen) + hfi1_vnic_up(vinfo); + + mutex_unlock(&vinfo->lock); + } +} + +/* netdev ops */ +static const struct net_device_ops hfi1_netdev_ops = { + .ndo_open = hfi1_netdev_open, + .ndo_stop = hfi1_netdev_close, + .ndo_start_xmit = hfi1_netdev_start_xmit, + .ndo_select_queue = hfi1_vnic_select_queue, + .ndo_get_stats64 = hfi1_vnic_get_stats64, +}; + +struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)) +{ + struct hfi1_devdata *dd = 
dd_from_ibdev(device); + struct hfi1_vnic_vport_info *vinfo; + struct net_device *netdev; + struct rdma_netdev *rn; + int i, size, rc; + + if (!port_num || (port_num > dd->num_pports)) + return ERR_PTR(-EINVAL); + + if (type != RDMA_NETDEV_OPA_VNIC) + return ERR_PTR(-EOPNOTSUPP); + + size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); + netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, + dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT); + if (!netdev) + return ERR_PTR(-ENOMEM); + + rn = netdev_priv(netdev); + vinfo = opa_vnic_dev_priv(netdev); + vinfo->dd = dd; + vinfo->num_tx_q = dd->chip_sdma_engines; + vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT; + vinfo->netdev = netdev; + rn->set_id = hfi1_vnic_set_vesw_id; + + netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG; + netdev->hw_features = netdev->features; + netdev->vlan_features = netdev->features; + netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS); + netdev->netdev_ops = &hfi1_netdev_ops; + mutex_init(&vinfo->lock); + + for (i = 0; i < vinfo->num_rx_q; i++) { + struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; + + rxq->idx = i; + rxq->vinfo = vinfo; + rxq->netdev = netdev; + netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64); + } + + rc = hfi1_vnic_init(vinfo); + if (rc) + goto init_fail; + + return netdev; +init_fail: + mutex_destroy(&vinfo->lock); + free_netdev(netdev); + return ERR_PTR(rc); +} + +void hfi1_vnic_free_rn(struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + + hfi1_vnic_deinit(vinfo); + mutex_destroy(&vinfo->lock); + free_netdev(netdev); +} diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c new file mode 100644 index 0000000..51a817d --- /dev/null +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -0,0 +1,323 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains HFI1 support for VNIC SDMA functionality + */ + +#include "sdma.h" +#include "vnic.h" + +#define HFI1_VNIC_SDMA_Q_ACTIVE BIT(0) +#define HFI1_VNIC_SDMA_Q_DEFERRED BIT(1) + +#define HFI1_VNIC_TXREQ_NAME_LEN 32 +#define HFI1_VNIC_SDMA_DESC_WTRMRK 64 +#define HFI1_VNIC_SDMA_RETRY_COUNT 1 + +/* + * struct vnic_txreq - VNIC transmit descriptor + * @txreq: sdma transmit request + * @sdma: vnic sdma pointer + * @skb: skb to send + * @pad: pad buffer + * @plen: pad length + * @pbc_val: pbc value + * @retry_count: tx retry count + */ +struct vnic_txreq { + struct sdma_txreq txreq; + struct hfi1_vnic_sdma *sdma; + + struct sk_buff *skb; + unsigned char pad[HFI1_VNIC_MAX_PAD]; + u16 plen; + __le64 pbc_val; + + u32 retry_count; +}; + +static void vnic_sdma_complete(struct sdma_txreq *txreq, + int status) +{ + struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); + struct hfi1_vnic_sdma *vnic_sdma = tx->sdma; + + sdma_txclean(vnic_sdma->dd, txreq); + dev_kfree_skb_any(tx->skb); + kmem_cache_free(vnic_sdma->dd->vnic.txreq_cache, tx); +} + +static noinline int build_vnic_ulp_payload(struct sdma_engine *sde, + struct vnic_txreq *tx) +{ + int i, ret = 0; + + ret = sdma_txadd_kvaddr( + sde->dd, + &tx->txreq, + tx->skb->data, + skb_headlen(tx->skb)); + if (unlikely(ret)) + goto bail_txadd; + + for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) { + struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i]; + + /* combine physically continuous fragments later? 
*/ + ret = sdma_txadd_page(sde->dd, + &tx->txreq, + skb_frag_page(frag), + frag->page_offset, + skb_frag_size(frag)); + if (unlikely(ret)) + goto bail_txadd; + } + + if (tx->plen) + ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, + tx->pad + HFI1_VNIC_MAX_PAD - tx->plen, + tx->plen); + +bail_txadd: + return ret; +} + +static int build_vnic_tx_desc(struct sdma_engine *sde, + struct vnic_txreq *tx, + u64 pbc) +{ + int ret = 0; + u16 hdrbytes = 2 << 2; /* PBC */ + + ret = sdma_txinit_ahg( + &tx->txreq, + 0, + hdrbytes + tx->skb->len + tx->plen, + 0, + 0, + NULL, + 0, + vnic_sdma_complete); + if (unlikely(ret)) + goto bail_txadd; + + /* add pbc */ + tx->pbc_val = cpu_to_le64(pbc); + ret = sdma_txadd_kvaddr( + sde->dd, + &tx->txreq, + &tx->pbc_val, + hdrbytes); + if (unlikely(ret)) + goto bail_txadd; + + /* add the ulp payload */ + ret = build_vnic_ulp_payload(sde, tx); +bail_txadd: + return ret; +} + +/* setup the last plen bypes of pad */ +static inline void hfi1_vnic_update_pad(unsigned char *pad, u8 plen) +{ + pad[HFI1_VNIC_MAX_PAD - 1] = plen - OPA_VNIC_ICRC_TAIL_LEN; +} + +int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, + struct hfi1_vnic_vport_info *vinfo, + struct sk_buff *skb, u64 pbc, u8 plen) +{ + struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx]; + struct sdma_engine *sde = vnic_sdma->sde; + struct vnic_txreq *tx; + int ret = -ECOMM; + + if (unlikely(READ_ONCE(vnic_sdma->state) != HFI1_VNIC_SDMA_Q_ACTIVE)) + goto tx_err; + + if (unlikely(!sde || !sdma_running(sde))) + goto tx_err; + + tx = kmem_cache_alloc(dd->vnic.txreq_cache, GFP_ATOMIC); + if (unlikely(!tx)) { + ret = -ENOMEM; + goto tx_err; + } + + tx->sdma = vnic_sdma; + tx->skb = skb; + hfi1_vnic_update_pad(tx->pad, plen); + tx->plen = plen; + ret = build_vnic_tx_desc(sde, tx, pbc); + if (unlikely(ret)) + goto free_desc; + tx->retry_count = 0; + + ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq); + /* When -ECOMM, sdma callback will be called with ABORT status */ + if (unlikely(ret && unlikely(ret != -ECOMM))) + goto free_desc; + + return ret; + +free_desc: + sdma_txclean(dd, &tx->txreq); + kmem_cache_free(dd->vnic.txreq_cache, tx); +tx_err: + if (ret != -EBUSY) + dev_kfree_skb_any(skb); + return ret; +} + +/* + * hfi1_vnic_sdma_sleep - vnic sdma sleep function + * + * This function gets called from sdma_send_txreq() when there are not enough + * sdma descriptors available to send the packet. It adds Tx queue's wait + * structure to sdma engine's dmawait list to be woken up when descriptors + * become available. + */ +static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, + struct iowait *wait, + struct sdma_txreq *txreq, + unsigned int seq) +{ + struct hfi1_vnic_sdma *vnic_sdma = + container_of(wait, struct hfi1_vnic_sdma, wait); + struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; + struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); + + if (sdma_progress(sde, seq, txreq)) + if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT) + return -EAGAIN; + + vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; + write_seqlock(&dev->iowait_lock); + if (list_empty(&vnic_sdma->wait.list)) + list_add_tail(&vnic_sdma->wait.list, &sde->dmawait); + write_sequnlock(&dev->iowait_lock); + return -EBUSY; +} + +/* + * hfi1_vnic_sdma_wakeup - vnic sdma wakeup function + * + * This function gets called when SDMA descriptors becomes available and Tx + * queue's wait structure was previously added to sdma engine's dmawait list. + * It notifies the upper driver about Tx queue wakeup. 
+ */ +static void hfi1_vnic_sdma_wakeup(struct iowait *wait, int reason) +{ + struct hfi1_vnic_sdma *vnic_sdma = + container_of(wait, struct hfi1_vnic_sdma, wait); + struct hfi1_vnic_vport_info *vinfo = vnic_sdma->vinfo; + + vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE; + if (__netif_subqueue_stopped(vinfo->netdev, vnic_sdma->q_idx)) + netif_wake_subqueue(vinfo->netdev, vnic_sdma->q_idx); +}; + +inline bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx) +{ + struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx]; + + return (READ_ONCE(vnic_sdma->state) == HFI1_VNIC_SDMA_Q_ACTIVE); +} + +void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo) +{ + int i; + + for (i = 0; i < vinfo->num_tx_q; i++) { + struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i]; + + iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep, + hfi1_vnic_sdma_wakeup, NULL); + vnic_sdma->sde = &vinfo->dd->per_sdma[i]; + vnic_sdma->dd = vinfo->dd; + vnic_sdma->vinfo = vinfo; + vnic_sdma->q_idx = i; + vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE; + + /* Add a free descriptor watermark for wakeups */ + if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) { + INIT_LIST_HEAD(&vnic_sdma->stx.list); + vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK; + list_add_tail(&vnic_sdma->stx.list, + &vnic_sdma->wait.tx_head); + } + } +} + +static void hfi1_vnic_txreq_kmem_cache_ctor(void *obj) +{ + struct vnic_txreq *tx = (struct vnic_txreq *)obj; + + memset(tx, 0, sizeof(*tx)); +} + +int hfi1_vnic_txreq_init(struct hfi1_devdata *dd) +{ + char buf[HFI1_VNIC_TXREQ_NAME_LEN]; + + snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit); + dd->vnic.txreq_cache = kmem_cache_create(buf, + sizeof(struct vnic_txreq), + 0, SLAB_HWCACHE_ALIGN, + hfi1_vnic_txreq_kmem_cache_ctor); + if (!dd->vnic.txreq_cache) + return -ENOMEM; + return 0; +} + +void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd) +{ + kmem_cache_destroy(dd->vnic.txreq_cache); + dd->vnic.txreq_cache = NULL; +}
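
Note on the length arithmetic in this patch: hfi1_netdev_start_xmit() pads each frame out to an 8-byte boundary, accounts for the ICRC/tail bytes, and converts the result to dword counts for the PBC; hfi1_vnic_update_pad() records the pure pad count in the last pad byte so hfi1_vnic_get_skb() can trim it again on receive. The user-space sketch below reproduces that arithmetic only; the value of OPA_VNIC_ICRC_TAIL_LEN (assumed here to be 5, a 4-byte ICRC plus a 1-byte tail) and the example frame length are illustrative assumptions, not taken from this patch.

#include <assert.h>
#include <stdio.h>

#define OPA_VNIC_ICRC_TAIL_LEN 5        /* assumed: 4-byte ICRC + 1 tail byte */

int main(void)
{
        unsigned int skb_len = 60;      /* example frame length in bytes (illustrative) */

        /* transmit: pad up to an 8-byte boundary, then account for ICRC + tail */
        unsigned int pad_len = -(skb_len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
        pad_len += OPA_VNIC_ICRC_TAIL_LEN;

        unsigned int pkt_len = (skb_len + pad_len) >> 2;        /* dwords, excludes PBC */
        unsigned int total_len = pkt_len + 2;                   /* + 8-byte (2-dword) PBC */

        /* hfi1_vnic_update_pad(): the last pad byte records the pure pad count */
        unsigned char tail = pad_len - OPA_VNIC_ICRC_TAIL_LEN;

        /* receive: hfi1_vnic_get_skb() trims ICRC/tail plus the recorded pad */
        unsigned int rx_len = (skb_len + pad_len) -
                              OPA_VNIC_ICRC_TAIL_LEN - (tail & 0x7);

        assert(((skb_len + pad_len) & 0x7) == 0);       /* 8-byte aligned on the wire */
        assert(rx_len == skb_len);                      /* original length recovered */

        printf("len %u -> pad %u, pkt %u dw, total %u dw\n",
               skb_len, pad_len, pkt_len, total_len);
        return 0;
}

For the 60-byte example this yields pad_len of 12 (7 alignment bytes plus the 5 ICRC/tail bytes), an 18-dword pkt_len and a 20-dword total_len, matching total_len = pkt_len + 2 in the patch.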
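Note on hfi1_vnic_init(): it grows the pool of VNIC receive contexts from dd->vnic.num_ctxt up to the requested num_rx_q and, on a partial failure, releases only the contexts that this call added before reporting the error. The sketch below shows that rollback pattern in isolation, under the assumption that a plain malloc() can stand in for hfi1_vnic_allot_ctxt(); the names grow_slots, slots and fail_at are illustrative, not driver symbols.

#include <stdio.h>
#include <stdlib.h>

#define NUM_SLOTS 8

static void *slots[NUM_SLOTS];

/* grow from old_count to want slots; fail_at simulates an allocation failure */
static int grow_slots(int old_count, int want, int fail_at)
{
        int i;

        for (i = old_count; i < want; i++) {
                slots[i] = (i == fail_at) ? NULL : malloc(64);
                if (!slots[i])
                        break;
        }

        if (i < want) {
                /* release only the slots added by this call */
                while (i-- > old_count) {
                        free(slots[i]);
                        slots[i] = NULL;
                }
                return -1;
        }
        return want;            /* new high-water mark */
}

int main(void)
{
        int count = 0;

        count = grow_slots(count, 4, -1);       /* succeeds, count becomes 4 */
        printf("grew to %d slots\n", count);

        if (grow_slots(count, 8, 6) < 0)        /* fails at slot 6, rolls back slots 4 and 5 */
                printf("grow to 8 failed, still %d slots\n", count);
        return 0;
}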
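Note on the SDMA backpressure path: hfi1_vnic_send_dma(), hfi1_vnic_sdma_sleep() and hfi1_vnic_sdma_wakeup() implement a deferral protocol around the SDMA descriptor ring. A transmit queue stays HFI1_VNIC_SDMA_Q_ACTIVE while descriptors are available, is marked HFI1_VNIC_SDMA_Q_DEFERRED when they run out (after up to HFI1_VNIC_SDMA_RETRY_COUNT retries while the engine is still making progress), and is re-armed by the wakeup callback once freed descriptors reach the watermark. The following is a stripped-down user-space model of that state machine only; model_queue, model_send and model_wakeup are illustrative names, and the descriptor counter simply stands in for the SDMA ring.

#include <stdbool.h>
#include <stdio.h>

enum { Q_ACTIVE, Q_DEFERRED };

struct model_queue {
        int state;
        int free_desc;          /* stand-in for free SDMA descriptors */
};

/* send path: consume a descriptor, or defer the queue and report busy */
static bool model_send(struct model_queue *q)
{
        if (q->state != Q_ACTIVE || q->free_desc == 0) {
                q->state = Q_DEFERRED;  /* sleep callback marks the queue deferred */
                return false;           /* xmit path would return NETDEV_TX_BUSY */
        }
        q->free_desc--;
        return true;
}

/* wakeup path: descriptors were freed, re-arm the queue */
static void model_wakeup(struct model_queue *q, int freed)
{
        q->free_desc += freed;
        q->state = Q_ACTIVE;            /* driver also wakes the netdev subqueue */
}

int main(void)
{
        struct model_queue q = { .state = Q_ACTIVE, .free_desc = 2 };

        printf("send 1: %s\n", model_send(&q) ? "ok" : "busy");   /* ok */
        printf("send 2: %s\n", model_send(&q) ? "ok" : "busy");   /* ok */
        printf("send 3: %s\n", model_send(&q) ? "ok" : "busy");   /* busy, deferred */
        model_wakeup(&q, 2);
        printf("send 4: %s\n", model_send(&q) ? "ok" : "busy");   /* ok again */
        return 0;
}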