summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1
diff options
context:
space:
mode:
authorDoug Ledford <dledford@redhat.com>2017-08-10 13:43:11 -0400
committerDoug Ledford <dledford@redhat.com>2017-08-10 13:43:11 -0400
commitdb14dff1743e4cd3d840950ad8d735b8957aaf6a (patch)
treeb7c8ca2283e40c4826e56aa784de2dde22cbe141 /drivers/infiniband/hw/hfi1
parent520eccdfe187591a51ea9ab4c1a024ae4d0f68d9 (diff)
parent1bb77b8c1d57149ed0aa6825255ead80ae584034 (diff)
downloadop-kernel-dev-db14dff1743e4cd3d840950ad8d735b8957aaf6a.zip
op-kernel-dev-db14dff1743e4cd3d840950ad8d735b8957aaf6a.tar.gz
Merge tag 'rdma-next-2017-08-10' of git://git.kernel.org/pub/scm/linux/kernel/git/leon/linux-rdma into rdma-netlink
RDMA netlink infrastructure v2 Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile2
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c18
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h14
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.h6
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c660
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h24
-rw-r--r--drivers/infiniband/hw/hfi1/common.h1
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c191
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c11
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.c114
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.h190
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c152
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c16
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h156
-rw-r--r--drivers/infiniband/hw/hfi1/init.c111
-rw-r--r--drivers/infiniband/hw/hfi1/intr.c3
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.h70
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c690
-rw-r--r--drivers/infiniband/hw/hfi1/mad.h5
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c14
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.h5
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c384
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c15
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c44
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c21
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c42
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c100
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c42
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h3
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c58
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h322
-rw-r--r--drivers/infiniband/hw/hfi1/trace_misc.h20
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rx.h92
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c18
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c64
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c128
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.h26
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c230
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h6
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c122
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h11
-rw-r--r--drivers/infiniband/hw/hfi1/vnic.h1
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c17
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_sdma.c14
44 files changed, 2628 insertions, 1605 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 88085f6..66d538c 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -8,7 +8,7 @@
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
- eprom.o file_ops.o firmware.o \
+ eprom.o exp_rcv.o file_ops.o firmware.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index e2cd2cd..a97055d 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -335,10 +335,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
sde->cpu = cpu;
cpumask_clear(&msix->mask);
cpumask_set_cpu(cpu, &msix->mask);
- dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n",
- msix->msix.vector, irq_type_names[msix->type],
+ dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
+ msix->irq, irq_type_names[msix->type],
sde->this_idx, cpu);
- irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+ irq_set_affinity_hint(msix->irq, &msix->mask);
/*
* Set the new cpu in the hfi1_affinity_node and clean
@@ -387,7 +387,7 @@ static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
{
struct irq_affinity_notify *notify = &msix->notify;
- notify->irq = msix->msix.vector;
+ notify->irq = msix->irq;
notify->notify = hfi1_irq_notifier_notify;
notify->release = hfi1_irq_notifier_release;
@@ -472,10 +472,10 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
}
cpumask_set_cpu(cpu, &msix->mask);
- dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
- msix->msix.vector, irq_type_names[msix->type],
+ dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
+ msix->irq, irq_type_names[msix->type],
extra, cpu);
- irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+ irq_set_affinity_hint(msix->irq, &msix->mask);
if (msix->type == IRQ_SDMA) {
sde->cpu = cpu;
@@ -533,7 +533,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
}
}
- irq_set_affinity_hint(msix->msix.vector, NULL);
+ irq_set_affinity_hint(msix->irq, NULL);
cpumask_clear(&msix->mask);
mutex_unlock(&node_affinity.lock);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index e78c7aa..2a1e374 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -75,24 +75,26 @@ struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void);
/* Initialize driver affinity data */
-int hfi1_dev_affinity_init(struct hfi1_devdata *);
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd);
/*
* Set IRQ affinity to a CPU. The function will determine the
* CPU and set the affinity to it.
*/
-int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix);
/*
* Remove the IRQ's CPU affinity. This function also updates
* any internal CPU tracking data
*/
-void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix);
/*
* Determine a CPU affinity for a user process, if the process does not
* have an affinity set yet.
*/
-int hfi1_get_proc_affinity(int);
+int hfi1_get_proc_affinity(int node);
/* Release a CPU used by a user process. */
-void hfi1_put_proc_affinity(int);
+void hfi1_put_proc_affinity(int cpu);
struct hfi1_affinity_node {
int node;
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 794e681..3f9a071 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -237,7 +237,7 @@ static inline void aspm_disable_all(struct hfi1_devdata *dd)
{
struct hfi1_ctxtdata *rcd;
unsigned long flags;
- unsigned i;
+ u16 i;
for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = dd->rcd[i];
@@ -256,7 +256,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd)
{
struct hfi1_ctxtdata *rcd;
unsigned long flags;
- unsigned i;
+ u16 i;
aspm_enable(dd);
@@ -284,7 +284,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
static inline void aspm_init(struct hfi1_devdata *dd)
{
- unsigned i;
+ u16 i;
spin_lock_init(&dd->aspm_lock);
dd->aspm_supported = aspm_hw_l1_supported(dd);
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 94b54850..101fbbb 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1012,14 +1012,15 @@ static struct flag_table dc8051_info_err_flags[] = {
*/
static struct flag_table dc8051_info_host_msg_flags[] = {
FLAG_ENTRY0("Host request done", 0x0001),
- FLAG_ENTRY0("BC SMA message", 0x0002),
- FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
+ FLAG_ENTRY0("BC PWR_MGM message", 0x0002),
+ FLAG_ENTRY0("BC SMA message", 0x0004),
FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
FLAG_ENTRY0("External device config request", 0x0020),
FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
FLAG_ENTRY0("LinkUp achieved", 0x0080),
FLAG_ENTRY0("Link going down", 0x0100),
+ FLAG_ENTRY0("Link width downgraded", 0x0200),
};
static u32 encoded_size(u32 size);
@@ -1066,6 +1067,8 @@ static int thermal_init(struct hfi1_devdata *dd);
static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+ int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
@@ -1294,25 +1297,71 @@ CNTR_ELEM(#name, \
CNTR_SYNTH, \
access_ibp_##cntr)
+/**
+ * hfi_addr_from_offset - return addr for readq/writeq
+ * @dd - the dd device
+ * @offset - the offset of the CSR within bar0
+ *
+ * This routine selects the appropriate base address
+ * based on the indicated offset.
+ */
+static inline void __iomem *hfi1_addr_from_offset(
+ const struct hfi1_devdata *dd,
+ u32 offset)
+{
+ if (offset >= dd->base2_start)
+ return dd->kregbase2 + (offset - dd->base2_start);
+ return dd->kregbase1 + offset;
+}
+
+/**
+ * read_csr - read CSR at the indicated offset
+ * @dd - the dd device
+ * @offset - the offset of the CSR within bar0
+ *
+ * Return: the value read or all FF's if there
+ * is no mapping
+ */
u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
{
- if (dd->flags & HFI1_PRESENT) {
- return readq((void __iomem *)dd->kregbase + offset);
- }
+ if (dd->flags & HFI1_PRESENT)
+ return readq(hfi1_addr_from_offset(dd, offset));
return -1;
}
+/**
+ * write_csr - write CSR at the indicated offset
+ * @dd - the dd device
+ * @offset - the offset of the CSR within bar0
+ * @value - value to write
+ */
void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
{
- if (dd->flags & HFI1_PRESENT)
- writeq(value, (void __iomem *)dd->kregbase + offset);
+ if (dd->flags & HFI1_PRESENT) {
+ void __iomem *base = hfi1_addr_from_offset(dd, offset);
+
+ /* avoid write to RcvArray */
+ if (WARN_ON(offset >= RCV_ARRAY && offset < dd->base2_start))
+ return;
+ writeq(value, base);
+ }
}
+/**
+ * get_csr_addr - return te iomem address for offset
+ * @dd - the dd device
+ * @offset - the offset of the CSR within bar0
+ *
+ * Return: The iomem address to use in subsequent
+ * writeq/readq operations.
+ */
void __iomem *get_csr_addr(
- struct hfi1_devdata *dd,
+ const struct hfi1_devdata *dd,
u32 offset)
{
- return (void __iomem *)dd->kregbase + offset;
+ if (dd->flags & HFI1_PRESENT)
+ return hfi1_addr_from_offset(dd, offset);
+ return NULL;
}
static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
@@ -5496,7 +5545,7 @@ static void update_rcverr_timer(unsigned long opaque)
set_link_down_reason(
ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
- queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
+ queue_work(ppd->link_wq, &ppd->link_bounce_work);
}
dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt;
@@ -6051,7 +6100,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
* will not happen. We have to do it here
* before turning the DC off.
*/
- queue_work(ppd->hfi1_wq, &ppd->link_down_work);
+ queue_work(ppd->link_wq, &ppd->link_down_work);
}
} else {
dd_dev_info(dd, "%s: QSFP module inserted\n",
@@ -6086,7 +6135,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
/* Schedule the QSFP work only if there is a cable attached. */
if (qsfp_mod_present(ppd))
- queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
+ queue_work(ppd->link_wq, &ppd->qsfp_info.qsfp_work);
}
static int request_host_lcb_access(struct hfi1_devdata *dd)
@@ -6741,7 +6790,7 @@ static void rxe_freeze(struct hfi1_devdata *dd)
/* disable all receive contexts */
for (i = 0; i < dd->num_rcv_contexts; i++)
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, dd->rcd[i]);
}
/*
@@ -6753,7 +6802,7 @@ static void rxe_freeze(struct hfi1_devdata *dd)
static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
{
u32 rcvmask;
- int i;
+ u16 i;
/* enable all kernel contexts */
for (i = 0; i < dd->num_rcv_contexts; i++) {
@@ -6765,9 +6814,9 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
- rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+ rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
- hfi1_rcvctrl(dd, rcvmask, i);
+ hfi1_rcvctrl(dd, rcvmask, rcd);
}
/* enable port */
@@ -6906,7 +6955,7 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd)
static const char * const link_down_reason_strs[] = {
[OPA_LINKDOWN_REASON_NONE] = "None",
- [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Receive error 0",
[OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length",
[OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long",
[OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short",
@@ -6996,6 +7045,7 @@ void handle_link_down(struct work_struct *work)
/* Go offline first, then deal with reading/writing through 8051 */
was_up = !!(ppd->host_link_state & HLS_UP);
set_link_state(ppd, HLS_DN_OFFLINE);
+ xchg(&ppd->is_link_down_queued, 0);
if (was_up) {
lcl_reason = 0;
@@ -7689,12 +7739,12 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
host_msg &= ~(u64)HOST_REQ_DONE;
}
if (host_msg & BC_SMA_MSG) {
- queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
+ queue_work(ppd->link_wq, &ppd->sma_message_work);
host_msg &= ~(u64)BC_SMA_MSG;
}
if (host_msg & LINKUP_ACHIEVED) {
dd_dev_info(dd, "8051: Link up\n");
- queue_work(ppd->hfi1_wq, &ppd->link_up_work);
+ queue_work(ppd->link_wq, &ppd->link_up_work);
host_msg &= ~(u64)LINKUP_ACHIEVED;
}
if (host_msg & EXT_DEVICE_CFG_REQ) {
@@ -7702,7 +7752,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
}
if (host_msg & VERIFY_CAP_FRAME) {
- queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
+ queue_work(ppd->link_wq, &ppd->link_vc_work);
host_msg &= ~(u64)VERIFY_CAP_FRAME;
}
if (host_msg & LINK_GOING_DOWN) {
@@ -7717,7 +7767,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
host_msg &= ~(u64)LINK_GOING_DOWN;
}
if (host_msg & LINK_WIDTH_DOWNGRADED) {
- queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
+ queue_work(ppd->link_wq, &ppd->link_downgrade_work);
host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
}
if (host_msg) {
@@ -7756,11 +7806,12 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
*/
if ((ppd->host_link_state &
(HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) ||
- ppd->link_enabled == 0) {
+ ppd->link_enabled == 0 || ppd->is_link_down_queued) {
dd_dev_info(dd, "%s: not queuing link down\n",
__func__);
} else {
- queue_work(ppd->hfi1_wq, &ppd->link_down_work);
+ xchg(&ppd->is_link_down_queued, 1);
+ queue_work(ppd->link_wq, &ppd->link_down_work);
}
}
}
@@ -7968,7 +8019,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
dd_dev_info_ratelimited(dd, "%s: PortErrorAction bounce\n",
__func__);
set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
- queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
+ queue_work(ppd->link_wq, &ppd->link_bounce_work);
}
}
@@ -8781,6 +8832,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
& REMOTE_DEVICE_REV_MASK;
}
+int write_host_interface_version(struct hfi1_devdata *dd, u8 version)
+{
+ u32 frame;
+ u32 mask;
+
+ mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT);
+ read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame);
+ /* Clear, then set field */
+ frame &= ~mask;
+ frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT);
+ return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
+ frame);
+}
+
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch)
{
@@ -9257,12 +9322,6 @@ int start_link(struct hfi1_pportdata *ppd)
*/
tune_serdes(ppd);
- if (!ppd->link_enabled) {
- dd_dev_info(ppd->dd,
- "%s: stopping link start because link is disabled\n",
- __func__);
- return 0;
- }
if (!ppd->driver_link_ready) {
dd_dev_info(ppd->dd,
"%s: stopping link start because driver is not ready\n",
@@ -9373,13 +9432,13 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable temperature too high\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP cable temperature too high\n",
+ __func__);
if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP cable temperature too low\n",
+ __func__);
/*
* The remaining alarms/warnings don't matter if the link is down.
@@ -9389,75 +9448,75 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
- dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP supply voltage too high\n",
+ __func__);
if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
- dd_dev_info(dd, "%s: QSFP supply voltage too low\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP supply voltage too low\n",
+ __func__);
/* Byte 2 is vendor specific */
if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 1/2 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 1/2 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 3/4 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 3/4 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too high\n",
+ __func__);
if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too low\n",
+ __func__);
if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too high\n",
+ __func__);
if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too low\n",
+ __func__);
if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 power too low\n",
+ __func__);
/* Bytes 9-10 and 11-12 are reserved */
/* Bytes 13-15 are vendor specific */
@@ -9480,6 +9539,13 @@ void qsfp_event(struct work_struct *work)
if (!qsfp_mod_present(ppd))
return;
+ if (ppd->host_link_state == HLS_DN_DISABLE) {
+ dd_dev_info(ppd->dd,
+ "%s: stopping link start because link is disabled\n",
+ __func__);
+ return;
+ }
+
/*
* Turn DC back on after cable has been re-inserted. Up until
* now, the DC has been in reset to save power.
@@ -9635,7 +9701,7 @@ static void try_start_link(struct hfi1_pportdata *ppd)
"QSFP not responding, waiting and retrying %d\n",
(int)ppd->qsfp_retry_count);
ppd->qsfp_retry_count++;
- queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
+ queue_delayed_work(ppd->link_wq, &ppd->start_link_work,
msecs_to_jiffies(QSFP_RETRY_WAIT));
return;
}
@@ -9742,17 +9808,6 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd)
return 0;
}
-static const char * const pt_names[] = {
- "expected",
- "eager",
- "invalid"
-};
-
-static const char *pt_name(u32 type)
-{
- return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
-}
-
/*
* index is the index into the receive array
*/
@@ -9760,35 +9815,34 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order)
{
u64 reg;
- void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
- (dd->kregbase + RCV_ARRAY));
if (!(dd->flags & HFI1_PRESENT))
goto done;
- if (type == PT_INVALID) {
+ if (type == PT_INVALID || type == PT_INVALID_FLUSH) {
pa = 0;
+ order = 0;
} else if (type > PT_INVALID) {
dd_dev_err(dd,
"unexpected receive array type %u for index %u, not handled\n",
type, index);
goto done;
}
-
- hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
- pt_name(type), index, pa, (unsigned long)order);
+ trace_hfi1_put_tid(dd, index, type, pa, order);
#define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */
reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
<< RCV_ARRAY_RT_ADDR_SHIFT;
- writeq(reg, base + (index * 8));
+ trace_hfi1_write_rcvarray(dd->rcvarray_wc + (index * 8), reg);
+ writeq(reg, dd->rcvarray_wc + (index * 8));
- if (type == PT_EAGER)
+ if (type == PT_EAGER || type == PT_INVALID_FLUSH || (index & 3) == 3)
/*
- * Eager entries are written one-by-one so we have to push them
- * after we write the entry.
+ * Eager entries are written and flushed
+ *
+ * Expected entries are flushed every 4 writes
*/
flush_wc();
done:
@@ -9810,15 +9864,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}
-struct ib_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr)
-{
- u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
-
- return (struct ib_header *)
- (rhf_addr - dd->rhf_offset + offset);
-}
-
static const char * const ib_cfg_name_strings[] = {
"HFI1_IB_CFG_LIDLMC",
"HFI1_IB_CFG_LWID_DG_ENB",
@@ -10037,28 +10082,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
sdma_update_lmc(dd, mask, ppd->lid);
}
-static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
-{
- unsigned long timeout;
- u32 curr_state;
-
- timeout = jiffies + msecs_to_jiffies(msecs);
- while (1) {
- curr_state = read_physical_state(dd);
- if (curr_state == state)
- break;
- if (time_after(jiffies, timeout)) {
- dd_dev_err(dd,
- "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
- state, curr_state);
- return -ETIMEDOUT;
- }
- usleep_range(1950, 2050); /* sleep 2ms-ish */
- }
-
- return 0;
-}
-
static const char *state_completed_string(u32 completed)
{
static const char * const state_completed[] = {
@@ -10253,49 +10276,35 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
{
struct hfi1_devdata *dd = ppd->dd;
- u32 pstate, previous_state;
+ u32 previous_state;
int ret;
- int do_transition;
- int do_wait;
update_lcb_cache(dd);
previous_state = ppd->host_link_state;
ppd->host_link_state = HLS_GOING_OFFLINE;
- pstate = read_physical_state(dd);
- if (pstate == PLS_OFFLINE) {
- do_transition = 0; /* in right state */
- do_wait = 0; /* ...no need to wait */
- } else if ((pstate & 0xf0) == PLS_OFFLINE) {
- do_transition = 0; /* in an offline transient state */
- do_wait = 1; /* ...wait for it to settle */
- } else {
- do_transition = 1; /* need to move to offline */
- do_wait = 1; /* ...will need to wait */
- }
- if (do_transition) {
- ret = set_physical_link_state(dd,
- (rem_reason << 8) | PLS_OFFLINE);
+ /* start offline transition */
+ ret = set_physical_link_state(dd, (rem_reason << 8) | PLS_OFFLINE);
- if (ret != HCMD_SUCCESS) {
- dd_dev_err(dd,
- "Failed to transition to Offline link state, return %d\n",
- ret);
- return -EINVAL;
- }
- if (ppd->offline_disabled_reason ==
- HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
- ppd->offline_disabled_reason =
- HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
+ if (ret != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to transition to Offline link state, return %d\n",
+ ret);
+ return -EINVAL;
}
+ if (ppd->offline_disabled_reason ==
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
- if (do_wait) {
- /* it can take a while for the link to go down */
- ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
- if (ret < 0)
- return ret;
- }
+ /*
+ * Wait for offline transition. It can take a while for
+ * the link to go down.
+ */
+ ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
+ if (ret < 0)
+ return ret;
/*
* Now in charge of LCB - must be after the physical state is
@@ -10484,6 +10493,14 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
}
/*
+ * Verify if BCT for data VLs is non-zero.
+ */
+static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
+{
+ return !!ppd->actual_vls_operational;
+}
+
+/*
* Change the physical and/or logical link state.
*
* Do not call this routine while inside an interrupt. It contains
@@ -10545,38 +10562,58 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
goto unexpected;
}
+ /*
+ * Wait for Link_Up physical state.
+ * Physical and Logical states should already be
+ * be transitioned to LinkUp and LinkInit respectively.
+ */
+ ret = wait_physical_linkstate(ppd, PLS_LINKUP, 1000);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: physical state did not change to LINK-UP\n",
+ __func__);
+ break;
+ }
+
ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
if (ret) {
dd_dev_err(dd,
"%s: logical state did not change to INIT\n",
__func__);
- } else {
- /* clear old transient LINKINIT_REASON code */
- if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
- ppd->linkinit_reason =
- OPA_LINKINIT_REASON_LINKUP;
+ break;
+ }
- /* enable the port */
- add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
+ /* clear old transient LINKINIT_REASON code */
+ if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
+ ppd->linkinit_reason =
+ OPA_LINKINIT_REASON_LINKUP;
- handle_linkup_change(dd, 1);
- ppd->host_link_state = HLS_UP_INIT;
- }
+ /* enable the port */
+ add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
+
+ handle_linkup_change(dd, 1);
+ ppd->host_link_state = HLS_UP_INIT;
break;
case HLS_UP_ARMED:
if (ppd->host_link_state != HLS_UP_INIT)
goto unexpected;
- ppd->host_link_state = HLS_UP_ARMED;
+ if (!data_vls_operational(ppd)) {
+ dd_dev_err(dd,
+ "%s: data VLs not operational\n", __func__);
+ ret = -EINVAL;
+ break;
+ }
+
set_logical_state(dd, LSTATE_ARMED);
ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
if (ret) {
- /* logical state didn't change, stay at init */
- ppd->host_link_state = HLS_UP_INIT;
dd_dev_err(dd,
"%s: logical state did not change to ARMED\n",
__func__);
+ break;
}
+ ppd->host_link_state = HLS_UP_ARMED;
/*
* The simulator does not currently implement SMA messages,
* so neighbor_normal is not set. Set it here when we first
@@ -10589,18 +10626,16 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (ppd->host_link_state != HLS_UP_ARMED)
goto unexpected;
- ppd->host_link_state = HLS_UP_ACTIVE;
set_logical_state(dd, LSTATE_ACTIVE);
ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
if (ret) {
- /* logical state didn't change, stay at armed */
- ppd->host_link_state = HLS_UP_ARMED;
dd_dev_err(dd,
"%s: logical state did not change to ACTIVE\n",
__func__);
} else {
/* tell all engines to go running */
sdma_all_running(dd);
+ ppd->host_link_state = HLS_UP_ACTIVE;
/* Signal the IB layer that the port has went active */
event.device = &dd->verbs_dev.rdi.ibdev;
@@ -10658,6 +10693,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
*/
if (ret)
goto_offline(ppd, 0);
+ else
+ cache_physical_state(ppd);
break;
case HLS_DN_DISABLE:
/* link is disabled */
@@ -10682,6 +10719,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret = -EINVAL;
break;
}
+ ret = wait_physical_linkstate(ppd, PLS_DISABLED, 10000);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: physical state did not change to DISABLED\n",
+ __func__);
+ break;
+ }
dc_shutdown(dd);
}
ppd->host_link_state = HLS_DN_DISABLE;
@@ -10699,6 +10743,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (ppd->host_link_state != HLS_DN_POLL)
goto unexpected;
ppd->host_link_state = HLS_VERIFY_CAP;
+ cache_physical_state(ppd);
break;
case HLS_GOING_UP:
if (ppd->host_link_state != HLS_VERIFY_CAP)
@@ -11693,16 +11738,18 @@ static u32 encoded_size(u32 size)
return 0x1; /* if invalid, go with the minimum size */
}
-void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
+void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
+ struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ctxtdata *rcd;
u64 rcvctrl, reg;
int did_enable = 0;
+ u16 ctxt;
- rcd = dd->rcd[ctxt];
if (!rcd)
return;
+ ctxt = rcd->ctxt;
+
hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
@@ -12672,21 +12719,56 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
return -ETIMEDOUT;
}
-u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
+/*
+ * Read the physical hardware link state and set the driver's cached value
+ * of it.
+ */
+void cache_physical_state(struct hfi1_pportdata *ppd)
{
- u32 pstate;
+ u32 read_pstate;
u32 ib_pstate;
- pstate = read_physical_state(ppd->dd);
- ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
- if (ppd->last_pstate != ib_pstate) {
+ read_pstate = read_physical_state(ppd->dd);
+ ib_pstate = chip_to_opa_pstate(ppd->dd, read_pstate);
+ /* check if OPA pstate changed */
+ if (chip_to_opa_pstate(ppd->dd, ppd->pstate) != ib_pstate) {
dd_dev_info(ppd->dd,
"%s: physical state changed to %s (0x%x), phy 0x%x\n",
__func__, opa_pstate_name(ib_pstate), ib_pstate,
- pstate);
- ppd->last_pstate = ib_pstate;
+ read_pstate);
}
- return ib_pstate;
+ ppd->pstate = read_pstate;
+}
+
+/*
+ * wait_physical_linkstate - wait for an physical link state change to occur
+ * @ppd: port device
+ * @state: the state to wait for
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for physical link state change to occur.
+ * Returns 0 if state reached, otherwise -ETIMEDOUT.
+ */
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+ int msecs)
+{
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ cache_physical_state(ppd);
+ if (ppd->pstate == state)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
+ state, ppd->pstate);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ return 0;
}
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
@@ -12809,30 +12891,24 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
for (i = 0; i < dd->num_msix_entries; i++, me++) {
if (!me->arg) /* => no irq, no affinity */
continue;
- hfi1_put_irq_affinity(dd, &dd->msix_entries[i]);
- free_irq(me->msix.vector, me->arg);
+ hfi1_put_irq_affinity(dd, me);
+ free_irq(me->irq, me->arg);
}
+
+ /* clean structures */
+ kfree(dd->msix_entries);
+ dd->msix_entries = NULL;
+ dd->num_msix_entries = 0;
} else {
/* INTx */
if (dd->requested_intx_irq) {
free_irq(dd->pcidev->irq, dd);
dd->requested_intx_irq = 0;
}
- }
-
- /* turn off interrupts */
- if (dd->num_msix_entries) {
- /* MSI-X */
- pci_disable_msix(dd->pcidev);
- } else {
- /* INTx */
disable_intx(dd->pcidev);
}
- /* clean structures */
- kfree(dd->msix_entries);
- dd->msix_entries = NULL;
- dd->num_msix_entries = 0;
+ pci_free_irq_vectors(dd->pcidev);
}
/*
@@ -12986,13 +13062,21 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
continue;
/* make sure the name is terminated */
me->name[sizeof(me->name) - 1] = 0;
+ me->irq = pci_irq_vector(dd->pcidev, i);
+ /*
+ * On err return me->irq. Don't need to clear this
+ * because 'arg' has not been set, and cleanup will
+ * do the right thing.
+ */
+ if (me->irq < 0)
+ return me->irq;
- ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
+ ret = request_threaded_irq(me->irq, handler, thread, 0,
me->name, arg);
if (ret) {
dd_dev_err(dd,
- "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
- err_info, me->msix.vector, idx, ret);
+ "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
+ err_info, me->irq, idx, ret);
return ret;
}
/*
@@ -13003,8 +13087,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
ret = hfi1_get_irq_affinity(dd, me);
if (ret)
- dd_dev_err(dd,
- "unable to pin IRQ %d\n", ret);
+ dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
}
return ret;
@@ -13023,7 +13106,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
- synchronize_irq(me->msix.vector);
+ synchronize_irq(me->irq);
}
}
@@ -13036,7 +13119,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
return;
hfi1_put_irq_affinity(dd, me);
- free_irq(me->msix.vector, me->arg);
+ free_irq(me->irq, me->arg);
me->arg = NULL;
}
@@ -13064,14 +13147,19 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
me->name[sizeof(me->name) - 1] = 0;
me->type = IRQ_RCVCTXT;
-
+ me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
+ if (me->irq < 0) {
+ dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n",
+ idx, me->irq);
+ return;
+ }
remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
- ret = request_threaded_irq(me->msix.vector, receive_context_interrupt,
+ ret = request_threaded_irq(me->irq, receive_context_interrupt,
receive_context_thread, 0, me->name, arg);
if (ret) {
- dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n",
- me->msix.vector, idx, ret);
+ dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
+ me->irq, idx, ret);
return;
}
/*
@@ -13084,7 +13172,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
if (ret) {
dd_dev_err(dd,
"unable to pin IRQ %d\n", ret);
- free_irq(me->msix.vector, me->arg);
+ free_irq(me->irq, me->arg);
}
}
@@ -13107,9 +13195,8 @@ static void reset_interrupts(struct hfi1_devdata *dd)
static int set_up_interrupts(struct hfi1_devdata *dd)
{
- struct hfi1_msix_entry *entries;
- u32 total, request;
- int i, ret;
+ u32 total;
+ int ret, request;
int single_interrupt = 0; /* we expect to have all the interrupts */
/*
@@ -13121,39 +13208,31 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
*/
total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
- entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
- if (!entries) {
- ret = -ENOMEM;
- goto fail;
- }
- /* 1-1 MSI-X entry assignment */
- for (i = 0; i < total; i++)
- entries[i].msix.entry = i;
-
/* ask for MSI-X interrupts */
- request = total;
- request_msix(dd, &request, entries);
-
- if (request == 0) {
+ request = request_msix(dd, total);
+ if (request < 0) {
+ ret = request;
+ goto fail;
+ } else if (request == 0) {
/* using INTx */
/* dd->num_msix_entries already zero */
- kfree(entries);
single_interrupt = 1;
dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
+ } else if (request < total) {
+ /* using MSI-X, with reduced interrupts */
+ dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n",
+ total, request);
+ ret = -EINVAL;
+ goto fail;
} else {
- /* using MSI-X */
- dd->num_msix_entries = request;
- dd->msix_entries = entries;
-
- if (request != total) {
- /* using MSI-X, with reduced interrupts */
- dd_dev_err(
- dd,
- "cannot handle reduced interrupt case, want %u, got %u\n",
- total, request);
- ret = -EINVAL;
+ dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
+ GFP_KERNEL);
+ if (!dd->msix_entries) {
+ ret = -ENOMEM;
goto fail;
}
+ /* using MSI-X */
+ dd->num_msix_entries = total;
dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
}
@@ -13396,8 +13475,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
/* RcvArray */
for (i = 0; i < dd->chip_rcv_array_count; i++)
- write_csr(dd, RCV_ARRAY + (8 * i),
- RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
+ hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0);
/* RcvQPMapTable */
for (i = 0; i < 32; i++)
@@ -13831,9 +13909,10 @@ static void init_sc2vl_tables(struct hfi1_devdata *dd)
* a reset following the (possible) FLR in this routine.
*
*/
-static void init_chip(struct hfi1_devdata *dd)
+static int init_chip(struct hfi1_devdata *dd)
{
int i;
+ int ret = 0;
/*
* Put the HFI CSRs in a known state.
@@ -13881,12 +13960,22 @@ static void init_chip(struct hfi1_devdata *dd)
pcie_flr(dd->pcidev);
/* restore command and BARs */
- restore_pci_variables(dd);
+ ret = restore_pci_variables(dd);
+ if (ret) {
+ dd_dev_err(dd, "%s: Could not restore PCI variables\n",
+ __func__);
+ return ret;
+ }
if (is_ax(dd)) {
dd_dev_info(dd, "Resetting CSRs with FLR\n");
pcie_flr(dd->pcidev);
- restore_pci_variables(dd);
+ ret = restore_pci_variables(dd);
+ if (ret) {
+ dd_dev_err(dd, "%s: Could not restore PCI variables\n",
+ __func__);
+ return ret;
+ }
}
} else {
dd_dev_info(dd, "Resetting CSRs with writes\n");
@@ -13914,6 +14003,7 @@ static void init_chip(struct hfi1_devdata *dd)
write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
init_chip_resources(dd);
+ return ret;
}
static void init_early_variables(struct hfi1_devdata *dd)
@@ -14470,99 +14560,86 @@ static void init_txe(struct hfi1_devdata *dd)
write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
}
-int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
+int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd,
+ u16 jkey)
{
- struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
+ if (!rcd || !rcd->sc)
+ return -EINVAL;
+
+ hw_ctxt = rcd->sc->hw_context;
reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
/* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
/*
* Enable send-side J_KEY integrity check, unless this is A0 h/w
*/
if (!is_ax(dd)) {
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
}
/* Enable J_KEY check on receive context. */
reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
- write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
-done:
- return ret;
+ write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, reg);
+
+ return 0;
}
-int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
+int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
+ if (!rcd || !rcd->sc)
+ return -EINVAL;
+
+ hw_ctxt = rcd->sc->hw_context;
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
/*
* Disable send-side J_KEY integrity check, unless this is A0 h/w.
* This check would not have been enabled for A0 h/w, see
* set_ctxt_jkey().
*/
if (!is_ax(dd)) {
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
}
/* Turn off the J_KEY on the receive side */
- write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
-done:
- return ret;
+ write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, 0);
+
+ return 0;
}
-int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
+int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd,
+ u16 pkey)
{
- struct hfi1_ctxtdata *rcd;
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (ctxt < dd->num_rcv_contexts) {
- rcd = dd->rcd[ctxt];
- } else {
- ret = -EINVAL;
- goto done;
- }
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
+ if (!rcd || !rcd->sc)
+ return -EINVAL;
+
+ hw_ctxt = rcd->sc->hw_context;
reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
reg &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
-done:
- return ret;
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
+
+ return 0;
}
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
@@ -14573,9 +14650,6 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
if (!ctxt || !ctxt->sc)
return -EINVAL;
- if (ctxt->ctxt >= dd->num_rcv_contexts)
- return -EINVAL;
-
hw_ctxt = ctxt->sc->hw_context;
reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
@@ -14773,7 +14847,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
}
ppd->vls_supported = num_vls;
ppd->vls_operational = ppd->vls_supported;
- ppd->actual_vls_operational = ppd->vls_supported;
/* Set the default MTU. */
for (vl = 0; vl < num_vls; vl++)
dd->vld[vl].mtu = hfi1_max_mtu;
@@ -14793,7 +14866,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* start in offline */
ppd->host_link_state = HLS_DN_OFFLINE;
init_vl_arb_caches(ppd);
- ppd->last_pstate = 0xff; /* invalid value */
+ ppd->pstate = PLS_OFFLINE;
}
dd->link_default = HLS_DN_POLL;
@@ -14807,6 +14880,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret < 0)
goto bail_free;
+ /* Save PCI space registers to rewrite after device reset */
+ ret = save_pci_variables(dd);
+ if (ret < 0)
+ goto bail_cleanup;
+
/* verify that reads actually work, save revision for reset check */
dd->revision = read_csr(dd, CCE_REVISION);
if (dd->revision == ~(u64)0) {
@@ -14899,7 +14977,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
goto bail_cleanup;
/* obtain chip sizes, reset chip CSRs */
- init_chip(dd);
+ ret = init_chip(dd);
+ if (ret)
+ goto bail_cleanup;
/* read in the PCIe link speed information */
ret = pcie_speeds(dd);
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index cbe455d..6a0c691 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -384,6 +384,7 @@
#define VERIFY_CAP_LOCAL_FABRIC 0x08
#define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09
#define LOCAL_DEVICE_ID 0x0a
+#define RESERVED_REGISTERS 0x0b
#define LOCAL_LNI_INFO 0x0c
#define REMOTE_LNI_INFO 0x0d
#define MISC_STATUS 0x0e
@@ -506,6 +507,9 @@
#define DOWN_REMOTE_REASON_SHIFT 16
#define DOWN_REMOTE_REASON_MASK 0xff
+#define HOST_INTERFACE_VERSION_SHIFT 16
+#define HOST_INTERFACE_VERSION_MASK 0xff
+
/* verify capability PHY power management bits */
#define PWRM_BER_CONTROL 0x1
#define PWRM_BANDWIDTH_CONTROL 0x2
@@ -605,11 +609,11 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 *data);
int write_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 data);
void __iomem *get_csr_addr(
- struct hfi1_devdata *dd,
+ const struct hfi1_devdata *dd,
u32 offset);
static inline void __iomem *get_kctxt_csr_addr(
- struct hfi1_devdata *dd,
+ const struct hfi1_devdata *dd,
int ctxt,
u32 offset0)
{
@@ -704,6 +708,7 @@ int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
/* chip.c */
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch);
+int write_host_interface_version(struct hfi1_devdata *dd, u8 version);
void read_guid(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
@@ -744,6 +749,7 @@ int is_bx(struct hfi1_devdata *dd);
u32 read_physical_state(struct hfi1_devdata *dd);
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
u32 get_logical_state(struct hfi1_pportdata *ppd);
+void cache_physical_state(struct hfi1_pportdata *ppd);
const char *opa_lstate_name(u32 lstate);
const char *opa_pstate_name(u32 pstate);
u32 driver_physical_state(struct hfi1_pportdata *ppd);
@@ -1347,21 +1353,21 @@ enum {
u64 get_all_cpu_total(u64 __percpu *cntr);
void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
-struct ib_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr);
void hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order);
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
-void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt);
+void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
+ struct hfi1_ctxtdata *rcd);
u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp);
u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp);
-u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd);
int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which);
int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
-int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey);
-int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt);
-int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey);
+int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd,
+ u16 jkey);
+int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt);
+int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt,
+ u16 pkey);
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt);
void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 995d62c..ba9ab97 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -325,6 +325,7 @@ struct diag_pkt {
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
/* misc. */
+#define SC15_PACKET 0xF
#define SIZE_OF_CRC 1
#define LIM_MGMT_P_KEY 0x7FFF
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index a50870e..0b7ca0e 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -96,7 +96,6 @@ MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Intel Omni-Path Architecture driver");
-MODULE_VERSION(HFI1_DRIVER_VERSION);
/*
* MAX_PKT_RCV is the max # if packets processed per receive interrupt.
@@ -196,7 +195,7 @@ int hfi1_count_active_units(void)
spin_lock_irqsave(&hfi1_devs_lock, flags);
list_for_each_entry(dd, &hfi1_dev_list, list) {
- if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase)
+ if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1)
continue;
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -224,6 +223,20 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
(offset * RCV_BUF_BLOCK_SIZE));
}
+static inline void *hfi1_get_header(struct hfi1_devdata *dd,
+ __le32 *rhf_addr)
+{
+ u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
+
+ return (void *)(rhf_addr - dd->rhf_offset + offset);
+}
+
+static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd,
+ __le32 *rhf_addr)
+{
+ return (struct ib_header *)hfi1_get_header(dd, rhf_addr);
+}
+
/*
* Validate and encode the a given RcvArray Buffer size.
* The function will check whether the given size falls within
@@ -249,7 +262,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
{
struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
- int lnh = ib_get_lnh(rhdr);
+ u8 lnh = ib_get_lnh(rhdr);
+ bool has_grh = false;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -257,37 +271,42 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
return;
+ if (lnh == HFI1_LRH_BTH) {
+ packet->ohdr = &rhdr->u.oth;
+ } else if (lnh == HFI1_LRH_GRH) {
+ has_grh = true;
+ packet->ohdr = &rhdr->u.l.oth;
+ packet->grh = &rhdr->u.l.grh;
+ } else {
+ goto drop;
+ }
+
if (packet->rhf & RHF_TID_ERR) {
/* For TIDERR and RC QPs preemptively schedule a NAK */
- struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = ib_get_dlid(rhdr);
+ u32 dlid = ib_get_dlid(rhdr);
u32 qp_num;
- u32 rcv_flags = 0;
+ u32 mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE);
/* Sanity check packet */
if (tlen < 24)
goto drop;
/* Check for GRH */
- if (lnh == HFI1_LRH_BTH) {
- ohdr = &rhdr->u.oth;
- } else if (lnh == HFI1_LRH_GRH) {
+ if (has_grh) {
u32 vtf;
+ struct ib_grh *grh = packet->grh;
- ohdr = &rhdr->u.l.oth;
- if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ if (grh->next_hdr != IB_GRH_NEXT_HDR)
goto drop;
- vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
+ vtf = be32_to_cpu(grh->version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
- rcv_flags |= HFI1_HAS_GRH;
- } else {
- goto drop;
}
+
/* Get the destination QP number. */
- qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+ qp_num = ib_bth_get_qpn(packet->ohdr);
+ if (dlid < mlid_base) {
struct rvt_qp *qp;
unsigned long flags;
@@ -312,11 +331,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
- hfi1_rc_hdrerr(
- rcd,
- rhdr,
- rcv_flags,
- qp);
+ hfi1_rc_hdrerr(rcd, packet, qp);
break;
default:
/* For now don't handle any other QP types */
@@ -332,9 +347,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
switch (rte) {
case RHF_RTE_ERROR_OP_CODE_ERR:
{
- u32 opcode;
void *ebuf = NULL;
- __be32 *bth = NULL;
+ u8 opcode;
if (rhf_use_egr_bfr(packet->rhf))
ebuf = packet->ebuf;
@@ -342,16 +356,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (!ebuf)
goto drop; /* this should never happen */
- if (lnh == HFI1_LRH_BTH)
- bth = (__be32 *)ebuf;
- else if (lnh == HFI1_LRH_GRH)
- bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh));
- else
- goto drop;
-
- opcode = be32_to_cpu(bth[0]) >> 24;
- opcode &= 0xff;
-
+ opcode = ib_bth_get_opcode(packet->ohdr);
if (opcode == IB_OPCODE_CNP) {
/*
* Only in pre-B0 h/w is the CNP_OPCODE handled
@@ -365,7 +370,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
- lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
+ lqpn = ib_bth_get_qpn(packet->ohdr);
rcu_read_lock();
qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
if (!qp) {
@@ -415,7 +420,6 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd,
packet->rhf = rhf_to_cpu(packet->rhf_addr);
packet->rhqoff = rcd->head;
packet->numpkt = 0;
- packet->rcv_flags = 0;
}
void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
@@ -424,21 +428,18 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct ib_header *hdr = pkt->hdr;
struct ib_other_headers *ohdr = pkt->ohdr;
- struct ib_grh *grh = NULL;
+ struct ib_grh *grh = pkt->grh;
u32 rqpn = 0, bth1;
u16 rlid, dlid = ib_get_dlid(hdr);
u8 sc, svc_type;
bool is_mcast = false;
- if (pkt->rcv_flags & HFI1_HAS_GRH)
- grh = &hdr->u.l.grh;
-
switch (qp->ibqp.qp_type) {
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
rlid = ib_get_slid(hdr);
- rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
+ rqpn = ib_get_sqpn(ohdr);
svc_type = IB_CC_SVCTYPE_UD;
is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
@@ -461,7 +462,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bth1 = be32_to_cpu(ohdr->bth[1]);
if (do_cnp && (bth1 & IB_FECN_SMASK)) {
- u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
+ u16 pkey = ib_bth_get_pkey(ohdr);
return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
}
@@ -591,9 +592,10 @@ static void __prescan_rxq(struct hfi1_packet *packet)
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
+ packet->grh = NULL;
} else if (lnh == HFI1_LRH_GRH) {
packet->ohdr = &hdr->u.l.oth;
- packet->rcv_flags |= HFI1_HAS_GRH;
+ packet->grh = &hdr->u.l.grh;
} else {
goto next; /* just in case */
}
@@ -698,10 +700,8 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
int ret;
- packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
- packet->rhf_addr);
- packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
packet->etype = rhf_rcv_type(packet->rhf);
+
/* total length */
packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
/* retrieve eager buffer details */
@@ -759,7 +759,7 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet)
packet->etail, 0, 0);
packet->updegr = 0;
}
- packet->rcv_flags = 0;
+ packet->grh = NULL;
}
static inline void finish_packet(struct hfi1_packet *packet)
@@ -837,9 +837,9 @@ bail:
return last;
}
-static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt)
+static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
{
- int i;
+ u16 i;
/*
* For dynamically allocated kernel contexts (like vnic) switch
@@ -857,9 +857,9 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt)
&handle_receive_interrupt_nodma_rtail;
}
-static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt)
+static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt)
{
- int i;
+ u16 i;
/*
* For dynamically allocated kernel contexts (like vnic) switch
@@ -879,7 +879,7 @@ static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt)
void set_all_slowpath(struct hfi1_devdata *dd)
{
- int i;
+ u16 i;
/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
@@ -896,20 +896,25 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
struct hfi1_devdata *dd)
{
struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
- struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
- packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
+ u8 sc = SC15_PACKET;
- if (etype == RHF_RCV_TYPE_IB &&
- hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) {
- int hwstate = read_logical_state(dd);
+ if (etype == RHF_RCV_TYPE_IB) {
+ struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+ packet->rhf_addr);
+ sc = hfi1_9B_get_sc5(hdr, packet->rhf);
+ }
+ if (sc != SC15_PACKET) {
+ int hwstate = driver_lstate(rcd->ppd);
- if (hwstate != LSTATE_ACTIVE) {
- dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
+ if (hwstate != IB_PORT_ACTIVE) {
+ dd_dev_info(dd,
+ "Unexpected link state %s\n",
+ opa_lstate_name(hwstate));
return 0;
}
- queue_work(rcd->ppd->hfi1_wq, lsaw);
+ queue_work(rcd->ppd->link_wq, lsaw);
return 1;
}
return 0;
@@ -1063,7 +1068,7 @@ void receive_interrupt_work(struct work_struct *work)
struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
linkstate_active_work);
struct hfi1_devdata *dd = ppd->dd;
- int i;
+ u16 i;
/* Received non-SC15 packet implies neighbor_normal */
ppd->neighbor_normal = 1;
@@ -1264,7 +1269,8 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
*/
int hfi1_reset_device(int unit)
{
- int ret, i;
+ int ret;
+ u16 i;
struct hfi1_devdata *dd = hfi1_lookup(unit);
struct hfi1_pportdata *ppd;
unsigned long flags;
@@ -1277,7 +1283,7 @@ int hfi1_reset_device(int unit)
dd_dev_info(dd, "Reset on unit %u requested\n", unit);
- if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
+ if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) {
dd_dev_info(dd,
"Invalid unit number %u or not initialized or not present\n",
unit);
@@ -1321,6 +1327,58 @@ bail:
return ret;
}
+static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
+{
+ packet->hdr = (struct hfi1_ib_message_header *)
+ hfi1_get_msgheader(packet->rcd->dd,
+ packet->rhf_addr);
+ packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
+}
+
+static int hfi1_setup_9B_packet(struct hfi1_packet *packet)
+{
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
+ struct ib_header *hdr;
+ u8 lnh;
+
+ hfi1_setup_ib_header(packet);
+ hdr = packet->hdr;
+
+ lnh = ib_get_lnh(hdr);
+ if (lnh == HFI1_LRH_BTH) {
+ packet->ohdr = &hdr->u.oth;
+ packet->grh = NULL;
+ } else if (lnh == HFI1_LRH_GRH) {
+ u32 vtf;
+
+ packet->ohdr = &hdr->u.l.oth;
+ packet->grh = &hdr->u.l.grh;
+ if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
+ goto drop;
+ vtf = be32_to_cpu(packet->grh->version_tclass_flow);
+ if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+ goto drop;
+ } else {
+ goto drop;
+ }
+
+ /* Query commonly used fields from packet header */
+ packet->opcode = ib_bth_get_opcode(packet->ohdr);
+ packet->slid = ib_get_slid(hdr);
+ packet->dlid = ib_get_dlid(hdr);
+ packet->sl = ib_get_sl(hdr);
+ packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf);
+ packet->pad = ib_bth_get_pad(packet->ohdr);
+ packet->extra_byte = 0;
+ packet->fecn = ib_bth_get_fecn(packet->ohdr);
+ packet->becn = ib_bth_get_becn(packet->ohdr);
+
+ return 0;
+drop:
+ ibp->rvp.n_pkt_drops++;
+ return -EINVAL;
+}
+
void handle_eflags(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
@@ -1351,6 +1409,9 @@ int process_receive_ib(struct hfi1_packet *packet)
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
+ if (hfi1_setup_9B_packet(packet))
+ return RHF_RCV_CONTINUE;
+
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt,
rhf_err_flags(packet->rhf),
@@ -1422,6 +1483,7 @@ int process_receive_error(struct hfi1_packet *packet)
rhf_rcv_type_err(packet->rhf) == 3))
return RHF_RCV_CONTINUE;
+ hfi1_setup_ib_header(packet);
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
@@ -1435,6 +1497,8 @@ int kdeth_process_expected(struct hfi1_packet *packet)
{
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
+
+ hfi1_setup_ib_header(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@@ -1445,6 +1509,7 @@ int kdeth_process_expected(struct hfi1_packet *packet)
int kdeth_process_eager(struct hfi1_packet *packet)
{
+ hfi1_setup_ib_header(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
if (unlikely(hfi1_dbg_fault_packet(packet)))
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index 26da124..d46b171 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -250,7 +250,6 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
{
void *buffer;
void *p;
- u32 length;
int ret;
buffer = kmalloc(P1_SIZE, GFP_KERNEL);
@@ -265,13 +264,13 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
/* scan for image magic that may trail the actual data */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
- if (p)
- length = p - buffer;
- else
- length = P1_SIZE;
+ if (!p) {
+ kfree(buffer);
+ return -ENOENT;
+ }
*data = buffer;
- *size = length;
+ *size = p - buffer;
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
new file mode 100644
index 0000000..0af9167
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "exp_rcv.h"
+#include "trace.h"
+
+/**
+ * exp_tid_group_init - initialize exp_tid_set
+ * @set - the set
+ */
+void hfi1_exp_tid_group_init(struct exp_tid_set *set)
+{
+ INIT_LIST_HEAD(&set->list);
+ set->count = 0;
+}
+
+/**
+ * alloc_ctxt_rcv_groups - initialize expected receive groups
+ * @rcd - the context to add the groupings to
+ */
+int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ u32 tidbase;
+ struct tid_group *grp;
+ int i;
+
+ tidbase = rcd->expected_base;
+ for (i = 0; i < rcd->expected_count /
+ dd->rcv_entries.group_size; i++) {
+ grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+ if (!grp)
+ goto bail;
+ grp->size = dd->rcv_entries.group_size;
+ grp->base = tidbase;
+ tid_group_add_tail(grp, &rcd->tid_group_list);
+ tidbase += dd->rcv_entries.group_size;
+ }
+
+ return 0;
+bail:
+ hfi1_free_ctxt_rcv_groups(rcd);
+ return -ENOMEM;
+}
+
+/**
+ * free_ctxt_rcv_groups - free expected receive groups
+ * @rcd - the context to free
+ *
+ * The routine dismantles the expect receive linked
+ * list and clears any tids associated with the receive
+ * context.
+ *
+ * This should only be called for kernel contexts and the
+ * a base user context.
+ */
+void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
+{
+ struct tid_group *grp, *gptr;
+
+ WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list));
+ WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list));
+
+ list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) {
+ tid_group_remove(grp, &rcd->tid_group_list);
+ kfree(grp);
+ }
+
+ hfi1_clear_tids(rcd);
+}
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
new file mode 100644
index 0000000..0871904
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.h
@@ -0,0 +1,190 @@
+#ifndef _HFI1_EXP_RCV_H
+#define _HFI1_EXP_RCV_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+
+#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
+
+#define EXP_TID_TIDLEN_MASK 0x7FFULL
+#define EXP_TID_TIDLEN_SHIFT 0
+#define EXP_TID_TIDCTRL_MASK 0x3ULL
+#define EXP_TID_TIDCTRL_SHIFT 20
+#define EXP_TID_TIDIDX_MASK 0x3FFULL
+#define EXP_TID_TIDIDX_SHIFT 22
+#define EXP_TID_GET(tid, field) \
+ (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
+
+#define EXP_TID_SET(field, value) \
+ (((value) & EXP_TID_TID##field##_MASK) << \
+ EXP_TID_TID##field##_SHIFT)
+#define EXP_TID_CLEAR(tid, field) ({ \
+ (tid) &= ~(EXP_TID_TID##field##_MASK << \
+ EXP_TID_TID##field##_SHIFT); \
+ })
+#define EXP_TID_RESET(tid, field, value) do { \
+ EXP_TID_CLEAR(tid, field); \
+ (tid) |= EXP_TID_SET(field, (value)); \
+ } while (0)
+
+/*
+ * Define fields in the KDETH header so we can update the header
+ * template.
+ */
+#define KDETH_OFFSET_SHIFT 0
+#define KDETH_OFFSET_MASK 0x7fff
+#define KDETH_OM_SHIFT 15
+#define KDETH_OM_MASK 0x1
+#define KDETH_TID_SHIFT 16
+#define KDETH_TID_MASK 0x3ff
+#define KDETH_TIDCTRL_SHIFT 26
+#define KDETH_TIDCTRL_MASK 0x3
+#define KDETH_INTR_SHIFT 28
+#define KDETH_INTR_MASK 0x1
+#define KDETH_SH_SHIFT 29
+#define KDETH_SH_MASK 0x1
+#define KDETH_KVER_SHIFT 30
+#define KDETH_KVER_MASK 0x3
+#define KDETH_JKEY_SHIFT 0x0
+#define KDETH_JKEY_MASK 0xff
+#define KDETH_HCRC_UPPER_SHIFT 16
+#define KDETH_HCRC_UPPER_MASK 0xff
+#define KDETH_HCRC_LOWER_SHIFT 24
+#define KDETH_HCRC_LOWER_MASK 0xff
+
+#define KDETH_GET(val, field) \
+ (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
+#define KDETH_SET(dw, field, val) do { \
+ u32 dwval = le32_to_cpu(dw); \
+ dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
+ dwval |= (((val) & KDETH_##field##_MASK) << \
+ KDETH_##field##_SHIFT); \
+ dw = cpu_to_le32(dwval); \
+ } while (0)
+
+#define KDETH_RESET(dw, field, val) ({ dw = 0; KDETH_SET(dw, field, val); })
+
+/* KDETH OM multipliers and switch over point */
+#define KDETH_OM_SMALL 4
+#define KDETH_OM_SMALL_SHIFT 2
+#define KDETH_OM_LARGE 64
+#define KDETH_OM_LARGE_SHIFT 6
+#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
+
+struct tid_group {
+ struct list_head list;
+ u32 base;
+ u8 size;
+ u8 used;
+ u8 map;
+};
+
+/*
+ * Write an "empty" RcvArray entry.
+ * This function exists so the TID registaration code can use it
+ * to write to unused/unneeded entries and still take advantage
+ * of the WC performance improvements. The HFI will ignore this
+ * write to the RcvArray entry.
+ */
+static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
+{
+ /*
+ * Doing the WC fill writes only makes sense if the device is
+ * present and the RcvArray has been mapped as WC memory.
+ */
+ if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) {
+ writeq(0, dd->rcvarray_wc + (index * 8));
+ if ((index & 3) == 3)
+ flush_wc();
+ }
+}
+
+static inline void tid_group_add_tail(struct tid_group *grp,
+ struct exp_tid_set *set)
+{
+ list_add_tail(&grp->list, &set->list);
+ set->count++;
+}
+
+static inline void tid_group_remove(struct tid_group *grp,
+ struct exp_tid_set *set)
+{
+ list_del_init(&grp->list);
+ set->count--;
+}
+
+static inline void tid_group_move(struct tid_group *group,
+ struct exp_tid_set *s1,
+ struct exp_tid_set *s2)
+{
+ tid_group_remove(group, s1);
+ tid_group_add_tail(group, s2);
+}
+
+static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
+{
+ struct tid_group *grp =
+ list_first_entry(&set->list, struct tid_group, list);
+ list_del_init(&grp->list);
+ set->count--;
+ return grp;
+}
+
+static inline u32 rcventry2tidinfo(u32 rcventry)
+{
+ u32 pair = rcventry & ~0x1;
+
+ return EXP_TID_SET(IDX, pair >> 1) |
+ EXP_TID_SET(CTRL, 1 << (rcventry - pair));
+}
+
+int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
+void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
+void hfi1_exp_tid_group_init(struct exp_tid_set *set);
+
+#endif /* _HFI1_EXP_RCV_H */
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 3158128..a0c13fa 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -81,19 +81,23 @@ static u64 kvirt_to_phys(void *addr);
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
const struct hfi1_user_info *uinfo);
-static int init_user_ctxt(struct hfi1_filedata *fd);
+static int init_user_ctxt(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt);
static void user_init(struct hfi1_ctxtdata *uctxt);
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
__u32 len);
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
__u32 len);
-static int setup_base_ctxt(struct hfi1_filedata *fd);
+static int setup_base_ctxt(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt);
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);
static int find_sub_ctxt(struct hfi1_filedata *fd,
const struct hfi1_user_info *uinfo);
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
- struct hfi1_user_info *uinfo);
+ struct hfi1_user_info *uinfo,
+ struct hfi1_ctxtdata **cd);
+static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt);
static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
@@ -181,7 +185,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
struct hfi1_devdata,
user_cdev);
- if (!((dd->flags & HFI1_PRESENT) && dd->kregbase))
+ if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
return -EINVAL;
if (!atomic_inc_not_zero(&dd->user_refcount))
@@ -267,12 +271,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
/*
* Copy the number of tidlist entries we used
* and the length of the buffer we registered.
- * These fields are adjacent in the structure so
- * we can copy them at the same time.
*/
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
- sizeof(tinfo.tidcnt) +
+ sizeof(tinfo.tidcnt)))
+ return -EFAULT;
+
+ addr = arg + offsetof(struct hfi1_tid_info, length);
+ if (copy_to_user((void __user *)addr, &tinfo.length,
sizeof(tinfo.length)))
ret = -EFAULT;
}
@@ -388,8 +394,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
sc_disable(sc);
ret = sc_enable(sc);
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
- uctxt->ctxt);
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
} else {
ret = sc_restart(sc);
}
@@ -757,7 +762,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
flush_wc();
/* drain user sdma queue */
- hfi1_user_sdma_free_queues(fdata);
+ hfi1_user_sdma_free_queues(fdata, uctxt);
/* release the cpu */
hfi1_put_proc_affinity(fdata->rec_cpu_num);
@@ -774,6 +779,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
*ev = 0;
__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
+ fdata->uctxt = NULL;
+ hfi1_rcd_put(uctxt); /* fdata reference */
if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
mutex_unlock(&hfi1_mutex);
goto done;
@@ -790,34 +797,26 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
HFI1_RCVCTRL_TAILUPD_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
- HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
/* Clear the context's J_KEY */
- hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
+ hfi1_clear_ctxt_jkey(dd, uctxt);
/*
- * Reset context integrity checks to default.
- * (writes to CSRs probably belong in chip.c)
+ * If a send context is allocated, reset context integrity
+ * checks to default and disable the send context.
*/
- write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
- hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
- sc_disable(uctxt->sc);
+ if (uctxt->sc) {
+ set_pio_integrity(uctxt->sc);
+ sc_disable(uctxt->sc);
+ }
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
- dd->rcd[uctxt->ctxt] = NULL;
-
- hfi1_user_exp_rcv_grp_free(uctxt);
+ hfi1_free_ctxt_rcv_groups(uctxt);
hfi1_clear_ctxt_pkey(dd, uctxt);
- uctxt->rcvwait_to = 0;
- uctxt->piowait_to = 0;
- uctxt->rcvnowait = 0;
- uctxt->pionowait = 0;
uctxt->event_flags = 0;
-
- hfi1_stats.sps_ctxts--;
- if (++dd->freectxts == dd->num_user_contexts)
- aspm_enable_all(dd);
mutex_unlock(&hfi1_mutex);
- hfi1_free_ctxtdata(dd, uctxt);
+
+ deallocate_ctxt(uctxt);
done:
mmdrop(fdata->mm);
kobject_put(&dd->kobj);
@@ -849,6 +848,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
{
int ret;
unsigned int swmajor, swminor;
+ struct hfi1_ctxtdata *uctxt = NULL;
swmajor = uinfo->userversion >> 16;
if (swmajor != HFI1_USER_SWMAJOR)
@@ -874,7 +874,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
* couldn't find a sub context.
*/
if (!ret)
- ret = allocate_ctxt(fd, fd->dd, uinfo);
+ ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt);
mutex_unlock(&hfi1_mutex);
@@ -887,31 +887,38 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
HFI1_CTXT_BASE_UNINIT,
&fd->uctxt->event_flags));
- if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
- clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
- return -ENOMEM;
- }
+ if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
+ ret = -ENOMEM;
+
/* The only thing a sub context needs is the user_xxx stuff */
if (!ret)
- ret = init_user_ctxt(fd);
+ ret = init_user_ctxt(fd, fd->uctxt);
if (ret)
clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+
} else if (!ret) {
- ret = setup_base_ctxt(fd);
- if (fd->uctxt->subctxt_cnt) {
+ ret = setup_base_ctxt(fd, uctxt);
+ if (uctxt->subctxt_cnt) {
/* If there is an error, set the failed bit. */
if (ret)
set_bit(HFI1_CTXT_BASE_FAILED,
- &fd->uctxt->event_flags);
+ &uctxt->event_flags);
/*
* Base context is done, notify anybody using a
* sub-context that is waiting for this completion
*/
- clear_bit(HFI1_CTXT_BASE_UNINIT,
- &fd->uctxt->event_flags);
- wake_up(&fd->uctxt->wait);
+ clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
+ wake_up(&uctxt->wait);
}
+ if (ret)
+ deallocate_ctxt(uctxt);
+ }
+
+ /* If an error occurred, clear the reference */
+ if (ret && fd->uctxt) {
+ hfi1_rcd_put(fd->uctxt);
+ fd->uctxt = NULL;
}
return ret;
@@ -924,7 +931,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
static int find_sub_ctxt(struct hfi1_filedata *fd,
const struct hfi1_user_info *uinfo)
{
- int i;
+ u16 i;
struct hfi1_devdata *dd = fd->dd;
u16 subctxt;
@@ -961,6 +968,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd,
fd->uctxt = uctxt;
fd->subctxt = subctxt;
+
+ hfi1_rcd_get(uctxt);
__set_bit(fd->subctxt, uctxt->in_use_ctxts);
return 1;
@@ -970,10 +979,11 @@ static int find_sub_ctxt(struct hfi1_filedata *fd,
}
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
- struct hfi1_user_info *uinfo)
+ struct hfi1_user_info *uinfo,
+ struct hfi1_ctxtdata **cd)
{
struct hfi1_ctxtdata *uctxt;
- unsigned int ctxt;
+ u16 ctxt;
int ret, numa;
if (dd->flags & HFI1_FROZEN) {
@@ -1058,8 +1068,6 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
uctxt->jkey = generate_jkey(current_uid());
- INIT_LIST_HEAD(&uctxt->sdma_queues);
- spin_lock_init(&uctxt->sdma_qlock);
hfi1_stats.sps_ctxts++;
/*
* Disable ASPM when there are open user/PSM contexts to avoid
@@ -1067,16 +1075,31 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
*/
if (dd->freectxts-- == dd->num_user_contexts)
aspm_disable_all(dd);
- fd->uctxt = uctxt;
+
+ *cd = uctxt;
return 0;
ctxdata_free:
+ *cd = NULL;
dd->rcd[ctxt] = NULL;
- hfi1_free_ctxtdata(dd, uctxt);
+ hfi1_rcd_put(uctxt);
return ret;
}
+static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt)
+{
+ mutex_lock(&hfi1_mutex);
+ hfi1_stats.sps_ctxts--;
+ if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts)
+ aspm_enable_all(uctxt->dd);
+
+ /* _rcd_put() should be done after releasing mutex */
+ uctxt->dd->rcd[uctxt->ctxt] = NULL;
+ mutex_unlock(&hfi1_mutex);
+ hfi1_rcd_put(uctxt); /* dd reference */
+}
+
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
const struct hfi1_user_info *uinfo)
{
@@ -1153,7 +1176,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
clear_rcvhdrtail(uctxt);
/* Setup J_KEY before enabling the context */
- hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey);
+ hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
@@ -1179,7 +1202,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
else
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
- hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
}
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
@@ -1223,23 +1246,25 @@ static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
return ret;
}
-static int init_user_ctxt(struct hfi1_filedata *fd)
+static int init_user_ctxt(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
int ret;
ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
if (ret)
return ret;
- ret = hfi1_user_exp_rcv_init(fd);
+ ret = hfi1_user_exp_rcv_init(fd, uctxt);
+ if (ret)
+ hfi1_user_sdma_free_queues(fd, uctxt);
return ret;
}
-static int setup_base_ctxt(struct hfi1_filedata *fd)
+static int setup_base_ctxt(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
@@ -1260,19 +1285,24 @@ static int setup_base_ctxt(struct hfi1_filedata *fd)
if (ret)
goto setup_failed;
- ret = hfi1_user_exp_rcv_grp_init(fd);
+ ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
if (ret)
goto setup_failed;
- ret = init_user_ctxt(fd);
+ ret = init_user_ctxt(fd, uctxt);
if (ret)
goto setup_failed;
user_init(uctxt);
+ /* Now that the context is set up, the fd can get a reference. */
+ fd->uctxt = uctxt;
+ hfi1_rcd_get(uctxt);
+
return 0;
setup_failed:
+ /* Call _free_ctxtdata, not _rcd_put(). We still need the context. */
hfi1_free_ctxtdata(dd, uctxt);
return ret;
}
@@ -1390,7 +1420,7 @@ static unsigned int poll_next(struct file *fp,
spin_lock_irq(&dd->uctxt_lock);
if (hdrqempty(uctxt)) {
set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt);
pollflag = 0;
} else {
pollflag = POLLIN | POLLRDNORM;
@@ -1409,7 +1439,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
struct hfi1_ctxtdata *uctxt;
struct hfi1_devdata *dd = ppd->dd;
- unsigned ctxt;
+ u16 ctxt;
int ret = 0;
unsigned long flags;
@@ -1475,7 +1505,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
} else {
rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
}
- hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
+ hfi1_rcvctrl(dd, rcvctrl_op, uctxt);
/* always; new head should be equal to new tail; see above */
bail:
return 0;
@@ -1525,7 +1555,7 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
}
if (intable)
- ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey);
+ ret = hfi1_set_ctxt_pkey(dd, uctxt, pkey);
done:
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 4042c11..885714b 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -69,6 +69,7 @@
#define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw"
#define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw"
#define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw"
+#define HOST_INTERFACE_VERSION 1
static uint fw_8051_load = 1;
static uint fw_fabric_serdes_load = 1;
@@ -615,6 +616,14 @@ retry:
fw_fabric_serdes_name = ALT_FW_FABRIC_NAME;
fw_sbus_name = ALT_FW_SBUS_NAME;
fw_pcie_serdes_name = ALT_FW_PCIE_NAME;
+
+ /*
+ * Add a delay before obtaining and loading debug firmware.
+ * Authorization will fail if the delay between firmware
+ * authorization events is shorter than 50us. Add 100us to
+ * make a delay time safe.
+ */
+ usleep_range(100, 120);
}
if (fw_sbus_load) {
@@ -1079,6 +1088,13 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
(int)ver_major, (int)ver_minor, (int)ver_patch);
dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
+ ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
+ if (ret != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to set host interface version, return 0x%x\n",
+ ret);
+ return -EIO;
+ }
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 414a04a..fb5f839 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -213,13 +213,11 @@ struct hfi1_ctxtdata {
/* dynamic receive available interrupt timeout */
u32 rcvavail_timeout;
- /*
- * number of opens (including slave sub-contexts) on this instance
- * (ignoring forks, dup, etc. for now)
- */
- int cnt;
+ /* Reference count the base context usage */
+ struct kref kref;
+
/* Device context index */
- unsigned ctxt;
+ u16 ctxt;
/*
* non-zero if ctxt can be shared, and defines the maximum number of
* sub-contexts for this device context.
@@ -245,24 +243,10 @@ struct hfi1_ctxtdata {
/* lock protecting all Expected TID data */
struct mutex exp_lock;
- /* number of pio bufs for this ctxt (all procs, if shared) */
- u32 piocnt;
- /* first pio buffer for this ctxt */
- u32 pio_base;
- /* chip offset of PIO buffers for this ctxt */
- u32 piobufs;
/* per-context configuration flags */
unsigned long flags;
/* per-context event flags for fileops/intr communication */
unsigned long event_flags;
- /* WAIT_RCV that timed out, no interrupt */
- u32 rcvwait_to;
- /* WAIT_PIO that timed out, no interrupt */
- u32 piowait_to;
- /* WAIT_RCV already happened, no wait */
- u32 rcvnowait;
- /* WAIT_PIO already happened, no wait */
- u32 pionowait;
/* total number of polled urgent packets */
u32 urgent;
/* saved total number of polled urgent packets for poll edge trigger */
@@ -292,7 +276,6 @@ struct hfi1_ctxtdata {
u8 redirect_seq_cnt;
/* ctxt rcvhdrq head offset */
u32 head;
- u32 pkt_count;
/* QPs waiting for context processing */
struct list_head qp_wait_list;
/* interrupt handling */
@@ -301,15 +284,6 @@ struct hfi1_ctxtdata {
unsigned numa_id; /* numa node of this context */
/* verbs stats per CTX */
struct hfi1_opcode_stats_perctx *opstats;
- /*
- * This is the kernel thread that will keep making
- * progress on the user sdma requests behind the scenes.
- * There is one per context (shared contexts use the master's).
- */
- struct task_struct *progress;
- struct list_head sdma_queues;
- /* protect sdma queues */
- spinlock_t sdma_qlock;
/* Is ASPM interrupt supported for this context */
bool aspm_intr_supported;
@@ -356,17 +330,26 @@ struct hfi1_packet {
__le32 *rhf_addr;
struct rvt_qp *qp;
struct ib_other_headers *ohdr;
+ struct ib_grh *grh;
u64 rhf;
u32 maxcnt;
u32 rhqoff;
+ u32 dlid;
+ u32 slid;
u16 tlen;
s16 etail;
u8 hlen;
u8 numpkt;
u8 rsize;
u8 updegr;
- u8 rcv_flags;
u8 etype;
+ u8 extra_byte;
+ u8 pad;
+ u8 sc;
+ u8 sl;
+ u8 opcode;
+ bool becn;
+ bool fecn;
};
struct rvt_sge_state;
@@ -512,7 +495,7 @@ static inline void incr_cntr32(u32 *cntr)
#define MAX_NAME_SIZE 64
struct hfi1_msix_entry {
enum irq_type type;
- struct msix_entry msix;
+ int irq;
void *arg;
char name[MAX_NAME_SIZE];
cpumask_t mask;
@@ -593,6 +576,7 @@ struct hfi1_pportdata {
/* SendDMA related entries */
struct workqueue_struct *hfi1_wq;
+ struct workqueue_struct *link_wq;
/* move out of interrupt context */
struct work_struct link_vc_work;
@@ -654,12 +638,13 @@ struct hfi1_pportdata {
u8 link_enabled; /* link enabled? */
u8 linkinit_reason;
u8 local_tx_rate; /* rate given to 8051 firmware */
- u8 last_pstate; /* info only */
+ u8 pstate; /* info only */
u8 qsfp_retry_count;
/* placeholders for IB MAD packet settings */
u8 overrun_threshold;
u8 phy_error_threshold;
+ unsigned int is_link_down_queued;
/* Used to override LED behavior for things like maintenance beaconing*/
/*
@@ -860,12 +845,15 @@ struct hfi1_devdata {
struct device *diag_device;
struct device *ui_device;
- /* mem-mapped pointer to base of chip regs */
- u8 __iomem *kregbase;
- /* end of mem-mapped chip space excluding sendbuf and user regs */
- u8 __iomem *kregend;
- /* physical address of chip for io_remap, etc. */
+ /* first mapping up to RcvArray */
+ u8 __iomem *kregbase1;
resource_size_t physaddr;
+
+ /* second uncached mapping from RcvArray to pio send buffers */
+ u8 __iomem *kregbase2;
+ /* for detecting offset above kregbase2 address */
+ u32 base2_start;
+
/* Per VL data. Enough for all VLs but not all elements are set/used. */
struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* send context data */
@@ -1229,9 +1217,10 @@ static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
#define dc8051_ver_patch(a) ((a) & 0x0000ff)
/* f_put_tid types */
-#define PT_EXPECTED 0
-#define PT_EAGER 1
-#define PT_INVALID 2
+#define PT_EXPECTED 0
+#define PT_EAGER 1
+#define PT_INVALID_FLUSH 2
+#define PT_INVALID 3
struct tid_rb_node;
struct mmu_rb_node;
@@ -1277,12 +1266,13 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd);
int hfi1_create_ctxts(struct hfi1_devdata *dd);
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt,
int numa);
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
-
+int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
+void hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
@@ -1321,6 +1311,22 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
return ppd->lstate;
}
+/* return the driver's idea of the physical OPA port state */
+static inline u32 driver_pstate(struct hfi1_pportdata *ppd)
+{
+ /*
+ * The driver does some processing from the time the physical
+ * link state is at LINKUP to the time the SM can be notified
+ * as such. Return IB_PORTPHYSSTATE_TRAINING until the software
+ * state is ready.
+ */
+ if (ppd->pstate == PLS_LINKUP &&
+ !(ppd->host_link_state & HLS_UP))
+ return IB_PORTPHYSSTATE_TRAINING;
+ else
+ return chip_to_opa_pstate(ppd->dd, ppd->pstate);
+}
+
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
@@ -1829,10 +1835,9 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev);
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
int pcie_speeds(struct hfi1_devdata *dd);
-void request_msix(struct hfi1_devdata *dd, u32 *nent,
- struct hfi1_msix_entry *entry);
-void hfi1_enable_intx(struct pci_dev *pdev);
-void restore_pci_variables(struct hfi1_devdata *dd);
+int request_msix(struct hfi1_devdata *dd, u32 msireq);
+int restore_pci_variables(struct hfi1_devdata *dd);
+int save_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
int get_platform_config_field(struct hfi1_devdata *dd,
@@ -2087,52 +2092,13 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev))
-#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
-#define show_packettype(etype) \
-__print_symbolic(etype, \
- packettype_name(EXPECTED), \
- packettype_name(EAGER), \
- packettype_name(IB), \
- packettype_name(ERROR), \
- packettype_name(BYPASS))
-
-#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
-#define show_ib_opcode(opcode) \
-__print_symbolic(opcode, \
- ib_opcode_name(RC_SEND_FIRST), \
- ib_opcode_name(RC_SEND_MIDDLE), \
- ib_opcode_name(RC_SEND_LAST), \
- ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(RC_SEND_ONLY), \
- ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_WRITE_FIRST), \
- ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
- ib_opcode_name(RC_RDMA_WRITE_LAST), \
- ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_WRITE_ONLY), \
- ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_READ_REQUEST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
- ib_opcode_name(RC_ACKNOWLEDGE), \
- ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
- ib_opcode_name(RC_COMPARE_SWAP), \
- ib_opcode_name(RC_FETCH_ADD), \
- ib_opcode_name(UC_SEND_FIRST), \
- ib_opcode_name(UC_SEND_MIDDLE), \
- ib_opcode_name(UC_SEND_LAST), \
- ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(UC_SEND_ONLY), \
- ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(UC_RDMA_WRITE_FIRST), \
- ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
- ib_opcode_name(UC_RDMA_WRITE_LAST), \
- ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(UC_RDMA_WRITE_ONLY), \
- ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(UD_SEND_ONLY), \
- ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(CNP))
+/*
+ * hfi1_check_mcast- Check if the given lid is
+ * in the IB multicast range.
+ */
+static inline bool hfi1_check_mcast(u16 lid)
+{
+ return ((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (lid != be16_to_cpu(IB_LID_PERMISSIVE)));
+}
#endif /* _HFI1_KERNEL_H */
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 4a11d4d..be027c9 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -67,6 +67,7 @@
#include "aspm.h"
#include "affinity.h"
#include "vnic.h"
+#include "exp_rcv.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -130,7 +131,7 @@ unsigned long *hfi1_cpulist;
*/
int hfi1_create_ctxts(struct hfi1_devdata *dd)
{
- unsigned i;
+ u16 i;
int ret;
/* Control context has to be always 0 */
@@ -190,19 +191,49 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
nomem:
ret = -ENOMEM;
- if (dd->rcd) {
- for (i = 0; i < dd->num_rcv_contexts; ++i)
- hfi1_free_ctxtdata(dd, dd->rcd[i]);
- }
+ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i)
+ hfi1_rcd_put(dd->rcd[i]);
+
+ /* All the contexts should be freed, free the array */
kfree(dd->rcd);
dd->rcd = NULL;
return ret;
}
/*
+ * Helper routines for the receive context reference count (rcd and uctxt)
+ */
+static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd)
+{
+ kref_init(&rcd->kref);
+}
+
+static void hfi1_rcd_free(struct kref *kref)
+{
+ struct hfi1_ctxtdata *rcd =
+ container_of(kref, struct hfi1_ctxtdata, kref);
+
+ hfi1_free_ctxtdata(rcd->dd, rcd);
+ kfree(rcd);
+}
+
+int hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
+{
+ if (rcd)
+ return kref_put(&rcd->kref, hfi1_rcd_free);
+
+ return 0;
+}
+
+void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
+{
+ kref_get(&rcd->kref);
+}
+
+/*
* Common code for user and kernel context setup.
*/
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt,
int numa)
{
struct hfi1_devdata *dd = ppd->dd;
@@ -221,6 +252,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
hfi1_cdbg(PROC, "setting up context %u\n", ctxt);
INIT_LIST_HEAD(&rcd->qp_wait_list);
+ hfi1_exp_tid_group_init(&rcd->tid_group_list);
+ hfi1_exp_tid_group_init(&rcd->tid_used_list);
+ hfi1_exp_tid_group_init(&rcd->tid_full_list);
rcd->ppd = ppd;
rcd->dd = dd;
__set_bit(0, rcd->in_use_ctxts);
@@ -328,6 +362,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
if (!rcd->opstats)
goto bail;
}
+
+ hfi1_rcd_init(rcd);
}
return rcd;
bail:
@@ -567,8 +603,8 @@ static int init_after_reset(struct hfi1_devdata *dd)
*/
for (i = 0; i < dd->num_rcv_contexts; i++)
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
- HFI1_RCVCTRL_INTRAVAIL_DIS |
- HFI1_RCVCTRL_TAILUPD_DIS, i);
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_TAILUPD_DIS, dd->rcd[i]);
pio_send_control(dd, PSC_GLOBAL_DISABLE);
for (i = 0; i < dd->num_send_contexts; i++)
sc_disable(dd->send_contexts[i].sc);
@@ -579,7 +615,7 @@ static int init_after_reset(struct hfi1_devdata *dd)
static void enable_chip(struct hfi1_devdata *dd)
{
u32 rcvmask;
- u32 i;
+ u16 i;
/* enable PIO send */
pio_send_control(dd, PSC_GLOBAL_ENABLE);
@@ -598,7 +634,7 @@ static void enable_chip(struct hfi1_devdata *dd)
rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_EGR_FULL))
rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
- hfi1_rcvctrl(dd, rcvmask, i);
+ hfi1_rcvctrl(dd, rcvmask, dd->rcd[i]);
sc_enable(dd->rcd[i]->sc);
}
}
@@ -624,6 +660,20 @@ static int create_workqueues(struct hfi1_devdata *dd)
if (!ppd->hfi1_wq)
goto wq_error;
}
+ if (!ppd->link_wq) {
+ /*
+ * Make the link workqueue single-threaded to enforce
+ * serialization.
+ */
+ ppd->link_wq =
+ alloc_workqueue(
+ "hfi_link_%d_%d",
+ WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND,
+ 1, /* max_active */
+ dd->unit, pidx);
+ if (!ppd->link_wq)
+ goto wq_error;
+ }
}
return 0;
wq_error:
@@ -634,6 +684,10 @@ wq_error:
destroy_workqueue(ppd->hfi1_wq);
ppd->hfi1_wq = NULL;
}
+ if (ppd->link_wq) {
+ destroy_workqueue(ppd->link_wq);
+ ppd->link_wq = NULL;
+ }
}
return -ENOMEM;
}
@@ -656,7 +710,8 @@ wq_error:
int hfi1_init(struct hfi1_devdata *dd, int reinit)
{
int ret = 0, pidx, lastfail = 0;
- unsigned i, len;
+ unsigned long len;
+ u16 i;
struct hfi1_ctxtdata *rcd;
struct hfi1_pportdata *ppd;
@@ -878,10 +933,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
ppd = dd->pport + pidx;
for (i = 0; i < dd->num_rcv_contexts; i++)
hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS |
- HFI1_RCVCTRL_CTXT_DIS |
- HFI1_RCVCTRL_INTRAVAIL_DIS |
- HFI1_RCVCTRL_PKEY_DIS |
- HFI1_RCVCTRL_ONE_PKT_EGR_DIS, i);
+ HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_PKEY_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS, dd->rcd[i]);
/*
* Gracefully stop all sends allowing any in progress to
* trickle out first.
@@ -917,6 +972,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
destroy_workqueue(ppd->hfi1_wq);
ppd->hfi1_wq = NULL;
}
+ if (ppd->link_wq) {
+ destroy_workqueue(ppd->link_wq);
+ ppd->link_wq = NULL;
+ }
}
sdma_exit(dd);
}
@@ -927,14 +986,11 @@ static void shutdown_device(struct hfi1_devdata *dd)
* @rcd: the ctxtdata structure
*
* free up any allocated data for a context
- * This should not touch anything that would affect a simultaneous
- * re-allocation of context data, because it is called after hfi1_mutex
- * is released (and can be called from reinit as well).
* It should never change any chip state, or global driver state.
*/
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- unsigned e;
+ u32 e;
if (!rcd)
return;
@@ -953,6 +1009,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
/* all the RcvArray entries should have been cleared by now */
kfree(rcd->egrbufs.rcvtids);
+ rcd->egrbufs.rcvtids = NULL;
for (e = 0; e < rcd->egrbufs.alloced; e++) {
if (rcd->egrbufs.buffers[e].dma)
@@ -962,13 +1019,21 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
rcd->egrbufs.buffers[e].dma);
}
kfree(rcd->egrbufs.buffers);
+ rcd->egrbufs.alloced = 0;
+ rcd->egrbufs.buffers = NULL;
sc_free(rcd->sc);
+ rcd->sc = NULL;
+
vfree(rcd->subctxt_uregbase);
vfree(rcd->subctxt_rcvegrbuf);
vfree(rcd->subctxt_rcvhdr_base);
kfree(rcd->opstats);
- kfree(rcd);
+
+ rcd->subctxt_uregbase = NULL;
+ rcd->subctxt_rcvegrbuf = NULL;
+ rcd->subctxt_rcvhdr_base = NULL;
+ rcd->opstats = NULL;
}
/*
@@ -1362,7 +1427,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
tmp[ctxt] = NULL; /* debugging paranoia */
if (rcd) {
hfi1_clear_tids(rcd);
- hfi1_free_ctxtdata(dd, rcd);
+ hfi1_rcd_put(rcd);
}
}
kfree(tmp);
@@ -1532,6 +1597,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
destroy_workqueue(ppd->hfi1_wq);
ppd->hfi1_wq = NULL;
}
+ if (ppd->link_wq) {
+ destroy_workqueue(ppd->link_wq);
+ ppd->link_wq = NULL;
+ }
}
if (!j)
hfi1_device_remove(dd);
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 04a5082d..96845df 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -164,6 +164,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
ppd->linkup = 0;
/* clear HW details of the previous connection */
+ ppd->actual_vls_operational = 0;
reset_link_credits(dd);
/* freeze after a link down to guarantee a clean egress */
@@ -196,7 +197,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd)
if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) {
wake_up_interruptible(&rcd->wait);
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd->ctxt);
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd);
} else if (test_and_clear_bit(HFI1_CTXT_WAITING_URG,
&rcd->event_flags)) {
rcd->urgent++;
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index d9740dd..591697d 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -106,7 +106,9 @@ struct iowait {
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx,
- unsigned seq);
+ uint seq,
+ bool pkts_sent
+ );
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
seqlock_t *lock;
@@ -118,6 +120,7 @@ struct iowait {
u32 count;
u32 tx_limit;
u32 tx_count;
+ u8 starved_cnt;
};
#define SDMA_AVAIL_REASON 0
@@ -143,7 +146,8 @@ static inline void iowait_init(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *tx,
- unsigned seq),
+ uint seq,
+ bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait))
{
@@ -305,4 +309,66 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
return tx;
}
+/**
+ * iowait_queue - Put the iowait on a wait queue
+ * @pkts_sent: have some packets been sent before queuing?
+ * @w: the iowait struct
+ * @wait_head: the wait queue
+ *
+ * This function is called to insert an iowait struct into a
+ * wait queue after a resource (eg, sdma decriptor or pio
+ * buffer) is run out.
+ */
+static inline void iowait_queue(bool pkts_sent, struct iowait *w,
+ struct list_head *wait_head)
+{
+ /*
+ * To play fair, insert the iowait at the tail of the wait queue if it
+ * has already sent some packets; Otherwise, put it at the head.
+ */
+ if (pkts_sent) {
+ list_add_tail(&w->list, wait_head);
+ w->starved_cnt = 0;
+ } else {
+ list_add(&w->list, wait_head);
+ w->starved_cnt++;
+ }
+}
+
+/**
+ * iowait_starve_clear - clear the wait queue's starve count
+ * @pkts_sent: have some packets been sent?
+ * @w: the iowait struct
+ *
+ * This function is called to clear the starve count. If no
+ * packets have been sent, the starve count will not be cleared.
+ */
+static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
+{
+ if (pkts_sent)
+ w->starved_cnt = 0;
+}
+
+/**
+ * iowait_starve_find_max - Find the maximum of the starve count
+ * @w: the iowait struct
+ * @max: a variable containing the max starve count
+ * @idx: the index of the current iowait in an array
+ * @max_idx: a variable containing the array index for the
+ * iowait entry that has the max starve count
+ *
+ * This function is called to compare the starve count of a
+ * given iowait with the given max starve count. The max starve
+ * count and the index will be updated if the iowait's start
+ * count is larger.
+ */
+static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
+ uint idx, uint *max_idx)
+{
+ if (w->starved_cnt > *max) {
+ *max = w->starved_cnt;
+ *max_idx = idx;
+ }
+}
+
#endif
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 5977673..0a3e2df 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -59,6 +59,24 @@
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
#define OPA_LINK_WIDTH_RESET 0xffff
+struct trap_node {
+ struct list_head list;
+ struct opa_mad_notice_attr data;
+ __be64 tid;
+ int len;
+ u32 retry;
+ u8 in_use;
+ u8 repress;
+};
+
+static int smp_length_check(u32 data_size, u32 request_len)
+{
+ if (unlikely(request_len < data_size))
+ return -EINVAL;
+
+ return 0;
+}
+
static int reply(struct ib_mad_hdr *smp)
{
/*
@@ -89,28 +107,156 @@ void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
ib_dispatch_event(&event);
}
-static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
+/*
+ * If the port is down, clean up all pending traps. We need to be careful
+ * with the given trap, because it may be queued.
+ */
+static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap)
+{
+ struct trap_node *node, *q;
+ unsigned long flags;
+ struct list_head trap_list;
+ int i;
+
+ for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list);
+ ibp->rvp.trap_lists[i].list_len = 0;
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+
+ /*
+ * Remove all items from the list, freeing all the non-given
+ * traps.
+ */
+ list_for_each_entry_safe(node, q, &trap_list, list) {
+ list_del(&node->list);
+ if (node != trap)
+ kfree(node);
+ }
+ }
+
+ /*
+ * If this wasn't on one of the lists it would not be freed. If it
+ * was on the list, it is now safe to free.
+ */
+ kfree(trap);
+}
+
+static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp,
+ struct trap_node *trap)
+{
+ struct trap_node *node;
+ struct trap_list *trap_list;
+ unsigned long flags;
+ unsigned long timeout;
+ int found = 0;
+
+ /*
+ * Since the retry (handle timeout) does not remove a trap request
+ * from the list, all we have to do is compare the node.
+ */
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ trap_list = &ibp->rvp.trap_lists[trap->data.generic_type & 0x0F];
+
+ list_for_each_entry(node, &trap_list->list, list) {
+ if (node == trap) {
+ node->retry++;
+ found = 1;
+ break;
+ }
+ }
+
+ /* If it is not on the list, add it, limited to RVT-MAX_TRAP_LEN. */
+ if (!found) {
+ if (trap_list->list_len < RVT_MAX_TRAP_LEN) {
+ trap_list->list_len++;
+ list_add_tail(&trap->list, &trap_list->list);
+ } else {
+ pr_warn_ratelimited("hfi1: Maximim trap limit reached for 0x%0x traps\n",
+ trap->data.generic_type);
+ kfree(trap);
+ }
+ }
+
+ /*
+ * Next check to see if there is a timer pending. If not, set it up
+ * and get the first trap from the list.
+ */
+ node = NULL;
+ if (!timer_pending(&ibp->rvp.trap_timer)) {
+ /*
+ * o14-2
+ * If the time out is set we have to wait until it expires
+ * before the trap can be sent.
+ * This should be > RVT_TRAP_TIMEOUT
+ */
+ timeout = (RVT_TRAP_TIMEOUT *
+ (1UL << ibp->rvp.subnet_timeout)) / 1000;
+ mod_timer(&ibp->rvp.trap_timer,
+ jiffies + usecs_to_jiffies(timeout));
+ node = list_first_entry(&trap_list->list, struct trap_node,
+ list);
+ node->in_use = 1;
+ }
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+
+ return node;
+}
+
+static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp,
+ struct opa_smp *smp)
+{
+ struct trap_list *trap_list;
+ struct trap_node *trap;
+ unsigned long flags;
+ int i;
+
+ if (smp->attr_id != IB_SMP_ATTR_NOTICE)
+ return;
+
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
+ trap_list = &ibp->rvp.trap_lists[i];
+ trap = list_first_entry_or_null(&trap_list->list,
+ struct trap_node, list);
+ if (trap && trap->tid == smp->tid) {
+ if (trap->in_use) {
+ trap->repress = 1;
+ } else {
+ trap_list->list_len--;
+ list_del(&trap->list);
+ kfree(trap);
+ }
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+}
+
+static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap)
{
struct ib_mad_send_buf *send_buf;
struct ib_mad_agent *agent;
struct opa_smp *smp;
- int ret;
unsigned long flags;
- unsigned long timeout;
int pkey_idx;
u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
agent = ibp->rvp.send_agent;
- if (!agent)
+ if (!agent) {
+ cleanup_traps(ibp, trap);
return;
+ }
/* o14-3.2.1 */
- if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
+ if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) {
+ cleanup_traps(ibp, trap);
return;
+ }
- /* o14-2 */
- if (ibp->rvp.trap_timeout && time_before(jiffies,
- ibp->rvp.trap_timeout))
+ /* Add the trap to the list if necessary and see if we can send it */
+ trap = check_and_add_trap(ibp, trap);
+ if (!trap)
return;
pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
@@ -131,11 +277,21 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
smp->class_version = OPA_SM_CLASS_VERSION;
smp->method = IB_MGMT_METHOD_TRAP;
- ibp->rvp.tid++;
- smp->tid = cpu_to_be64(ibp->rvp.tid);
+
+ /* Only update the transaction ID for new traps (o13-5). */
+ if (trap->tid == 0) {
+ ibp->rvp.tid++;
+ /* make sure that tid != 0 */
+ if (ibp->rvp.tid == 0)
+ ibp->rvp.tid++;
+ trap->tid = cpu_to_be64(ibp->rvp.tid);
+ }
+ smp->tid = trap->tid;
+
smp->attr_id = IB_SMP_ATTR_NOTICE;
/* o14-1: smp->mkey = 0; */
- memcpy(smp->route.lid.data, data, len);
+
+ memcpy(smp->route.lid.data, &trap->data, trap->len);
spin_lock_irqsave(&ibp->rvp.lock, flags);
if (!ibp->rvp.sm_ah) {
@@ -144,65 +300,103 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
if (IS_ERR(ah)) {
- ret = PTR_ERR(ah);
- } else {
- send_buf->ah = ah;
- ibp->rvp.sm_ah = ibah_to_rvtah(ah);
- ret = 0;
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+ return;
}
+ send_buf->ah = ah;
+ ibp->rvp.sm_ah = ibah_to_rvtah(ah);
} else {
- ret = -EINVAL;
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+ return;
}
} else {
send_buf->ah = &ibp->rvp.sm_ah->ibah;
- ret = 0;
}
+
+ /*
+ * If the trap was repressed while things were getting set up, don't
+ * bother sending it. This could happen for a retry.
+ */
+ if (trap->repress) {
+ list_del(&trap->list);
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+ kfree(trap);
+ ib_free_send_mad(send_buf);
+ return;
+ }
+
+ trap->in_use = 0;
spin_unlock_irqrestore(&ibp->rvp.lock, flags);
- if (!ret)
- ret = ib_post_send_mad(send_buf, NULL);
- if (!ret) {
- /* 4.096 usec. */
- timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
- ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
- } else {
+ if (ib_post_send_mad(send_buf, NULL))
ib_free_send_mad(send_buf);
- ibp->rvp.trap_timeout = 0;
+}
+
+void hfi1_handle_trap_timer(unsigned long data)
+{
+ struct hfi1_ibport *ibp = (struct hfi1_ibport *)data;
+ struct trap_node *trap = NULL;
+ unsigned long flags;
+ int i;
+
+ /* Find the trap with the highest priority */
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) {
+ trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list,
+ struct trap_node, list);
}
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+
+ if (trap)
+ send_trap(ibp, trap);
+}
+
+static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid)
+{
+ struct trap_node *trap;
+
+ trap = kzalloc(sizeof(*trap), GFP_ATOMIC);
+ if (!trap)
+ return NULL;
+
+ INIT_LIST_HEAD(&trap->list);
+ trap->data.generic_type = type;
+ trap->data.prod_type_lsb = IB_NOTICE_PROD_CA;
+ trap->data.trap_num = trap_num;
+ trap->data.issuer_lid = cpu_to_be32(lid);
+
+ return trap;
}
/*
- * Send a bad [PQ]_Key trap (ch. 14.3.8).
+ * Send a bad P_Key trap (ch. 14.3.8).
*/
-void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
- u32 qp1, u32 qp2, u16 lid1, u16 lid2)
+void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
+ u32 qp1, u32 qp2, u16 lid1, u16 lid2)
{
- struct opa_mad_notice_attr data;
+ struct trap_node *trap;
u32 lid = ppd_from_ibp(ibp)->lid;
u32 _lid1 = lid1;
u32 _lid2 = lid2;
- memset(&data, 0, sizeof(data));
-
- if (trap_num == OPA_TRAP_BAD_P_KEY)
- ibp->rvp.pkey_violations++;
- else
- ibp->rvp.qkey_violations++;
ibp->rvp.n_pkt_drops++;
+ ibp->rvp.pkey_violations++;
+
+ trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY,
+ lid);
+ if (!trap)
+ return;
/* Send violation trap */
- data.generic_type = IB_NOTICE_TYPE_SECURITY;
- data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = trap_num;
- data.issuer_lid = cpu_to_be32(lid);
- data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
- data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
- data.ntc_257_258.key = cpu_to_be32(key);
- data.ntc_257_258.sl = sl << 3;
- data.ntc_257_258.qp1 = cpu_to_be32(qp1);
- data.ntc_257_258.qp2 = cpu_to_be32(qp2);
-
- send_trap(ibp, &data, sizeof(data));
+ trap->data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
+ trap->data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
+ trap->data.ntc_257_258.key = cpu_to_be32(key);
+ trap->data.ntc_257_258.sl = sl << 3;
+ trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1);
+ trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2);
+
+ trap->len = sizeof(trap->data);
+ send_trap(ibp, trap);
}
/*
@@ -211,34 +405,36 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
__be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
{
- struct opa_mad_notice_attr data;
+ struct trap_node *trap;
u32 lid = ppd_from_ibp(ibp)->lid;
- memset(&data, 0, sizeof(data));
+ trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY,
+ lid);
+ if (!trap)
+ return;
+
/* Send violation trap */
- data.generic_type = IB_NOTICE_TYPE_SECURITY;
- data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = OPA_TRAP_BAD_M_KEY;
- data.issuer_lid = cpu_to_be32(lid);
- data.ntc_256.lid = data.issuer_lid;
- data.ntc_256.method = mad->method;
- data.ntc_256.attr_id = mad->attr_id;
- data.ntc_256.attr_mod = mad->attr_mod;
- data.ntc_256.mkey = mkey;
+ trap->data.ntc_256.lid = trap->data.issuer_lid;
+ trap->data.ntc_256.method = mad->method;
+ trap->data.ntc_256.attr_id = mad->attr_id;
+ trap->data.ntc_256.attr_mod = mad->attr_mod;
+ trap->data.ntc_256.mkey = mkey;
if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- data.ntc_256.dr_slid = dr_slid;
- data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
- if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) {
- data.ntc_256.dr_trunc_hop |=
+ trap->data.ntc_256.dr_slid = dr_slid;
+ trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
+ if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) {
+ trap->data.ntc_256.dr_trunc_hop |=
IB_NOTICE_TRAP_DR_TRUNC;
- hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path);
+ hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path);
}
- data.ntc_256.dr_trunc_hop |= hop_cnt;
- memcpy(data.ntc_256.dr_rtn_path, return_path,
+ trap->data.ntc_256.dr_trunc_hop |= hop_cnt;
+ memcpy(trap->data.ntc_256.dr_rtn_path, return_path,
hop_cnt);
}
- send_trap(ibp, &data, sizeof(data));
+ trap->len = sizeof(trap->data);
+
+ send_trap(ibp, trap);
}
/*
@@ -246,22 +442,24 @@ static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
*/
void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
{
- struct opa_mad_notice_attr data;
+ struct trap_node *trap;
struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
u32 lid = ppd_from_ibp(ibp)->lid;
- memset(&data, 0, sizeof(data));
+ trap = create_trap_node(IB_NOTICE_TYPE_INFO,
+ OPA_TRAP_CHANGE_CAPABILITY,
+ lid);
+ if (!trap)
+ return;
- data.generic_type = IB_NOTICE_TYPE_INFO;
- data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
- data.issuer_lid = cpu_to_be32(lid);
- data.ntc_144.lid = data.issuer_lid;
- data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+ trap->data.ntc_144.lid = trap->data.issuer_lid;
+ trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+ trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags);
- send_trap(ibp, &data, sizeof(data));
+ trap->len = sizeof(trap->data);
+ send_trap(ibp, trap);
}
/*
@@ -269,19 +467,19 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
*/
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
{
- struct opa_mad_notice_attr data;
+ struct trap_node *trap;
u32 lid = ppd_from_ibp(ibp)->lid;
- memset(&data, 0, sizeof(data));
+ trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID,
+ lid);
+ if (!trap)
+ return;
- data.generic_type = IB_NOTICE_TYPE_INFO;
- data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = OPA_TRAP_CHANGE_SYSGUID;
- data.issuer_lid = cpu_to_be32(lid);
- data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
- data.ntc_145.lid = data.issuer_lid;
+ trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
+ trap->data.ntc_145.lid = trap->data.issuer_lid;
- send_trap(ibp, &data, sizeof(data));
+ trap->len = sizeof(trap->data);
+ send_trap(ibp, trap);
}
/*
@@ -289,29 +487,30 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
*/
void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
{
- struct opa_mad_notice_attr data;
+ struct trap_node *trap;
u32 lid = ppd_from_ibp(ibp)->lid;
- memset(&data, 0, sizeof(data));
+ trap = create_trap_node(IB_NOTICE_TYPE_INFO,
+ OPA_TRAP_CHANGE_CAPABILITY,
+ lid);
+ if (!trap)
+ return;
- data.generic_type = IB_NOTICE_TYPE_INFO;
- data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
- data.issuer_lid = cpu_to_be32(lid);
- data.ntc_144.lid = data.issuer_lid;
- data.ntc_144.change_flags =
+ trap->data.ntc_144.lid = trap->data.issuer_lid;
+ trap->data.ntc_144.change_flags =
cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
- send_trap(ibp, &data, sizeof(data));
+ trap->len = sizeof(trap->data);
+ send_trap(ibp, trap);
}
static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u8 port, u32 *resp_len, u32 max_len)
{
struct opa_node_description *nd;
- if (am) {
+ if (am || smp_length_check(sizeof(*nd), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -328,7 +527,7 @@ static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct opa_node_info *ni;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -338,6 +537,7 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
/* GUID 0 is illegal */
if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
+ smp_length_check(sizeof(*ni), max_len) ||
get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -519,7 +719,7 @@ void read_ltp_rtt(struct hfi1_devdata *dd)
static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
int i;
struct hfi1_devdata *dd;
@@ -535,7 +735,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
u32 buffer_units;
u64 tmp = 0;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -605,7 +805,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd->offline_disabled_reason;
pi->port_states.portphysstate_portstate =
- (hfi1_ibphys_portstate(ppd) << 4) | state;
+ (driver_pstate(ppd) << 4) | state;
pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
@@ -704,11 +904,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
pi->buffer_units = cpu_to_be32(buffer_units);
- pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported |
- OPA_CAP_MASK3_IsEthOnFabricSupported);
- /* Driver does not support mcast/collective configuration */
- pi->opa_cap_mask &=
- cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported);
+ pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags);
pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7)
<< 3 | (HFI1_MCAST_NR & 0x7));
@@ -748,7 +944,7 @@ static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 n_blocks_req = OPA_AM_NBLK(am);
@@ -771,6 +967,11 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
+ if (smp_length_check(size, max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
if (start_block + n_blocks_req > n_blocks_avail ||
n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; "
@@ -1074,7 +1275,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
*/
static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct opa_port_info *pi = (struct opa_port_info *)data;
struct ib_event event;
@@ -1095,7 +1296,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
int ret, i, invalid = 0, call_set_mtu = 0;
int call_link_downgrade_policy = 0;
- if (num_ports != 1) {
+ if (num_ports != 1 ||
+ smp_length_check(sizeof(*pi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1346,7 +1548,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
if (ret)
return ret;
- ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
+ ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
+ max_len);
/* restore re-reg bit per o14-12.2.1 */
pi->clientrereg_subnettimeout |= clientrereg;
@@ -1363,7 +1566,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
return ret;
get_only:
- return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
/**
@@ -1424,7 +1628,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 n_blocks_sent = OPA_AM_NBLK(am);
@@ -1434,6 +1638,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
int i;
u16 n_blocks_avail;
unsigned npkeys = hfi1_get_npkeys(dd);
+ u32 size = 0;
if (n_blocks_sent == 0) {
pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
@@ -1444,6 +1649,13 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+ size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE);
+
+ if (smp_length_check(size, max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
if (start_block + n_blocks_sent > n_blocks_avail ||
n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
@@ -1461,7 +1673,8 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
#define ILLEGAL_VL 12
@@ -1522,14 +1735,14 @@ static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *p = data;
size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
unsigned i;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1545,14 +1758,15 @@ static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *p = data;
+ size_t size = ARRAY_SIZE(ibp->sl_to_sc);
int i;
u8 sc;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1567,19 +1781,20 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
}
}
- return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *p = data;
size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
unsigned i;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1595,13 +1810,14 @@ static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
+ size_t size = ARRAY_SIZE(ibp->sc_to_sl);
u8 *p = data;
int i;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1609,19 +1825,20 @@ static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
ibp->sc_to_sl[i] = *p++;
- return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NBLK(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
void *vp = (void *)data;
size_t size = 4 * sizeof(u64);
- if (n_blocks != 1) {
+ if (n_blocks != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1636,7 +1853,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NBLK(am);
int async_update = OPA_AM_ASYNC(am);
@@ -1644,8 +1861,15 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
void *vp = (void *)data;
struct hfi1_pportdata *ppd;
int lstate;
+ /*
+ * set_sc2vlt_tables writes the information contained in *data
+ * to four 64-bit registers SendSC2VLt[0-3]. We need to make
+ * sure *max_len is not greater than the total size of the four
+ * SendSC2VLt[0-3] registers.
+ */
+ size_t size = 4 * sizeof(u64);
- if (n_blocks != 1 || async_update) {
+ if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1665,27 +1889,28 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
set_sc2vlt_tables(dd, vp);
- return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
void *vp = (void *)data;
- int size;
+ int size = sizeof(struct sc2vlnt);
- if (n_blocks != 1) {
+ if (n_blocks != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
ppd = dd->pport + (port - 1);
- size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
+ fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
if (resp_len)
*resp_len += size;
@@ -1695,15 +1920,16 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
void *vp = (void *)data;
int lstate;
+ int size = sizeof(struct sc2vlnt);
- if (n_blocks != 1) {
+ if (n_blocks != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1721,12 +1947,12 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
}
static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 nports = OPA_AM_NPORT(am);
u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -1735,7 +1961,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct hfi1_pportdata *ppd;
struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
- if (nports != 1) {
+ if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1755,7 +1981,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
ppd->offline_disabled_reason;
psi->port_states.portphysstate_portstate =
- (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
+ (driver_pstate(ppd) << 4) | (lstate & 0xf);
psi->link_width_downgrade_tx_active =
cpu_to_be16(ppd->link_width_downgrade_tx_active);
psi->link_width_downgrade_rx_active =
@@ -1768,7 +1994,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 nports = OPA_AM_NPORT(am);
u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -1779,7 +2005,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
int ret, invalid = 0;
- if (nports != 1) {
+ if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1809,19 +2035,21 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
if (invalid)
smp->status |= IB_SMP_INVALID_FIELD;
- return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 addr = OPA_AM_CI_ADDR(am);
u32 len = OPA_AM_CI_LEN(am) + 1;
int ret;
- if (dd->pport->port_type != PORT_TYPE_QSFP) {
+ if (dd->pport->port_type != PORT_TYPE_QSFP ||
+ smp_length_check(len, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1864,21 +2092,22 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev, u8 port, u32 *resp_len,
+ u32 max_len)
{
u32 num_ports = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
struct buffer_control *p = (struct buffer_control *)data;
- int size;
+ int size = sizeof(struct buffer_control);
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
ppd = dd->pport + (port - 1);
- size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
+ fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
trace_bct_get(dd, p);
if (resp_len)
*resp_len += size;
@@ -1887,14 +2116,15 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev, u8 port, u32 *resp_len,
+ u32 max_len)
{
u32 num_ports = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
struct buffer_control *p = (struct buffer_control *)data;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1905,41 +2135,43 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
u32 num_ports = OPA_AM_NPORT(am);
u8 section = (am & 0x00ff0000) >> 16;
u8 *p = data;
- int size = 0;
+ int size = 256;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
switch (section) {
case OPA_VLARB_LOW_ELEMENTS:
- size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
+ fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
break;
case OPA_VLARB_HIGH_ELEMENTS:
- size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
+ fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
break;
case OPA_VLARB_PREEMPT_ELEMENTS:
- size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
+ fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
break;
case OPA_VLARB_PREEMPT_MATRIX:
- size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
+ fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
break;
default:
pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
be32_to_cpu(smp->attr_mod));
smp->status |= IB_SMP_INVALID_FIELD;
+ size = 0;
break;
}
@@ -1951,14 +2183,15 @@ static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
u32 num_ports = OPA_AM_NPORT(am);
u8 section = (am & 0x00ff0000) >> 16;
u8 *p = data;
+ int size = 256;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1986,7 +2219,8 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
break;
}
- return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
struct opa_pma_mad {
@@ -3282,13 +3516,18 @@ struct opa_congestion_info_attr {
static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct opa_congestion_info_attr *p =
(struct opa_congestion_info_attr *)data;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ if (smp_length_check(sizeof(*p), max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
p->congestion_info = 0;
p->control_table_cap = ppd->cc_max_table_entries;
p->congestion_log_length = OPA_CONG_LOG_ELEMS;
@@ -3301,7 +3540,7 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u8 port, u32 *resp_len, u32 max_len)
{
int i;
struct opa_congestion_setting_attr *p =
@@ -3311,6 +3550,11 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
struct opa_congestion_setting_entry_shadow *entries;
struct cc_state *cc_state;
+ if (smp_length_check(sizeof(*p), max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
rcu_read_lock();
cc_state = get_cc_state(ppd);
@@ -3385,7 +3629,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct opa_congestion_setting_attr *p =
(struct opa_congestion_setting_attr *)data;
@@ -3394,6 +3638,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
struct opa_congestion_setting_entry_shadow *entries;
int i;
+ if (smp_length_check(sizeof(*p), max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
/*
* Save details from packet into the ppd. Hold the cc_state_lock so
* our information is consistent with anyone trying to apply the state.
@@ -3415,12 +3664,12 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
apply_cc_state(ppd);
return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
}
static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u8 port, u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -3428,7 +3677,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
s64 ts;
int i;
- if (am != 0) {
+ if (am || smp_length_check(sizeof(*cong_log), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -3486,7 +3735,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct ib_cc_table_attr *cc_table_attr =
(struct ib_cc_table_attr *)data;
@@ -3498,9 +3747,10 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
int i, j;
u32 sentry, eentry;
struct cc_state *cc_state;
+ u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
/* sanity check n_blocks, start_block */
- if (n_blocks == 0 ||
+ if (n_blocks == 0 || smp_length_check(size, max_len) ||
start_block + n_blocks > ppd->cc_max_table_entries) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -3530,14 +3780,14 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
rcu_read_unlock();
if (resp_len)
- *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
+ *resp_len += size;
return reply((struct ib_mad_hdr *)smp);
}
static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3548,9 +3798,10 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
int i, j;
u32 sentry, eentry;
u16 ccti_limit;
+ u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
/* sanity check n_blocks, start_block */
- if (n_blocks == 0 ||
+ if (n_blocks == 0 || smp_length_check(size, max_len) ||
start_block + n_blocks > ppd->cc_max_table_entries) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -3581,7 +3832,8 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
/* now apply the information */
apply_cc_state(ppd);
- return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
struct opa_led_info {
@@ -3594,7 +3846,7 @@ struct opa_led_info {
static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd = dd->pport;
@@ -3602,7 +3854,7 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
u32 nport = OPA_AM_NPORT(am);
u32 is_beaconing_active;
- if (nport != 1) {
+ if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -3624,14 +3876,14 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct opa_led_info *p = (struct opa_led_info *)data;
u32 nport = OPA_AM_NPORT(am);
int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
- if (nport != 1) {
+ if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -3641,12 +3893,13 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
else
shutdown_led_override(dd->pport);
- return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
int ret;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3654,71 +3907,71 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
switch (attr_id) {
case IB_SMP_ATTR_NODE_DESC:
ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_NODE_INFO:
ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_PORT_INFO:
ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_PKEY_TABLE:
ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SL_TO_SC_MAP:
ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_SL_MAP:
ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_PORT_STATE_INFO:
ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_CABLE_INFO:
ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_VL_ARB_TABLE:
ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_CONGESTION_INFO:
ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
- port, resp_len);
+ port, resp_len, max_len);
break;
case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
- port, resp_len);
+ port, resp_len, max_len);
break;
case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_LED_INFO:
ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_SM_INFO:
if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
@@ -3736,7 +3989,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
int ret;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3744,51 +3997,51 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
switch (attr_id) {
case IB_SMP_ATTR_PORT_INFO:
ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_PKEY_TABLE:
ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SL_TO_SC_MAP:
ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_SL_MAP:
ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_PORT_STATE_INFO:
ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_VL_ARB_TABLE:
ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
- port, resp_len);
+ port, resp_len, max_len);
break;
case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_LED_INFO:
ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_SM_INFO:
if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
@@ -3844,7 +4097,10 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
memset(next_smp + sizeof(*agg), 0, agg_data_len);
(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
- ibdev, port, NULL);
+ ibdev, port, NULL, (u32)agg_data_len);
+
+ if (smp->status & IB_SMP_INVALID_FIELD)
+ break;
if (smp->status & ~IB_SMP_DIRECTION) {
set_aggr_error(agg);
return reply((struct ib_mad_hdr *)smp);
@@ -3887,7 +4143,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp,
}
(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
- ibdev, port, NULL);
+ ibdev, port, NULL, (u32)agg_data_len);
+ if (smp->status & IB_SMP_INVALID_FIELD)
+ break;
if (smp->status & ~IB_SMP_DIRECTION) {
set_aggr_error(agg);
return reply((struct ib_mad_hdr *)smp);
@@ -3997,12 +4255,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
struct opa_smp *smp = (struct opa_smp *)out_mad;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *data;
- u32 am;
+ u32 am, data_size;
__be16 attr_id;
int ret;
*out_mad = *in_mad;
data = opa_get_smp_data(smp);
+ data_size = (u32)opa_get_smp_data_size(smp);
am = be32_to_cpu(smp->attr_mod);
attr_id = smp->attr_id;
@@ -4046,7 +4305,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
default:
clear_opa_smp_data(smp);
ret = subn_get_opa_sma(attr_id, smp, am, data,
- ibdev, port, resp_len);
+ ibdev, port, resp_len,
+ data_size);
break;
case OPA_ATTRIB_ID_AGGREGATE:
ret = subn_get_opa_aggregate(smp, ibdev, port,
@@ -4058,7 +4318,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
switch (attr_id) {
default:
ret = subn_set_opa_sma(attr_id, smp, am, data,
- ibdev, port, resp_len);
+ ibdev, port, resp_len,
+ data_size);
break;
case OPA_ATTRIB_ID_AGGREGATE:
ret = subn_set_opa_aggregate(smp, ibdev, port,
@@ -4077,6 +4338,11 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
*/
ret = IB_MAD_RESULT_SUCCESS;
break;
+ case IB_MGMT_METHOD_TRAP_REPRESS:
+ subn_handle_opa_trap_repress(ibp, smp);
+ /* Always successful */
+ ret = IB_MAD_RESULT_SUCCESS;
+ break;
default:
smp->status |= IB_SMP_UNSUP_METHOD;
ret = reply((struct ib_mad_hdr *)smp);
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 5aa3fd1..4c12450 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -115,7 +115,7 @@ struct opa_mad_notice_attr {
__be32 lid; /* LID where change occurred */
__be32 new_cap_mask; /* new capability mask */
__be16 reserved2;
- __be16 cap_mask;
+ __be16 cap_mask3;
__be16 change_flags; /* low 4 bits only */
} __packed ntc_144;
@@ -428,5 +428,6 @@ struct sc2vlnt {
COUNTER_MASK(1, 4))
void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port);
+void hfi1_handle_trap_timer(unsigned long data);
#endif /* _HFI1_MAD_H */
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index ccbf52c..d41fd87 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -217,21 +217,27 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
- unsigned long addr, unsigned long len)
+bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
+ unsigned long addr, unsigned long len,
+ struct mmu_rb_node **rb_node)
{
struct mmu_rb_node *node;
unsigned long flags;
+ bool ret = false;
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, addr, len);
if (node) {
+ if (node->addr == addr && node->len == len)
+ goto unlock;
__mmu_int_rb_remove(node, &handler->root);
list_del(&node->list); /* remove from LRU list */
+ ret = true;
}
+unlock:
spin_unlock_irqrestore(&handler->lock, flags);
-
- return node;
+ *rb_node = node;
+ return ret;
}
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index 754f6eb..f04cec1 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -81,7 +81,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
struct mmu_rb_node *mnode);
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
- unsigned long addr, unsigned long len);
+bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
+ unsigned long addr, unsigned long len,
+ struct mmu_rb_node **rb_node);
#endif /* _HFI1_MMU_RB_H */
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 6a9f6f9..82447b7 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -68,7 +68,7 @@
/*
* Code to adjust PCIe capabilities.
*/
-static void tune_pcie_caps(struct hfi1_devdata *);
+static int tune_pcie_caps(struct hfi1_devdata *);
/*
* Do all the common PCIe setup and initialization.
@@ -161,6 +161,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
{
unsigned long len;
resource_size_t addr;
+ int ret = 0;
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
@@ -179,47 +180,54 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
return -EINVAL;
}
- dd->kregbase = ioremap_nocache(addr, TXE_PIO_SEND);
- if (!dd->kregbase)
+ dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY);
+ if (!dd->kregbase1) {
+ dd_dev_err(dd, "UC mapping of kregbase1 failed\n");
return -ENOMEM;
+ }
+ dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY);
+ dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
+ dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count);
+ dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8;
+
+ dd->kregbase2 = ioremap_nocache(
+ addr + dd->base2_start,
+ TXE_PIO_SEND - dd->base2_start);
+ if (!dd->kregbase2) {
+ dd_dev_err(dd, "UC mapping of kregbase2 failed\n");
+ goto nomem;
+ }
+ dd_dev_info(dd, "UC base2: %p for %x\n", dd->kregbase2,
+ TXE_PIO_SEND - dd->base2_start);
dd->piobase = ioremap_wc(addr + TXE_PIO_SEND, TXE_PIO_SIZE);
if (!dd->piobase) {
- iounmap(dd->kregbase);
- return -ENOMEM;
+ dd_dev_err(dd, "WC mapping of send buffers failed\n");
+ goto nomem;
}
+ dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE);
- dd->flags |= HFI1_PRESENT; /* now register routines work */
-
- dd->kregend = dd->kregbase + TXE_PIO_SEND;
dd->physaddr = addr; /* used for io_remap, etc. */
/*
- * Re-map the chip's RcvArray as write-combining to allow us
+ * Map the chip's RcvArray as write-combining to allow us
* to write an entire cacheline worth of entries in one shot.
- * If this re-map fails, just continue - the RcvArray programming
- * function will handle both cases.
*/
- dd->chip_rcv_array_count = read_csr(dd, RCV_ARRAY_CNT);
dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY,
dd->chip_rcv_array_count * 8);
- dd_dev_info(dd, "WC Remapped RcvArray: %p\n", dd->rcvarray_wc);
- /*
- * Save BARs and command to rewrite after device reset.
- */
- pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0);
- pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1);
- pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom);
- pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
- &dd->pcie_devctl2);
- pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
- pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3);
- pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
+ if (!dd->rcvarray_wc) {
+ dd_dev_err(dd, "WC mapping of receive array failed\n");
+ goto nomem;
+ }
+ dd_dev_info(dd, "WC RcvArray: %p for %x\n",
+ dd->rcvarray_wc, dd->chip_rcv_array_count * 8);
+ dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */
return 0;
+nomem:
+ ret = -ENOMEM;
+ hfi1_pcie_ddcleanup(dd);
+ return ret;
}
/*
@@ -229,59 +237,19 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
*/
void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
{
- u64 __iomem *base = (void __iomem *)dd->kregbase;
-
dd->flags &= ~HFI1_PRESENT;
- dd->kregbase = NULL;
- iounmap(base);
+ if (dd->kregbase1)
+ iounmap(dd->kregbase1);
+ dd->kregbase1 = NULL;
+ if (dd->kregbase2)
+ iounmap(dd->kregbase2);
+ dd->kregbase2 = NULL;
if (dd->rcvarray_wc)
iounmap(dd->rcvarray_wc);
+ dd->rcvarray_wc = NULL;
if (dd->piobase)
iounmap(dd->piobase);
-}
-
-static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt,
- struct hfi1_msix_entry *hfi1_msix_entry)
-{
- int ret;
- int nvec = *msixcnt;
- struct msix_entry *msix_entry;
- int i;
-
- /*
- * We can't pass hfi1_msix_entry array to msix_setup
- * so use a dummy msix_entry array and copy the allocated
- * irq back to the hfi1_msix_entry array.
- */
- msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL);
- if (!msix_entry) {
- ret = -ENOMEM;
- goto do_intx;
- }
-
- for (i = 0; i < nvec; i++)
- msix_entry[i] = hfi1_msix_entry[i].msix;
-
- ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
- if (ret < 0)
- goto free_msix_entry;
- nvec = ret;
-
- for (i = 0; i < nvec; i++)
- hfi1_msix_entry[i].msix = msix_entry[i];
-
- kfree(msix_entry);
- *msixcnt = nvec;
- return;
-
-free_msix_entry:
- kfree(msix_entry);
-
-do_intx:
- dd_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n",
- nvec, ret);
- *msixcnt = 0;
- hfi1_enable_intx(dd->pcidev);
+ dd->piobase = NULL;
}
/* return the PCIe link speed from the given link status */
@@ -314,8 +282,14 @@ static u32 extract_width(u16 linkstat)
static void update_lbus_info(struct hfi1_devdata *dd)
{
u16 linkstat;
+ int ret;
+
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return;
+ }
- pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
dd->lbus_width = extract_width(linkstat);
dd->lbus_speed = extract_speed(linkstat);
snprintf(dd->lbus_info, sizeof(dd->lbus_info),
@@ -330,6 +304,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
{
u32 linkcap;
struct pci_dev *parent = dd->pcidev->bus->self;
+ int ret;
if (!pci_is_pcie(dd->pcidev)) {
dd_dev_err(dd, "Can't find PCI Express capability!\n");
@@ -339,7 +314,12 @@ int pcie_speeds(struct hfi1_devdata *dd)
/* find if our max speed is Gen3 and parent supports Gen3 speeds */
dd->link_gen3_capable = 1;
- pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
+ ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return ret;
+ }
+
if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) {
dd_dev_info(dd,
"This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
@@ -364,49 +344,150 @@ int pcie_speeds(struct hfi1_devdata *dd)
}
/*
- * Returns in *nent:
- * - actual number of interrupts allocated
+ * Returns:
+ * - actual number of interrupts allocated or
* - 0 if fell back to INTx.
+ * - error
*/
-void request_msix(struct hfi1_devdata *dd, u32 *nent,
- struct hfi1_msix_entry *entry)
+int request_msix(struct hfi1_devdata *dd, u32 msireq)
{
- int pos;
+ int nvec, ret;
- pos = dd->pcidev->msix_cap;
- if (*nent && pos) {
- msix_setup(dd, pos, nent, entry);
- /* did it, either MSI-X or INTx */
- } else {
- *nent = 0;
- hfi1_enable_intx(dd->pcidev);
+ nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
+ PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
+ if (nvec < 0) {
+ dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
+ return nvec;
}
- tune_pcie_caps(dd);
+ ret = tune_pcie_caps(dd);
+ if (ret) {
+ dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret);
+ pci_free_irq_vectors(dd->pcidev);
+ return ret;
+ }
+
+ /* check for legacy IRQ */
+ if (nvec == 1 && !dd->pcidev->msix_enabled)
+ return 0;
+
+ return nvec;
}
-void hfi1_enable_intx(struct pci_dev *pdev)
+/* restore command and BARs after a reset has wiped them out */
+int restore_pci_variables(struct hfi1_devdata *dd)
{
- /* first, turn on INTx */
- pci_intx(pdev, 1);
- /* then turn off MSI-X */
- pci_disable_msix(pdev);
+ int ret = 0;
+
+ ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
+ if (ret)
+ goto error;
+
+ ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
+ dd->pcibar0);
+ if (ret)
+ goto error;
+
+ ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
+ dd->pcibar1);
+ if (ret)
+ goto error;
+
+ ret = pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL,
+ dd->pcie_devctl);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL,
+ dd->pcie_lnkctl);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
+ dd->pcie_devctl2);
+ if (ret)
+ goto error;
+
+ ret = pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
+ if (ret)
+ goto error;
+
+ ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
+ dd->pci_lnkctl3);
+ if (ret)
+ goto error;
+
+ ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
+ if (ret)
+ goto error;
+
+ return 0;
+
+error:
+ dd_dev_err(dd, "Unable to write to PCI config\n");
+ return ret;
}
-/* restore command and BARs after a reset has wiped them out */
-void restore_pci_variables(struct hfi1_devdata *dd)
+/* Save BARs and command to rewrite after device reset */
+int save_pci_variables(struct hfi1_devdata *dd)
{
- pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
- pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0);
- pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1);
- pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
- dd->pcie_devctl2);
- pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
- pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3);
- pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
+ int ret = 0;
+
+ ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
+ &dd->pcibar0);
+ if (ret)
+ goto error;
+
+ ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
+ &dd->pcibar1);
+ if (ret)
+ goto error;
+
+ ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom);
+ if (ret)
+ goto error;
+
+ ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL,
+ &dd->pcie_devctl);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL,
+ &dd->pcie_lnkctl);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
+ &dd->pcie_devctl2);
+ if (ret)
+ goto error;
+
+ ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
+ if (ret)
+ goto error;
+
+ ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
+ &dd->pci_lnkctl3);
+ if (ret)
+ goto error;
+
+ ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
+ if (ret)
+ goto error;
+
+ return 0;
+
+error:
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return ret;
}
/*
@@ -421,21 +502,33 @@ uint aspm_mode = ASPM_MODE_DISABLED;
module_param_named(aspm, aspm_mode, uint, S_IRUGO);
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
-static void tune_pcie_caps(struct hfi1_devdata *dd)
+static int tune_pcie_caps(struct hfi1_devdata *dd)
{
struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
u16 rc_mrrs, ep_mrrs, max_mrrs, ectl;
+ int ret;
/*
* Turn on extended tags in DevCtl in case the BIOS has turned it off
* to improve WFR SDMA bandwidth
*/
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
+ ret = pcie_capability_read_word(dd->pcidev,
+ PCI_EXP_DEVCTL, &ectl);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return ret;
+ }
+
if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
dd_dev_info(dd, "Enabling PCIe extended tags\n");
ectl |= PCI_EXP_DEVCTL_EXT_TAG;
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl);
+ ret = pcie_capability_write_word(dd->pcidev,
+ PCI_EXP_DEVCTL, ectl);
+ if (ret) {
+ dd_dev_err(dd, "Unable to write to PCI config\n");
+ return ret;
+ }
}
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
@@ -444,14 +537,14 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
* access to the upstream component.
*/
if (!parent)
- return;
+ return -EINVAL;
if (!pci_is_root_bus(parent->bus)) {
dd_dev_info(dd, "Parent not root\n");
- return;
+ return -EINVAL;
}
if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
- return;
+ return -EINVAL;
rc_mpss = parent->pcie_mpss;
rc_mps = ffs(pcie_get_mps(parent)) - 8;
/* Find out supported and configured values for endpoint (us) */
@@ -497,6 +590,8 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
ep_mrrs = max_mrrs;
pcie_set_readrq(dd->pcidev, ep_mrrs);
}
+
+ return 0;
}
/* End of PCIe capability tuning */
@@ -728,6 +823,7 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
u32 violation;
u32 i;
u8 c_minus1, c0, c_plus1;
+ int ret;
for (i = 0; i < 11; i++) {
/* set index */
@@ -739,8 +835,14 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
pci_write_config_dword(pdev, PCIE_CFG_REG_PL102,
eq_value(c_minus1, c0, c_plus1));
/* check if these coefficients violate EQ rules */
- pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105,
- &violation);
+ ret = pci_read_config_dword(dd->pcidev,
+ PCIE_CFG_REG_PL105, &violation);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ hit_error = 1;
+ break;
+ }
+
if (violation
& PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){
if (hit_error == 0) {
@@ -1194,7 +1296,13 @@ retry:
* that it is Gen3 capable earlier.
*/
dd_dev_info(dd, "%s: setting parent target link speed\n", __func__);
- pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
+ ret = pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return_error = 1;
+ goto done;
+ }
+
dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
(u32)lnkctl2);
/* only write to parent if target is not as high as ours */
@@ -1203,20 +1311,37 @@ retry:
lnkctl2 |= target_vector;
dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
(u32)lnkctl2);
- pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2);
+ ret = pcie_capability_write_word(parent,
+ PCI_EXP_LNKCTL2, lnkctl2);
+ if (ret) {
+ dd_dev_err(dd, "Unable to write to PCI config\n");
+ return_error = 1;
+ goto done;
+ }
} else {
dd_dev_info(dd, "%s: ..target speed is OK\n", __func__);
}
dd_dev_info(dd, "%s: setting target link speed\n", __func__);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return_error = 1;
+ goto done;
+ }
+
dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
(u32)lnkctl2);
lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
lnkctl2 |= target_vector;
dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
(u32)lnkctl2);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
+ ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
+ if (ret) {
+ dd_dev_err(dd, "Unable to write to PCI config\n");
+ return_error = 1;
+ goto done;
+ }
/* step 5h: arm gasket logic */
/* hold DC in reset across the SBR */
@@ -1266,7 +1391,14 @@ retry:
/* restore PCI space registers we know were reset */
dd_dev_info(dd, "%s: calling restore_pci_variables\n", __func__);
- restore_pci_variables(dd);
+ ret = restore_pci_variables(dd);
+ if (ret) {
+ dd_dev_err(dd, "%s: Could not restore PCI variables\n",
+ __func__);
+ return_error = 1;
+ goto done;
+ }
+
/* restore firmware control */
write_csr(dd, MISC_CFG_FW_CTRL, fw_ctrl);
@@ -1296,7 +1428,13 @@ retry:
setextled(dd, 0);
/* check for any per-lane errors */
- pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
+ ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return_error = 1;
+ goto done;
+ }
+
dd_dev_info(dd, "%s: per-lane errors: 0x%x\n", __func__, reg32);
/* extract status, look for our HFI */
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index ed72b5a..7108a4b 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1012,7 +1012,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
"%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
__func__, sc->sw_index,
sc->hw_context, (u32)reg);
- queue_work(dd->pport->hfi1_wq,
+ queue_work(dd->pport->link_wq,
&dd->pport->link_bounce_work);
break;
}
@@ -1568,7 +1568,8 @@ static void sc_piobufavail(struct send_context *sc)
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
unsigned long flags;
- unsigned i, n = 0;
+ uint i, n = 0, max_idx = 0;
+ u8 max_starved_cnt = 0;
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
dd->send_contexts[sc->sw_index].type != SC_VL15)
@@ -1591,6 +1592,7 @@ static void sc_piobufavail(struct send_context *sc)
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
+ iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
/* refcount held until actual wake up */
qps[n++] = qp;
}
@@ -1605,9 +1607,14 @@ static void sc_piobufavail(struct send_context *sc)
}
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
- for (i = 0; i < n; i++)
- hfi1_qp_wakeup(qps[i],
+ /* Wake up the most starved one first */
+ if (n)
+ hfi1_qp_wakeup(qps[max_idx],
RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
+ for (i = 0; i < n; i++)
+ if (i != max_idx)
+ hfi1_qp_wakeup(qps[i],
+ RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
}
/* translate a send credit update to a bit code of reasons */
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 838fe84..5c38a24 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -58,8 +58,13 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd)
version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT;
/* Prevent power on default of all zeroes from passing checksum */
- if (!version)
+ if (!version) {
+ dd_dev_err(dd, "%s: Config bitmap uninitialized\n", __func__);
+ dd_dev_err(dd,
+ "%s: Please update your BIOS to support active channels\n",
+ __func__);
return 0;
+ }
/*
* ASIC scratch 0 only contains the checksum and bitmap version as
@@ -84,6 +89,8 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd)
if (checksum + temp_scratch == 0xFFFF)
return 1;
+
+ dd_dev_err(dd, "%s: Configuration bitmap corrupted\n", __func__);
return 0;
}
@@ -136,7 +143,6 @@ static void save_platform_config_fields(struct hfi1_devdata *dd)
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
- unsigned long size = 0;
u8 *temp_platform_config = NULL;
u32 esize;
@@ -145,11 +151,6 @@ void get_platform_config(struct hfi1_devdata *dd)
save_platform_config_fields(dd);
return;
}
- dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n",
- __func__);
- dd_dev_err(dd,
- "%s: Please update your BIOS to support active channels\n",
- __func__);
} else {
ret = eprom_read_platform_config(dd,
(void **)&temp_platform_config,
@@ -160,15 +161,6 @@ void get_platform_config(struct hfi1_devdata *dd)
dd->platform_config.size = esize;
return;
}
- /* fail, try EFI variable */
-
- ret = read_hfi1_efi_var(dd, "configuration", &size,
- (void **)&temp_platform_config);
- if (!ret) {
- dd->platform_config.data = temp_platform_config;
- dd->platform_config.size = size;
- return;
- }
}
dd_dev_err(dd,
"%s: Failed to get platform config, falling back to sub-optimal default file\n",
@@ -242,7 +234,7 @@ static int qual_power(struct hfi1_pportdata *ppd)
if (ppd->offline_disabled_reason ==
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
- dd_dev_info(
+ dd_dev_err(
ppd->dd,
"%s: Port disabled due to system power restrictions\n",
__func__);
@@ -268,7 +260,7 @@ static int qual_bitrate(struct hfi1_pportdata *ppd)
if (ppd->offline_disabled_reason ==
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) {
- dd_dev_info(
+ dd_dev_err(
ppd->dd,
"%s: Cable failed bitrate check, disabling port\n",
__func__);
@@ -709,15 +701,15 @@ static void apply_tunings(
ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
GENERAL_CONFIG, config_data);
if (ret != HCMD_SUCCESS)
- dd_dev_info(ppd->dd,
- "%s: Failed set ext device config params\n",
- __func__);
+ dd_dev_err(ppd->dd,
+ "%s: Failed set ext device config params\n",
+ __func__);
}
if (tx_preset_index == OPA_INVALID_INDEX) {
if (ppd->port_type == PORT_TYPE_QSFP && limiting_active)
- dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n",
- __func__);
+ dd_dev_err(ppd->dd, "%s: Invalid Tx preset index\n",
+ __func__);
return;
}
@@ -900,7 +892,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
case 0xD: /* fallthrough */
case 0xF:
default:
- dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n",
+ dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
__func__);
break;
}
@@ -942,7 +934,7 @@ void tune_serdes(struct hfi1_pportdata *ppd)
case PORT_TYPE_DISCONNECTED:
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED);
- dd_dev_info(dd, "%s: Port disconnected, disabling port\n",
+ dd_dev_warn(dd, "%s: Port disconnected, disabling port\n",
__func__);
goto bail;
case PORT_TYPE_FIXED:
@@ -1027,7 +1019,7 @@ void tune_serdes(struct hfi1_pportdata *ppd)
}
break;
default:
- dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
+ dd_dev_warn(ppd->dd, "%s: Unknown port type\n", __func__);
ppd->port_type = PORT_TYPE_UNKNOWN;
tuning_method = OPA_UNKNOWN_TUNING;
total_atten = 0;
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 1a7af9f..b801d84 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -68,17 +68,12 @@ static int iowait_sleep(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *stx,
- unsigned seq);
+ unsigned int seq,
+ bool pkts_sent);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);
-static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
- struct rvt_qpn_map *map, unsigned off)
-{
- return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
-}
-
const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
@@ -377,7 +372,8 @@ static int iowait_sleep(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *stx,
- unsigned seq)
+ uint seq,
+ bool pkts_sent)
{
struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
struct rvt_qp *qp;
@@ -408,7 +404,8 @@ static int iowait_sleep(
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
- list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+ iowait_queue(pkts_sent, &priv->s_iowait,
+ &sde->dmawait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
@@ -607,7 +604,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
send_context = qp_to_send_context(qp, priv->s_sc);
seq_printf(s,
- "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
+ "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
iter->n,
qp_idle(qp) ? "I" : "B",
qp->ibqp.qp_num,
@@ -630,6 +627,10 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->s_last, qp->s_acked, qp->s_cur,
qp->s_tail, qp->s_head, qp->s_size,
qp->s_avail,
+ /* ack_queue ring pointers, size */
+ qp->s_tail_ack_queue, qp->r_head_ack_queue,
+ HFI1_MAX_RDMA_ATOMIC,
+ /* remote QP info */
qp->remote_qpn,
rdma_ah_get_dlid(&qp->remote_ah_attr),
rdma_ah_get_sl(&qp->remote_ah_attr),
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 1080778..baa67bf 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -765,7 +765,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
ohdr->u.aeth = rvt_compute_aeth(qp);
sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
+ pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr)
& 0xf) << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
@@ -798,7 +798,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
goto queue_ack;
}
- trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
+ trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+ &hdr, ib_is_sc5(sc5));
/* write the pbc and data */
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords);
@@ -1009,7 +1010,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
return;
}
- psn = be32_to_cpu(ohdr->bth[2]);
+ psn = ib_bth_get_psn(ohdr);
reset_sending_psn(qp, psn);
/*
@@ -1915,17 +1916,16 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
void hfi1_rc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct ib_header *hdr = packet->hdr;
- u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct ib_other_headers *ohdr = packet->ohdr;
- u32 bth0, opcode;
+ u32 bth0;
+ u32 opcode = packet->opcode;
u32 hdrsize = packet->hlen;
u32 psn;
- u32 pad;
+ u32 pad = packet->pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
int diff;
@@ -1937,14 +1937,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
u32 rkey;
lockdep_assert_held(&qp->r_lock);
+
bth0 = be32_to_cpu(ohdr->bth[0]);
- if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
+ if (hfi1_ruc_check_hdr(ibp, packet))
return;
is_fecn = process_ecn(qp, packet, false);
-
- psn = be32_to_cpu(ohdr->bth[2]);
- opcode = ib_bth_get_opcode(ohdr);
+ psn = ib_bth_get_psn(ohdr);
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2074,8 +2073,6 @@ no_immediate_data:
wc.wc_flags = 0;
wc.ex.imm_data = 0;
send_last:
- /* Get the number of bytes the message was padded by. */
- pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -2368,28 +2365,19 @@ send_ack:
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct ib_header *hdr,
- u32 rcv_flags,
+ struct hfi1_packet *packet,
struct rvt_qp *qp)
{
- int has_grh = rcv_flags & HFI1_HAS_GRH;
- struct ib_other_headers *ohdr;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
int diff;
u32 opcode;
- u32 psn, bth0;
-
- /* Check for GRH */
- ohdr = &hdr->u.oth;
- if (has_grh)
- ohdr = &hdr->u.l.oth;
+ u32 psn;
- bth0 = be32_to_cpu(ohdr->bth[0]);
- if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
+ if (hfi1_ruc_check_hdr(ibp, packet))
return;
- psn = be32_to_cpu(ohdr->bth[2]);
- opcode = ib_bth_get_opcode(ohdr);
+ psn = ib_bth_get_psn(packet->ohdr);
+ opcode = ib_bth_get_opcode(packet->ohdr);
/* Only deal with RDMA Writes for now */
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 3a17dab..4afa00f 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -74,8 +74,10 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
if (wqe->sg_list[i].length == 0)
continue;
/* Check LKEY */
- if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
- &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+ ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+ NULL, &wqe->sg_list[i],
+ IB_ACCESS_LOCAL_WRITE);
+ if (unlikely(ret <= 0))
goto bad_lkey;
qp->r_len += wqe->sg_list[i].length;
j++;
@@ -214,100 +216,95 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the hfi1_migrate_qp() call.
*/
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
- int has_grh, struct rvt_qp *qp, u32 bth0)
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
{
__be64 guid;
unsigned long flags;
+ struct rvt_qp *qp = packet->qp;
u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
-
- if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
- if (!has_grh) {
+ u32 dlid = packet->dlid;
+ u32 slid = packet->slid;
+ u32 sl = packet->sl;
+ int migrated;
+ u32 bth0, bth1;
+
+ bth0 = be32_to_cpu(packet->ohdr->bth[0]);
+ bth1 = be32_to_cpu(packet->ohdr->bth[1]);
+ migrated = bth0 & IB_BTH_MIG_REQ;
+
+ if (qp->s_mig_state == IB_MIG_ARMED && migrated) {
+ if (!packet->grh) {
if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
IB_AH_GRH)
- goto err;
+ return 1;
} else {
const struct ib_global_route *grh;
if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
IB_AH_GRH))
- goto err;
+ return 1;
grh = rdma_ah_read_grh(&qp->alt_ah_attr);
guid = get_sguid(ibp, grh->sgid_index);
- if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+ if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
guid))
- goto err;
+ return 1;
if (!gid_ok(
- &hdr->u.l.grh.sgid,
+ &packet->grh->sgid,
grh->dgid.global.subnet_prefix,
grh->dgid.global.interface_id))
- goto err;
+ return 1;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
- ib_get_slid(hdr)))) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- (u16)bth0,
- ib_get_sl(hdr),
- 0, qp->ibqp.qp_num,
- ib_get_slid(hdr),
- ib_get_dlid(hdr));
- goto err;
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
+ sc5, slid))) {
+ hfi1_bad_pkey(ibp, (u16)bth0, sl,
+ 0, qp->ibqp.qp_num, slid, dlid);
+ return 1;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
- if (ib_get_slid(hdr) !=
- rdma_ah_get_dlid(&qp->alt_ah_attr) ||
+ if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) ||
ppd_from_ibp(ibp)->port !=
rdma_ah_get_port_num(&qp->alt_ah_attr))
- goto err;
+ return 1;
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
- if (!has_grh) {
+ if (!packet->grh) {
if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
IB_AH_GRH)
- goto err;
+ return 1;
} else {
const struct ib_global_route *grh;
if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
IB_AH_GRH))
- goto err;
+ return 1;
grh = rdma_ah_read_grh(&qp->remote_ah_attr);
guid = get_sguid(ibp, grh->sgid_index);
- if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+ if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
guid))
- goto err;
+ return 1;
if (!gid_ok(
- &hdr->u.l.grh.sgid,
+ &packet->grh->sgid,
grh->dgid.global.subnet_prefix,
grh->dgid.global.interface_id))
- goto err;
+ return 1;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
- ib_get_slid(hdr)))) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- (u16)bth0,
- ib_get_sl(hdr),
- 0, qp->ibqp.qp_num,
- ib_get_slid(hdr),
- ib_get_dlid(hdr));
- goto err;
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
+ sc5, slid))) {
+ hfi1_bad_pkey(ibp, (u16)bth0, sl,
+ 0, qp->ibqp.qp_num, slid, dlid);
+ return 1;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
- if (ib_get_slid(hdr) !=
- rdma_ah_get_dlid(&qp->remote_ah_attr) ||
+ if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) ||
ppd_from_ibp(ibp)->port != qp->port_num)
- goto err;
- if (qp->s_mig_state == IB_MIG_REARM &&
- !(bth0 & IB_BTH_MIG_REQ))
+ return 1;
+ if (qp->s_mig_state == IB_MIG_REARM && !migrated)
qp->s_mig_state = IB_MIG_ARMED;
}
return 0;
-
-err:
- return 1;
}
/**
@@ -816,6 +813,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
static bool schedule_send_yield(struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
{
+ ps->pkts_sent = true;
+
if (unlikely(time_after(jiffies, ps->timeout))) {
if (!ps->in_thread ||
workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
@@ -912,6 +911,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.timeout = jiffies + ps.timeout_int;
ps.cpu = priv->s_sde ? priv->s_sde->cpu :
cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+ ps.pkts_sent = false;
/* insure a pre-built packet is handled */
ps.s_txreq = get_waiting_verbs_txreq(qp);
@@ -934,7 +934,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
spin_lock_irqsave(&qp->s_lock, ps.flags);
}
} while (make_req(qp, &ps));
-
+ iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index bfd0d51..6781bcd 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -246,7 +246,7 @@ static void __sdma_process_event(
enum sdma_events event);
static void dump_sdma_state(struct sdma_engine *sde);
static void sdma_make_progress(struct sdma_engine *sde, u64 status);
-static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail);
+static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
static void sdma_flush_descq(struct sdma_engine *sde);
/**
@@ -325,7 +325,7 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
/* timed out - bounce the link */
dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
__func__, sde->this_idx, (u32)reg);
- queue_work(dd->pport->hfi1_wq,
+ queue_work(dd->pport->link_wq,
&dd->pport->link_bounce_work);
break;
}
@@ -1340,10 +1340,8 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
* @dd: hfi1_devdata
* @port: port number (currently only zero)
*
- * sdma_init initializes the specified number of engines.
- *
- * The code initializes each sde, its csrs. Interrupts
- * are not required to be enabled.
+ * Initializes each sde and its csrs.
+ * Interrupts are not required to be enabled.
*
* Returns:
* 0 - success, -errno on failure
@@ -1764,13 +1762,14 @@ retry:
*
* This is called with head_lock held.
*/
-static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
+static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
- unsigned i, n = 0, seq;
+ uint i, n = 0, seq, max_idx = 0;
struct sdma_txreq *stx;
struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
+ u8 max_starved_cnt = 0;
#ifdef CONFIG_SDMA_VERBOSITY
dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@@ -1805,6 +1804,9 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
if (num_desc > avail)
break;
avail -= num_desc;
+ /* Find the most starved wait memeber */
+ iowait_starve_find_max(wait, &max_starved_cnt,
+ n, &max_idx);
list_del_init(&wait->list);
waits[n++] = wait;
}
@@ -1813,8 +1815,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
}
} while (read_seqretry(&dev->iowait_lock, seq));
+ /* Schedule the most starved one first */
+ if (n)
+ waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
+
for (i = 0; i < n; i++)
- waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
+ if (i != max_idx)
+ waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}
/* head_lock must be held */
@@ -2351,7 +2358,8 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
static int sdma_check_progress(
struct sdma_engine *sde,
struct iowait *wait,
- struct sdma_txreq *tx)
+ struct sdma_txreq *tx,
+ bool pkts_sent)
{
int ret;
@@ -2364,7 +2372,7 @@ static int sdma_check_progress(
seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount);
- ret = wait->sleep(sde, wait, tx, seq);
+ ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
} else {
@@ -2378,6 +2386,7 @@ static int sdma_check_progress(
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @tx: sdma_txreq to submit
+ * @pkts_sent: has any packet been sent yet?
*
* The call submits the tx into the ring. If a iowait structure is non-NULL
* the packet will be queued to the list in wait.
@@ -2389,7 +2398,8 @@ static int sdma_check_progress(
*/
int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait,
- struct sdma_txreq *tx)
+ struct sdma_txreq *tx,
+ bool pkts_sent)
{
int ret = 0;
u16 tail;
@@ -2431,7 +2441,7 @@ unlock_noconn:
ret = -ECOMM;
goto unlock;
nodesc:
- ret = sdma_check_progress(sde, wait, tx);
+ ret = sdma_check_progress(sde, wait, tx, pkts_sent);
if (ret == -EAGAIN) {
ret = 0;
goto retry;
@@ -2500,8 +2510,10 @@ retry:
}
update_tail:
total_count = submit_count + flush_count;
- if (wait)
+ if (wait) {
iowait_sdma_add(wait, total_count);
+ iowait_starve_clear(submit_count > 0, wait);
+ }
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
@@ -2529,7 +2541,7 @@ unlock_noconn:
ret = -ECOMM;
goto update_tail;
nodesc:
- ret = sdma_check_progress(sde, wait, tx);
+ ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
if (ret == -EAGAIN) {
ret = 0;
goto retry;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 64f10b8..107011d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -852,7 +852,8 @@ struct iowait;
int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait,
- struct sdma_txreq *tx);
+ struct sdma_txreq *tx,
+ bool pkts_sent);
int sdma_send_txlist(struct sdma_engine *sde,
struct iowait *wait,
struct list_head *tx_list,
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index eafae48..b80b74d 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -47,7 +47,7 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
-u8 ibhdr_exhdr_len(struct ib_header *hdr)
+u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
u8 opcode;
@@ -61,13 +61,18 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr)
0 : hdr_len_by_opcode[opcode] - (12 + 8);
}
-#define IMM_PRN "imm %d"
-#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x"
-#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
-#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
-#define IETH_PRN "ieth rkey 0x%.8x"
-#define ATOMICACKETH_PRN "origdata %llx"
-#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx"
+const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet)
+{
+ return "IB";
+}
+
+#define IMM_PRN "imm:%d"
+#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
+#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
+#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
+#define IETH_PRN "ieth rkey:0x%.8x"
+#define ATOMICACKETH_PRN "origdata:%llx"
+#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
@@ -84,6 +89,43 @@ static const char *parse_syndrome(u8 syndrome)
return "";
}
+void hfi1_trace_parse_bth(struct ib_other_headers *ohdr,
+ u8 *ack, u8 *becn, u8 *fecn, u8 *mig,
+ u8 *se, u8 *pad, u8 *opcode, u8 *tver,
+ u16 *pkey, u32 *psn, u32 *qpn)
+{
+ *ack = ib_bth_get_ackreq(ohdr);
+ *becn = ib_bth_get_becn(ohdr);
+ *fecn = ib_bth_get_fecn(ohdr);
+ *mig = ib_bth_get_migreq(ohdr);
+ *se = ib_bth_get_se(ohdr);
+ *pad = ib_bth_get_pad(ohdr);
+ *opcode = ib_bth_get_opcode(ohdr);
+ *tver = ib_bth_get_tver(ohdr);
+ *pkey = ib_bth_get_pkey(ohdr);
+ *psn = ib_bth_get_psn(ohdr);
+ *qpn = ib_bth_get_qpn(ohdr);
+}
+
+void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
+ struct ib_other_headers **ohdr,
+ u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
+ u16 *len, u32 *dlid, u32 *slid)
+{
+ *lnh = ib_get_lnh(hdr);
+ *lver = ib_get_lver(hdr);
+ *sl = ib_get_sl(hdr);
+ *sc = ib_get_sc(hdr) | (sc5 << 4);
+ *len = ib_get_len(hdr);
+ *dlid = ib_get_dlid(hdr);
+ *slid = ib_get_slid(hdr);
+
+ if (*lnh == HFI1_LRH_BTH)
+ *ohdr = &hdr->u.oth;
+ else
+ *ohdr = &hdr->u.l.oth;
+}
+
const char *parse_everbs_hdrs(
struct trace_seq *p,
u8 opcode,
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 090f6b5..0f2d2da 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -55,8 +55,57 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_ibhdrs
-u8 ibhdr_exhdr_len(struct ib_header *hdr);
+#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
+#define show_ib_opcode(opcode) \
+__print_symbolic(opcode, \
+ ib_opcode_name(RC_SEND_FIRST), \
+ ib_opcode_name(RC_SEND_MIDDLE), \
+ ib_opcode_name(RC_SEND_LAST), \
+ ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_SEND_ONLY), \
+ ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_WRITE_FIRST), \
+ ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
+ ib_opcode_name(RC_RDMA_WRITE_LAST), \
+ ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_WRITE_ONLY), \
+ ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_READ_REQUEST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
+ ib_opcode_name(RC_ACKNOWLEDGE), \
+ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
+ ib_opcode_name(RC_COMPARE_SWAP), \
+ ib_opcode_name(RC_FETCH_ADD), \
+ ib_opcode_name(UC_SEND_FIRST), \
+ ib_opcode_name(UC_SEND_MIDDLE), \
+ ib_opcode_name(UC_SEND_LAST), \
+ ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_SEND_ONLY), \
+ ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_RDMA_WRITE_FIRST), \
+ ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
+ ib_opcode_name(UC_RDMA_WRITE_LAST), \
+ ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_RDMA_WRITE_ONLY), \
+ ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(UD_SEND_ONLY), \
+ ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(CNP))
+
const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
+u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr);
+const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet);
+void hfi1_trace_parse_bth(struct ib_other_headers *ohdr,
+ u8 *ack, u8 *becn, u8 *fecn, u8 *mig,
+ u8 *se, u8 *pad, u8 *opcode, u8 *tver,
+ u16 *pkey, u32 *psn, u32 *qpn);
+void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
+ struct ib_other_headers **ohdr,
+ u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
+ u16 *len, u32 *dlid, u32 *slid);
#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
@@ -66,139 +115,198 @@ __print_symbolic(lrh, \
lrh_name(LRH_BTH), \
lrh_name(LRH_GRH))
-#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
+#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x"
+#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d "
#define BTH_PRN \
- "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
- "f %d b %d qpn 0x%.6x a %d psn 0x%.8x"
-#define EHDR_PRN "%s"
+ "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \
+ "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x"
+#define EHDR_PRN "hlen:%d %s"
-DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
+DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
TP_PROTO(struct hfi1_devdata *dd,
- struct ib_header *hdr),
- TP_ARGS(dd, hdr),
+ struct hfi1_packet *packet,
+ bool sc5),
+ TP_ARGS(dd, packet, sc5),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
- /* LRH */
- __field(u8, vl)
+ __field(u8, lnh)
__field(u8, lver)
__field(u8, sl)
+ __field(u16, len)
+ __field(u32, dlid)
+ __field(u8, sc)
+ __field(u32, slid)
+ __field(u8, opcode)
+ __field(u8, se)
+ __field(u8, mig)
+ __field(u8, pad)
+ __field(u8, tver)
+ __field(u16, pkey)
+ __field(u8, fecn)
+ __field(u8, becn)
+ __field(u32, qpn)
+ __field(u8, ack)
+ __field(u32, psn)
+ /* extended headers */
+ __dynamic_array(u8, ehdrs,
+ hfi1_trace_ib_hdr_len(packet->hdr))
+ ),
+ TP_fast_assign(
+ struct ib_other_headers *ohdr;
+
+ DD_DEV_ASSIGN(dd);
+
+ hfi1_trace_parse_9b_hdr(packet->hdr, sc5,
+ &ohdr,
+ &__entry->lnh,
+ &__entry->lver,
+ &__entry->sl,
+ &__entry->sc,
+ &__entry->len,
+ &__entry->dlid,
+ &__entry->slid);
+
+ hfi1_trace_parse_bth(ohdr, &__entry->ack,
+ &__entry->becn, &__entry->fecn,
+ &__entry->mig, &__entry->se,
+ &__entry->pad, &__entry->opcode,
+ &__entry->tver, &__entry->pkey,
+ &__entry->psn, &__entry->qpn);
+ /* extended headers */
+ memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
+ __get_dynamic_array_len(ehdrs));
+ ),
+ TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " "
+ BTH_PRN " " EHDR_PRN,
+ __get_str(dev),
+ __entry->len,
+ __entry->sc,
+ __entry->dlid,
+ __entry->slid,
+ __entry->lnh, show_lnh(__entry->lnh),
+ __entry->lver,
+ __entry->sl,
+ /* BTH */
+ __entry->opcode, show_ib_opcode(__entry->opcode),
+ __entry->se,
+ __entry->mig,
+ __entry->pad,
+ __entry->tver,
+ __entry->pkey,
+ __entry->fecn,
+ __entry->becn,
+ __entry->qpn,
+ __entry->ack,
+ __entry->psn,
+ /* extended headers */
+ __get_dynamic_array_len(ehdrs),
+ __parse_ib_ehdrs(
+ __entry->opcode,
+ (void *)__get_dynamic_array(ehdrs))
+ )
+);
+
+DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct hfi1_packet *packet, bool sc5),
+ TP_ARGS(dd, packet, sc5));
+
+DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct ib_header *hdr,
+ bool sc5),
+ TP_ARGS(dd, hdr, sc5),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
__field(u8, lnh)
- __field(u16, dlid)
+ __field(u8, lver)
+ __field(u8, sl)
__field(u16, len)
- __field(u16, slid)
- /* BTH */
+ __field(u32, dlid)
+ __field(u8, sc)
+ __field(u32, slid)
__field(u8, opcode)
__field(u8, se)
- __field(u8, m)
+ __field(u8, mig)
__field(u8, pad)
__field(u8, tver)
__field(u16, pkey)
- __field(u8, f)
- __field(u8, b)
+ __field(u8, fecn)
+ __field(u8, becn)
__field(u32, qpn)
- __field(u8, a)
+ __field(u8, ack)
__field(u32, psn)
/* extended headers */
- __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
+ __dynamic_array(u8, ehdrs,
+ hfi1_trace_ib_hdr_len(hdr))
),
- TP_fast_assign(
+ TP_fast_assign(
struct ib_other_headers *ohdr;
DD_DEV_ASSIGN(dd);
- /* LRH */
- __entry->vl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
- __entry->lver =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
- __entry->sl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- __entry->lnh =
- (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- __entry->dlid =
- be16_to_cpu(hdr->lrh[1]);
- /* allow for larger len */
- __entry->len =
- be16_to_cpu(hdr->lrh[2]);
- __entry->slid =
- be16_to_cpu(hdr->lrh[3]);
- /* BTH */
- if (__entry->lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
- __entry->opcode =
- (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
- __entry->se =
- (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
- __entry->m =
- (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
- __entry->pad =
- (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- __entry->tver =
- (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
- __entry->pkey =
- be32_to_cpu(ohdr->bth[0]) & 0xffff;
- __entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) &
- IB_FECN_MASK;
- __entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) &
- IB_BECN_MASK;
- __entry->qpn =
- be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- __entry->a =
- (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
- /* allow for larger PSN */
- __entry->psn =
- be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
+
+ hfi1_trace_parse_9b_hdr(hdr, sc5,
+ &ohdr, &__entry->lnh,
+ &__entry->lver, &__entry->sl,
+ &__entry->sc, &__entry->len,
+ &__entry->dlid, &__entry->slid);
+
+ hfi1_trace_parse_bth(ohdr, &__entry->ack,
+ &__entry->becn, &__entry->fecn,
+ &__entry->mig, &__entry->se,
+ &__entry->pad, &__entry->opcode,
+ &__entry->tver, &__entry->pkey,
+ &__entry->psn, &__entry->qpn);
+
/* extended headers */
- memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
- ibhdr_exhdr_len(hdr));
- ),
- TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
- __get_str(dev),
- /* LRH */
- __entry->vl,
- __entry->lver,
- __entry->sl,
- __entry->lnh, show_lnh(__entry->lnh),
- __entry->dlid,
- __entry->len,
- __entry->slid,
- /* BTH */
- __entry->opcode, show_ib_opcode(__entry->opcode),
- __entry->se,
- __entry->m,
- __entry->pad,
- __entry->tver,
- __entry->pkey,
- __entry->f,
- __entry->b,
- __entry->qpn,
- __entry->a,
- __entry->psn,
- /* extended headers */
- __parse_ib_ehdrs(
- __entry->opcode,
- (void *)__get_dynamic_array(ehdrs))
- )
+ memcpy(__get_dynamic_array(ehdrs),
+ &ohdr->u, __get_dynamic_array_len(ehdrs));
+ ),
+ TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " "
+ BTH_PRN " " EHDR_PRN,
+ __get_str(dev),
+ __entry->len,
+ __entry->sc,
+ __entry->dlid,
+ __entry->slid,
+ __entry->lnh, show_lnh(__entry->lnh),
+ __entry->lver,
+ __entry->sl,
+ /* BTH */
+ __entry->opcode, show_ib_opcode(__entry->opcode),
+ __entry->se,
+ __entry->mig,
+ __entry->pad,
+ __entry->tver,
+ __entry->pkey,
+ __entry->fecn,
+ __entry->becn,
+ __entry->qpn,
+ __entry->ack,
+ __entry->psn,
+ /* extended headers */
+ __get_dynamic_array_len(ehdrs),
+ __parse_ib_ehdrs(
+ __entry->opcode,
+ (void *)__get_dynamic_array(ehdrs))
+ )
);
-DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct ib_header *hdr, bool sc5),
+ TP_ARGS(dd, hdr, sc5));
-DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct ib_header *hdr, bool sc5),
+ TP_ARGS(dd, hdr, sc5));
-DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct ib_header *hdr, bool sc5),
+ TP_ARGS(dd, hdr, sc5));
-DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
#endif /* __HFI1_TRACE_IBHDRS_H */
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index deac77d..8db2253 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -72,6 +72,26 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src)
);
+DECLARE_EVENT_CLASS(
+ hfi1_csr_template,
+ TP_PROTO(void __iomem *addr, u64 value),
+ TP_ARGS(addr, value),
+ TP_STRUCT__entry(
+ __field(void __iomem *, addr)
+ __field(u64, value)
+ ),
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->value = value;
+ ),
+ TP_printk("addr %p value %llx", __entry->addr, __entry->value)
+);
+
+DEFINE_EVENT(
+ hfi1_csr_template, hfi1_write_rcvarray,
+ TP_PROTO(void __iomem *addr, u64 value),
+ TP_ARGS(addr, value));
+
#ifdef CONFIG_FAULT_INJECTION
TRACE_EVENT(hfi1_fault_opcode,
TP_PROTO(struct rvt_qp *qp, u8 opcode),
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index f77e59f..bebf0a8 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -52,9 +52,25 @@
#include "hfi.h"
+#define tidtype_name(type) { PT_##type, #type }
+#define show_tidtype(type) \
+__print_symbolic(type, \
+ tidtype_name(EXPECTED), \
+ tidtype_name(EAGER), \
+ tidtype_name(INVALID)) \
+
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_rx
+#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
+#define show_packettype(etype) \
+__print_symbolic(etype, \
+ packettype_name(EXPECTED), \
+ packettype_name(EAGER), \
+ packettype_name(IB), \
+ packettype_name(ERROR), \
+ packettype_name(BYPASS))
+
TRACE_EVENT(hfi1_rcvhdr,
TP_PROTO(struct hfi1_devdata *dd,
u32 ctxt,
@@ -98,7 +114,7 @@ TRACE_EVENT(hfi1_rcvhdr,
);
TRACE_EVENT(hfi1_receive_interrupt,
- TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt),
TP_ARGS(dd, ctxt),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(u32, ctxt)
@@ -129,7 +145,8 @@ TRACE_EVENT(hfi1_receive_interrupt,
)
);
-TRACE_EVENT(hfi1_exp_tid_reg,
+DECLARE_EVENT_CLASS(
+ hfi1_exp_tid_reg_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
u32 npages, unsigned long va, unsigned long pa,
dma_addr_t dma),
@@ -163,38 +180,45 @@ TRACE_EVENT(hfi1_exp_tid_reg,
)
);
-TRACE_EVENT(hfi1_exp_tid_unreg,
- TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
- unsigned long va, unsigned long pa, dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
- TP_STRUCT__entry(
- __field(unsigned int, ctxt)
- __field(u16, subctxt)
- __field(u32, rarr)
- __field(u32, npages)
- __field(unsigned long, va)
- __field(unsigned long, pa)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->va = va;
- __entry->pa = pa;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->pa,
- __entry->va,
- __entry->dma
- )
- );
+DEFINE_EVENT(
+ hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+ unsigned long va, unsigned long pa, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
+
+DEFINE_EVENT(
+ hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+ unsigned long va, unsigned long pa, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
+
+TRACE_EVENT(
+ hfi1_put_tid,
+ TP_PROTO(struct hfi1_devdata *dd,
+ u32 index, u32 type, unsigned long pa, u16 order),
+ TP_ARGS(dd, index, type, pa, order),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(unsigned long, pa);
+ __field(u32, index);
+ __field(u32, type);
+ __field(u16, order);
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd);
+ __entry->pa = pa;
+ __entry->index = index;
+ __entry->type = type;
+ __entry->order = order;
+ ),
+ TP_printk("[%s] type %s pa %lx index %u order %u",
+ __get_str(dev),
+ show_tidtype(__entry->type),
+ __entry->pa,
+ __entry->index,
+ __entry->order
+ )
+);
TRACE_EVENT(hfi1_exp_tid_inval,
TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 5da1e45..76c2451 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -297,31 +297,25 @@ bail_no_tx:
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
- struct ib_header *hdr = packet->hdr;
- u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct ib_other_headers *ohdr = packet->ohdr;
- u32 bth0, opcode;
+ u32 opcode = packet->opcode;
u32 hdrsize = packet->hlen;
u32 psn;
- u32 pad;
+ u32 pad = packet->pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
struct ib_reth *reth;
- int has_grh = rcv_flags & HFI1_HAS_GRH;
int ret;
- bth0 = be32_to_cpu(ohdr->bth[0]);
- if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
+ if (hfi1_ruc_check_hdr(ibp, packet))
return;
process_ecn(qp, packet, true);
- psn = be32_to_cpu(ohdr->bth[2]);
- opcode = ib_bth_get_opcode(ohdr);
-
+ psn = ib_bth_get_psn(ohdr);
/* Compare the PSN verses the expected PSN. */
if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
/*
@@ -432,8 +426,6 @@ no_immediate_data:
wc.ex.imm_data = 0;
wc.wc_flags = 0;
send_last:
- /* Get the number of bytes the message was padded by. */
- pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -527,8 +519,6 @@ rdma_first:
rdma_last_imm:
wc.wc_flags = IB_WC_WITH_IMM;
- /* Get the number of bytes the message was padded by. */
- pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 6a4e95c..6bf7a1b 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -110,10 +110,10 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
((1 << ppd->lmc) - 1));
if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
qp->s_pkey_index, slid))) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey,
- rdma_ah_get_sl(ah_attr),
- sqp->ibqp.qp_num, qp->ibqp.qp_num,
- slid, rdma_ah_get_dlid(ah_attr));
+ hfi1_bad_pkey(ibp, pkey,
+ rdma_ah_get_sl(ah_attr),
+ sqp->ibqp.qp_num, qp->ibqp.qp_num,
+ slid, rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -128,18 +128,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
sqp->qkey : swqe->ud_wr.remote_qkey;
- if (unlikely(qkey != qp->qkey)) {
- u16 lid;
-
- lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
- ((1 << ppd->lmc) - 1));
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
- rdma_ah_get_sl(ah_attr),
- sqp->ibqp.qp_num, qp->ibqp.qp_num,
- lid,
- rdma_ah_get_dlid(ah_attr));
- goto drop;
- }
+ if (unlikely(qkey != qp->qkey))
+ goto drop; /* silently drop per IBTA spec */
}
/*
@@ -549,7 +539,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
hdr.lrh[3] = cpu_to_be16(slid);
plen = 2 /* PBC */ + hwords;
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
vl = sc_to_vlt(ppd->dd, sc5);
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
@@ -668,36 +658,31 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
struct ib_other_headers *ohdr = packet->ohdr;
- int opcode;
u32 hdrsize = packet->hlen;
struct ib_wc wc;
u32 qkey;
u32 src_qp;
- u16 dlid, pkey;
+ u16 pkey;
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_header *hdr = packet->hdr;
- u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
- bool has_grh = rcv_flags & HFI1_HAS_GRH;
u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
u32 bth1;
- u8 sl_from_sc, sl;
- u16 slid;
- u8 extra_bytes;
+ u8 sl_from_sc;
+ u8 extra_bytes = packet->pad;
+ u8 opcode = packet->opcode;
+ u8 sl = packet->sl;
+ u32 dlid = packet->dlid;
+ u32 slid = packet->slid;
- qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
- src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
- dlid = ib_get_dlid(hdr);
bth1 = be32_to_cpu(ohdr->bth[1]);
- slid = ib_get_slid(hdr);
+ qkey = ib_get_qkey(ohdr);
+ src_qp = ib_get_sqpn(ohdr);
pkey = ib_bth_get_pkey(ohdr);
- opcode = ib_bth_get_opcode(ohdr);
- sl = ib_get_sl(hdr);
- extra_bytes = ib_bth_get_pad(ohdr);
extra_bytes += (SIZE_OF_CRC << 2);
sl_from_sc = ibp->sc_to_sl[sc5];
@@ -727,10 +712,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* for invalid pkeys is optional according to
* IB spec (release 1.3, section 10.9.4)
*/
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- pkey, sl,
- src_qp, qp->ibqp.qp_num,
- slid, dlid);
+ hfi1_bad_pkey(ibp,
+ pkey, sl,
+ src_qp, qp->ibqp.qp_num,
+ slid, dlid);
return;
}
} else {
@@ -739,12 +724,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (mgmt_pkey_idx < 0)
goto drop;
}
- if (unlikely(qkey != qp->qkey)) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl,
- src_qp, qp->ibqp.qp_num,
- slid, dlid);
+ if (unlikely(qkey != qp->qkey)) /* Silent drop */
return;
- }
+
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
(tlen > 2048 || (sc5 == 0xF))))
@@ -811,7 +793,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
qp->r_flags |= RVT_R_REUSE_SGE;
goto drop;
}
- if (has_grh) {
+ if (packet->grh) {
hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index a8f0aa4..d9036ba 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -51,14 +51,6 @@
#include "trace.h"
#include "mmu_rb.h"
-struct tid_group {
- struct list_head list;
- u32 base;
- u8 size;
- u8 used;
- u8 map;
-};
-
struct tid_rb_node {
struct mmu_rb_node mmu;
unsigned long phys;
@@ -75,8 +67,6 @@ struct tid_pageset {
u16 count;
};
-#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
-
#define num_user_pages(vaddr, len) \
(1 + (((((unsigned long)(vaddr) + \
(unsigned long)(len) - 1) & PAGE_MASK) - \
@@ -109,96 +99,14 @@ static struct mmu_rb_ops tid_rb_ops = {
.invalidate = tid_rb_invalidate
};
-static inline u32 rcventry2tidinfo(u32 rcventry)
-{
- u32 pair = rcventry & ~0x1;
-
- return EXP_TID_SET(IDX, pair >> 1) |
- EXP_TID_SET(CTRL, 1 << (rcventry - pair));
-}
-
-static inline void exp_tid_group_init(struct exp_tid_set *set)
-{
- INIT_LIST_HEAD(&set->list);
- set->count = 0;
-}
-
-static inline void tid_group_remove(struct tid_group *grp,
- struct exp_tid_set *set)
-{
- list_del_init(&grp->list);
- set->count--;
-}
-
-static inline void tid_group_add_tail(struct tid_group *grp,
- struct exp_tid_set *set)
-{
- list_add_tail(&grp->list, &set->list);
- set->count++;
-}
-
-static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
-{
- struct tid_group *grp =
- list_first_entry(&set->list, struct tid_group, list);
- list_del_init(&grp->list);
- set->count--;
- return grp;
-}
-
-static inline void tid_group_move(struct tid_group *group,
- struct exp_tid_set *s1,
- struct exp_tid_set *s2)
-{
- tid_group_remove(group, s1);
- tid_group_add_tail(group, s2);
-}
-
-int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd)
-{
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_devdata *dd = fd->dd;
- u32 tidbase;
- u32 i;
- struct tid_group *grp, *gptr;
-
- exp_tid_group_init(&uctxt->tid_group_list);
- exp_tid_group_init(&uctxt->tid_used_list);
- exp_tid_group_init(&uctxt->tid_full_list);
-
- tidbase = uctxt->expected_base;
- for (i = 0; i < uctxt->expected_count /
- dd->rcv_entries.group_size; i++) {
- grp = kzalloc(sizeof(*grp), GFP_KERNEL);
- if (!grp)
- goto grp_failed;
-
- grp->size = dd->rcv_entries.group_size;
- grp->base = tidbase;
- tid_group_add_tail(grp, &uctxt->tid_group_list);
- tidbase += dd->rcv_entries.group_size;
- }
-
- return 0;
-
-grp_failed:
- list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
- list) {
- list_del_init(&grp->list);
- kfree(grp);
- }
-
- return -ENOMEM;
-}
-
/*
* Initialize context and file private data needed for Expected
* receive caching. This needs to be done after the context has
* been configured with the eager/expected RcvEntry counts.
*/
-int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
@@ -266,18 +174,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
return ret;
}
-void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt)
-{
- struct tid_group *grp, *gptr;
-
- list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
- list) {
- list_del_init(&grp->list);
- kfree(grp);
- }
- hfi1_clear_tids(uctxt);
-}
-
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
@@ -303,23 +199,6 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
}
/*
- * Write an "empty" RcvArray entry.
- * This function exists so the TID registaration code can use it
- * to write to unused/unneeded entries and still take advantage
- * of the WC performance improvements. The HFI will ignore this
- * write to the RcvArray entry.
- */
-static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
-{
- /*
- * Doing the WC fill writes only makes sense if the device is
- * present and the RcvArray has been mapped as WC memory.
- */
- if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
- writeq(0, dd->rcvarray_wc + (index * 8));
-}
-
-/*
* RcvArray entry allocation for Expected Receives is done by the
* following algorithm:
*
@@ -935,12 +814,11 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
node->npages, node->mmu.addr, node->phys,
node->dma_addr);
- hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
/*
* Make sure device has seen the write before we unpin the
* pages.
*/
- flush_wc();
+ hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
PCI_DMA_FROMDEVICE);
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 5250c89..6cbaa4c 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -49,30 +49,10 @@
#include "hfi.h"
-#define EXP_TID_TIDLEN_MASK 0x7FFULL
-#define EXP_TID_TIDLEN_SHIFT 0
-#define EXP_TID_TIDCTRL_MASK 0x3ULL
-#define EXP_TID_TIDCTRL_SHIFT 20
-#define EXP_TID_TIDIDX_MASK 0x3FFULL
-#define EXP_TID_TIDIDX_SHIFT 22
-#define EXP_TID_GET(tid, field) \
- (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
+#include "exp_rcv.h"
-#define EXP_TID_SET(field, value) \
- (((value) & EXP_TID_TID##field##_MASK) << \
- EXP_TID_TID##field##_SHIFT)
-#define EXP_TID_CLEAR(tid, field) ({ \
- (tid) &= ~(EXP_TID_TID##field##_MASK << \
- EXP_TID_TID##field##_SHIFT); \
- })
-#define EXP_TID_RESET(tid, field, value) do { \
- EXP_TID_CLEAR(tid, field); \
- (tid) |= EXP_TID_SET(field, (value)); \
- } while (0)
-
-void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt);
-int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd);
-int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd);
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt);
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index d55339f..8f02707 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -94,43 +94,13 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* Number of BTH.PSN bits used for sequence number in expected rcvs */
#define BTH_SEQ_MASK 0x7ffull
-/*
- * Define fields in the KDETH header so we can update the header
- * template.
- */
-#define KDETH_OFFSET_SHIFT 0
-#define KDETH_OFFSET_MASK 0x7fff
-#define KDETH_OM_SHIFT 15
-#define KDETH_OM_MASK 0x1
-#define KDETH_TID_SHIFT 16
-#define KDETH_TID_MASK 0x3ff
-#define KDETH_TIDCTRL_SHIFT 26
-#define KDETH_TIDCTRL_MASK 0x3
-#define KDETH_INTR_SHIFT 28
-#define KDETH_INTR_MASK 0x1
-#define KDETH_SH_SHIFT 29
-#define KDETH_SH_MASK 0x1
-#define KDETH_HCRC_UPPER_SHIFT 16
-#define KDETH_HCRC_UPPER_MASK 0xff
-#define KDETH_HCRC_LOWER_SHIFT 24
-#define KDETH_HCRC_LOWER_MASK 0xff
-
#define AHG_KDETH_INTR_SHIFT 12
#define AHG_KDETH_SH_SHIFT 13
+#define AHG_KDETH_ARRAY_SIZE 9
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
-#define KDETH_GET(val, field) \
- (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
-#define KDETH_SET(dw, field, val) do { \
- u32 dwval = le32_to_cpu(dw); \
- dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
- dwval |= (((val) & KDETH_##field##_MASK) << \
- KDETH_##field##_SHIFT); \
- dw = cpu_to_le32(dwval); \
- } while (0)
-
#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \
do { \
if ((idx) < ARRAY_SIZE((arr))) \
@@ -141,23 +111,10 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
return -ERANGE; \
} while (0)
-/* KDETH OM multipliers and switch over point */
-#define KDETH_OM_SMALL 4
-#define KDETH_OM_SMALL_SHIFT 2
-#define KDETH_OM_LARGE 64
-#define KDETH_OM_LARGE_SHIFT 6
-#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
-
/* Tx request flag bits */
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
-/* SDMA request flag bits */
-#define SDMA_REQ_FOR_THREAD 1
-#define SDMA_REQ_SEND_DONE 2
-#define SDMA_REQ_HAS_ERROR 3
-#define SDMA_REQ_DONE_ERROR 4
-
#define SDMA_PKT_Q_INACTIVE BIT(0)
#define SDMA_PKT_Q_ACTIVE BIT(1)
#define SDMA_PKT_Q_DEFERRED BIT(2)
@@ -204,25 +161,41 @@ struct evict_data {
};
struct user_sdma_request {
- struct sdma_req_info info;
- struct hfi1_user_sdma_pkt_q *pq;
- struct hfi1_user_sdma_comp_q *cq;
/* This is the original header from user space */
struct hfi1_pkt_header hdr;
+
+ /* Read mostly fields */
+ struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp;
+ struct hfi1_user_sdma_comp_q *cq;
/*
* Pointer to the SDMA engine for this request.
* Since different request could be on different VLs,
* each request will need it's own engine pointer.
*/
struct sdma_engine *sde;
- s8 ahg_idx;
- u32 ahg[9];
+ struct sdma_req_info info;
+ /* TID array values copied from the tid_iov vector */
+ u32 *tids;
+ /* total length of the data in the request */
+ u32 data_len;
+ /* number of elements copied to the tids array */
+ u16 n_tids;
/*
- * KDETH.Offset (Eager) field
- * We need to remember the initial value so the headers
- * can be updated properly.
+ * We copy the iovs for this request (based on
+ * info.iovcnt). These are only the data vectors
*/
- u32 koffset;
+ u8 data_iovs;
+ s8 ahg_idx;
+
+ /* Writeable fields shared with interrupt */
+ u64 seqcomp ____cacheline_aligned_in_smp;
+ u64 seqsubmitted;
+ /* status of the last txreq completed */
+ int status;
+
+ /* Send side fields */
+ struct list_head txps ____cacheline_aligned_in_smp;
+ u64 seqnum;
/*
* KDETH.OFFSET (TID) field
* The offset can cover multiple packets, depending on the
@@ -230,29 +203,21 @@ struct user_sdma_request {
*/
u32 tidoffset;
/*
- * We copy the iovs for this request (based on
- * info.iovcnt). These are only the data vectors
+ * KDETH.Offset (Eager) field
+ * We need to remember the initial value so the headers
+ * can be updated properly.
*/
- unsigned data_iovs;
- /* total length of the data in the request */
- u32 data_len;
+ u32 koffset;
+ u32 sent;
+ /* TID index copied from the tid_iov vector */
+ u16 tididx;
/* progress index moving along the iovs array */
- unsigned iov_idx;
+ u8 iov_idx;
+ u8 done;
+ u8 has_error;
+
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
- /* number of elements copied to the tids array */
- u16 n_tids;
- /* TID array values copied from the tid_iov vector */
- u32 *tids;
- u16 tididx;
- u32 sent;
- u64 seqnum;
- u64 seqcomp;
- u64 seqsubmitted;
- struct list_head txps;
- unsigned long flags;
- /* status of the last txreq completed */
- int status;
-};
+} ____cacheline_aligned_in_smp;
/*
* A single txreq could span up to 3 physical pages when the MTU
@@ -307,7 +272,8 @@ static int defer_packet_queue(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *txreq,
- unsigned int seq);
+ uint seq,
+ bool pkts_sent);
static void activate_packet_queue(struct iowait *wait, int reason);
static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
unsigned long len);
@@ -329,7 +295,8 @@ static int defer_packet_queue(
struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *txreq,
- unsigned seq)
+ uint seq,
+ bool pkts_sent)
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
@@ -349,7 +316,7 @@ static int defer_packet_queue(
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
write_seqlock(&dev->iowait_lock);
if (list_empty(&pq->busy.list))
- list_add_tail(&pq->busy.list, &sde->dmawait);
+ iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
write_sequnlock(&dev->iowait_lock);
return -EBUSY;
eagain:
@@ -379,7 +346,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
struct hfi1_devdata *dd;
struct hfi1_user_sdma_comp_q *cq;
struct hfi1_user_sdma_pkt_q *pq;
- unsigned long flags;
if (!uctxt || !fd)
return -EBADF;
@@ -393,7 +359,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
if (!pq)
return -ENOMEM;
- INIT_LIST_HEAD(&pq->list);
pq->dd = dd;
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
@@ -454,10 +419,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
fd->pq = pq;
fd->cq = cq;
- spin_lock_irqsave(&uctxt->sdma_qlock, flags);
- list_add(&pq->list, &uctxt->sdma_queues);
- spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
-
return 0;
pq_mmu_fail:
@@ -476,11 +437,10 @@ pq_reqs_nomem:
return ret;
}
-int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
+int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_user_sdma_pkt_q *pq;
- unsigned long flags;
hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
uctxt->ctxt, fd->subctxt);
@@ -488,10 +448,6 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
if (pq) {
if (pq->handler)
hfi1_mmu_rb_unregister(pq->handler);
- spin_lock_irqsave(&uctxt->sdma_qlock, flags);
- if (!list_empty(&pq->list))
- list_del_init(&pq->list);
- spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
iowait_sdma_drain(&pq->busy);
/* Wait until all requests have been freed. */
wait_event_interruptible(
@@ -607,12 +563,20 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
uctxt->ctxt, fd->subctxt, info.comp_idx);
req = pq->reqs + info.comp_idx;
- memset(req, 0, sizeof(*req));
req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
+ req->data_len = 0;
req->pq = pq;
req->cq = cq;
req->status = -1;
req->ahg_idx = -1;
+ req->iov_idx = 0;
+ req->sent = 0;
+ req->seqnum = 0;
+ req->seqcomp = 0;
+ req->seqsubmitted = 0;
+ req->tids = NULL;
+ req->done = 0;
+ req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
@@ -701,12 +665,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
/* Save all the IO vector structures */
for (i = 0; i < req->data_iovs; i++) {
+ req->iovs[i].offset = 0;
INIT_LIST_HEAD(&req->iovs[i].list);
memcpy(&req->iovs[i].iov,
iovec + idx++,
sizeof(req->iovs[i].iov));
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
+ req->data_iovs = i;
req->status = ret;
goto free_req;
}
@@ -749,6 +715,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
}
req->tids = tmp;
req->n_tids = ntids;
+ req->tididx = 0;
idx++;
}
@@ -791,12 +758,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
* request have been submitted to the SDMA engine. However, it
* will not wait for send completions.
*/
- while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+ while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
if (ret != -EBUSY) {
req->status = ret;
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ WRITE_ONCE(req->has_error, 1);
if (ACCESS_ONCE(req->seqcomp) ==
req->seqsubmitted - 1)
goto free_req;
@@ -898,10 +865,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
pq = req->pq;
/* If tx completion has reported an error, we are done. */
- if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ if (READ_ONCE(req->has_error))
return -EFAULT;
- }
/*
* Check if we might have sent the entire request already
@@ -924,10 +889,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
* with errors. If so, we are not going to process any
* more packets from this request.
*/
- if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ if (READ_ONCE(req->has_error))
return -EFAULT;
- }
tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
if (!tx)
@@ -1024,11 +987,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
datalen);
if (changes < 0)
goto free_tx;
- sdma_txinit_ahg(&tx->txreq,
- SDMA_TXREQ_F_USE_AHG,
- datalen, req->ahg_idx, changes,
- req->ahg, sizeof(req->hdr),
- user_sdma_txreq_cb);
}
} else {
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -1105,7 +1063,7 @@ dosend:
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
- set_bit(SDMA_REQ_SEND_DONE, &req->flags);
+ WRITE_ONCE(req->done, 1);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
@@ -1155,14 +1113,23 @@ static int pin_vector_pages(struct user_sdma_request *req,
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct sdma_mmu_node *node = NULL;
struct mmu_rb_node *rb_node;
-
- rb_node = hfi1_mmu_rb_extract(pq->handler,
- (unsigned long)iovec->iov.iov_base,
- iovec->iov.iov_len);
- if (rb_node)
+ bool extracted;
+
+ extracted =
+ hfi1_mmu_rb_remove_unless_exact(pq->handler,
+ (unsigned long)
+ iovec->iov.iov_base,
+ iovec->iov.iov_len, &rb_node);
+ if (rb_node) {
node = container_of(rb_node, struct sdma_mmu_node, rb);
- else
- rb_node = NULL;
+ if (!extracted) {
+ atomic_inc(&node->refcount);
+ iovec->pages = node->pages;
+ iovec->npages = node->npages;
+ iovec->node = node;
+ return 0;
+ }
+ }
if (!node) {
node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -1423,21 +1390,22 @@ done:
}
static int set_txreq_header_ahg(struct user_sdma_request *req,
- struct user_sdma_txreq *tx, u32 len)
+ struct user_sdma_txreq *tx, u32 datalen)
{
+ u32 ahg[AHG_KDETH_ARRAY_SIZE];
int diff = 0;
u8 omfactor; /* KDETH.OM */
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &req->hdr;
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
- u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len));
+ u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
if (PBC2LRH(pbclen) != lrhlen) {
/* PBC.PbcLengthDWs */
- AHG_HEADER_SET(req->ahg, diff, 0, 0, 12,
+ AHG_HEADER_SET(ahg, diff, 0, 0, 12,
cpu_to_le16(LRH2PBC(lrhlen)));
/* LRH.PktLen (we need the full 16 bits due to byte swap) */
- AHG_HEADER_SET(req->ahg, diff, 3, 0, 16,
+ AHG_HEADER_SET(ahg, diff, 3, 0, 16,
cpu_to_be16(lrhlen >> 2));
}
@@ -1449,13 +1417,12 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
val32 |= 1UL << 31;
- AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
- AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
+ AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
+ AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
/* KDETH.Offset */
- AHG_HEADER_SET(req->ahg, diff, 15, 0, 16,
+ AHG_HEADER_SET(ahg, diff, 15, 0, 16,
cpu_to_le16(req->koffset & 0xffff));
- AHG_HEADER_SET(req->ahg, diff, 15, 16, 16,
- cpu_to_le16(req->koffset >> 16));
+ AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16));
if (req_opcode(req->info.ctrl) == EXPECTED) {
__le16 val;
@@ -1473,9 +1440,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
* we have to check again.
*/
if (++req->tididx > req->n_tids - 1 ||
- !req->tids[req->tididx]) {
+ !req->tids[req->tididx])
return -EINVAL;
- }
tidval = req->tids[req->tididx];
}
omfactor = ((EXP_TID_GET(tidval, LEN) *
@@ -1483,7 +1449,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
KDETH_OM_SMALL_SHIFT;
/* KDETH.OM and KDETH.OFFSET (TID) */
- AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
+ AHG_HEADER_SET(ahg, diff, 7, 0, 16,
((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
((req->tidoffset >> omfactor)
& 0x7fff)));
@@ -1503,12 +1469,20 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
AHG_KDETH_INTR_SHIFT));
}
- AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
+ AHG_HEADER_SET(ahg, diff, 7, 16, 14, val);
}
+ if (diff < 0)
+ return diff;
trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
req->info.comp_idx, req->sde->this_idx,
- req->ahg_idx, req->ahg, diff, tidval);
+ req->ahg_idx, ahg, diff, tidval);
+ sdma_txinit_ahg(&tx->txreq,
+ SDMA_TXREQ_F_USE_AHG,
+ datalen, req->ahg_idx, diff,
+ ahg, sizeof(req->hdr),
+ user_sdma_txreq_cb);
+
return diff;
}
@@ -1537,7 +1511,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
if (status != SDMA_TXREQ_S_OK) {
SDMA_DBG(req, "SDMA completion with error %d",
status);
- set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
+ WRITE_ONCE(req->has_error, 1);
}
req->seqcomp = tx->seqnum;
@@ -1556,8 +1530,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
if (status != SDMA_TXREQ_S_OK)
req->status = status;
if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
- (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
- test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
+ (READ_ONCE(req->done) ||
+ READ_ONCE(req->has_error))) {
user_sdma_free_request(req, false);
pq_update(pq);
set_comp_state(pq, cq, idx, ERROR, req->status);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index e5b10ae..84c199d 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -56,8 +56,7 @@
extern uint extended_psn;
struct hfi1_user_sdma_pkt_q {
- struct list_head list;
- unsigned ctxt;
+ u16 ctxt;
u16 subctxt;
u16 n_max_reqs;
atomic_t n_reqs;
@@ -82,7 +81,8 @@ struct hfi1_user_sdma_comp_q {
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
struct hfi1_filedata *fd);
-int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd);
+int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt);
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 2d19f9b..c88c03c 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -508,13 +508,14 @@ again:
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
-static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
+static inline opcode_handler qp_ok(struct hfi1_packet *packet)
{
if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
return NULL;
- if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
- (opcode == IB_OPCODE_CNP))
- return opcode_handler_tbl[opcode];
+ if (((packet->opcode & RVT_OPCODE_QP_MASK) ==
+ packet->qp->allowed_ops) ||
+ (packet->opcode == IB_OPCODE_CNP))
+ return opcode_handler_tbl[packet->opcode];
return NULL;
}
@@ -548,69 +549,34 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
return pbc;
}
-/**
- * hfi1_ib_rcv - process an incoming packet
- * @packet: data packet information
- *
- * This is called to process an incoming packet at interrupt level.
- *
- * Tlen is the length of the header + data + CRC in bytes.
- */
-void hfi1_ib_rcv(struct hfi1_packet *packet)
+static inline void hfi1_handle_packet(struct hfi1_packet *packet,
+ bool is_mcast)
{
+ u32 qp_num;
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct ib_header *hdr = packet->hdr;
- u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler packet_handler;
unsigned long flags;
- u32 qp_num;
- int lnh;
- u8 opcode;
- u16 lid;
-
- /* Check for GRH */
- lnh = ib_get_lnh(hdr);
- if (lnh == HFI1_LRH_BTH) {
- packet->ohdr = &hdr->u.oth;
- } else if (lnh == HFI1_LRH_GRH) {
- u32 vtf;
-
- packet->ohdr = &hdr->u.l.oth;
- if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
- goto drop;
- vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
- if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
- goto drop;
- packet->rcv_flags |= HFI1_HAS_GRH;
- } else {
- goto drop;
- }
- trace_input_ibhdr(rcd->dd, hdr);
+ inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]);
- opcode = ib_bth_get_opcode(packet->ohdr);
- inc_opstats(tlen, &rcd->opstats->stats[opcode]);
-
- /* Get the destination QP number. */
- qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
- lid = ib_get_dlid(hdr);
- if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
- (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
+ if (unlikely(is_mcast)) {
struct rvt_mcast *mcast;
struct rvt_mcast_qp *p;
- if (lnh != HFI1_LRH_GRH)
+ if (!packet->grh)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
+ mcast = rvt_mcast_find(&ibp->rvp,
+ &packet->grh->dgid,
+ packet->dlid);
if (!mcast)
goto drop;
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
spin_lock_irqsave(&packet->qp->r_lock, flags);
- packet_handler = qp_ok(opcode, packet);
+ packet_handler = qp_ok(packet);
if (likely(packet_handler))
packet_handler(packet);
else
@@ -624,19 +590,21 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
if (atomic_dec_return(&mcast->refcount) <= 1)
wake_up(&mcast->wait);
} else {
+ /* Get the destination QP number. */
+ qp_num = ib_bth_get_qpn(packet->ohdr);
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp) {
rcu_read_unlock();
goto drop;
}
- if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
+ if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode,
true))) {
rcu_read_unlock();
goto drop;
}
spin_lock_irqsave(&packet->qp->r_lock, flags);
- packet_handler = qp_ok(opcode, packet);
+ packet_handler = qp_ok(packet);
if (likely(packet_handler))
packet_handler(packet);
else
@@ -645,11 +613,29 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock();
}
return;
-
drop:
ibp->rvp.n_pkt_drops++;
}
+/**
+ * hfi1_ib_rcv - process an incoming packet
+ * @packet: data packet information
+ *
+ * This is called to process an incoming packet at interrupt level.
+ */
+void hfi1_ib_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ bool is_mcast = false;
+
+ if (unlikely(hfi1_check_mcast(packet->dlid)))
+ is_mcast = true;
+
+ trace_input_ibhdr(rcd->dd, packet,
+ !!(packet->rhf & RHF_DC_INFO_SMASK));
+ hfi1_handle_packet(packet, is_mcast);
+}
+
/*
* This is called from a timer to check for QPs
* which need kernel memory in order to send a packet.
@@ -863,7 +849,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
/* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
pbc = hfi1_fault_tx(qp, opcode, pbc);
@@ -878,14 +864,15 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (unlikely(ret))
goto bail_build;
}
- ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
+ ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
+ ps->pkts_sent);
if (unlikely(ret < 0)) {
if (ret == -ECOMM)
goto bail_ecomm;
return ret;
}
trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
- &ps->s_txreq->phdr.hdr);
+ &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
return ret;
bail_ecomm:
@@ -935,7 +922,8 @@ static int pio_wait(struct rvt_qp *qp,
dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
- list_add_tail(&priv->s_iowait.list, &sc->piowait);
+ iowait_queue(ps->pkts_sent, &priv->s_iowait,
+ &sc->piowait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
@@ -999,7 +987,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u8 opcode = get_opcode(&tx->phdr.hdr);
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
@@ -1058,7 +1046,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
}
trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
- &ps->s_txreq->phdr.hdr);
+ &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
pio_bail:
if (qp->s_wqe) {
@@ -1368,7 +1356,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
props->lmc = ppd->lmc;
/* OPA logical states match IB logical states */
props->state = driver_lstate(ppd);
- props->phys_state = hfi1_ibphys_portstate(ppd);
+ props->phys_state = driver_pstate(ppd);
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
@@ -1547,13 +1535,22 @@ static void init_ibport(struct hfi1_pportdata *ppd)
ibp->sc_to_sl[i] = i;
}
+ for (i = 0; i < RVT_MAX_TRAP_LISTS ; i++)
+ INIT_LIST_HEAD(&ibp->rvp.trap_lists[i].list);
+ setup_timer(&ibp->rvp.trap_timer, hfi1_handle_trap_timer,
+ (unsigned long)ibp);
+
spin_lock_init(&ibp->rvp.lock);
/* Set the prefix to the default value (see ch. 4.1.1) */
ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
ibp->rvp.sm_lid = 0;
- /* Below should only set bits defined in OPA PortInfo.CapabilityMask */
+ /*
+ * Below should only set bits defined in OPA PortInfo.CapabilityMask
+ * and PortInfo.CapabilityMask3
+ */
ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
IB_PORT_CAP_MASK_NOTICE_SUP;
+ ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported;
ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
@@ -1564,14 +1561,13 @@ static void init_ibport(struct hfi1_pportdata *ppd)
RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}
-static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
- size_t str_len)
+static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
struct hfi1_ibdev *dev = dev_from_rdi(rdi);
u32 ver = dd_from_dev(dev)->dc8051_ver;
- snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver),
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u", dc8051_ver_maj(ver),
dc8051_ver_min(ver), dc8051_ver_patch(ver));
}
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index cd635d0..34267c7 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -143,6 +143,7 @@ struct hfi1_pkt_state {
unsigned long timeout_int;
int cpu;
bool in_thread;
+ bool pkts_sent;
};
#define HFI1_PSN_CREDIT 16
@@ -236,8 +237,8 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
/*
* This must be called with s_lock held.
*/
-void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
- u32 qp1, u32 qp2, u16 lid1, u16 lid2);
+void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
+ u32 qp1, u32 qp2, u16 lid1, u16 lid2);
void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
@@ -307,8 +308,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet);
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct ib_header *hdr,
- u32 rcv_flags,
+ struct hfi1_packet *packet,
struct rvt_qp *qp);
u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
@@ -346,8 +346,7 @@ static inline u8 get_opcode(struct ib_header *h)
return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
}
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
- int has_grh, struct rvt_qp *qp, u32 bth0);
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
const struct ib_global_route *grh, u32 hwords, u32 nwords);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
index 4a621cd..eec7c14 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -103,6 +103,7 @@ struct hfi1_vnic_sdma {
struct sdma_txreq stx;
unsigned int state;
u8 q_idx;
+ bool pkts_sent;
};
/**
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 339f0cd..89b0564 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -95,7 +95,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
- hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
uctxt->is_vnic = true;
done:
@@ -106,7 +106,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
struct hfi1_ctxtdata **vnic_ctxt)
{
struct hfi1_ctxtdata *uctxt;
- unsigned int ctxt;
+ u16 ctxt;
int ret;
if (dd->flags & HFI1_FROZEN)
@@ -156,11 +156,11 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
return ret;
bail:
/*
- * hfi1_free_ctxtdata() also releases send_context
- * structure if uctxt->sc is not null
+ * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will
+ * release send_context structure if uctxt->sc is not null
*/
dd->rcd[uctxt->ctxt] = NULL;
- hfi1_free_ctxtdata(dd, uctxt);
+ hfi1_rcd_put(uctxt);
dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
return ret;
}
@@ -186,7 +186,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
HFI1_RCVCTRL_INTRAVAIL_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
- HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
/*
* VNIC contexts are allocated from user context pool.
* Release them back to user context pool.
@@ -208,7 +208,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
hfi1_clear_ctxt_pkey(dd, uctxt);
hfi1_stats.sps_ctxts--;
- hfi1_free_ctxtdata(dd, uctxt);
+ hfi1_rcd_put(uctxt);
}
void hfi1_vnic_setup(struct hfi1_devdata *dd)
@@ -751,6 +751,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
if (rc)
break;
+ hfi1_rcd_get(dd->vnic.ctxt[i]);
dd->vnic.ctxt[i]->vnic_q_idx = i;
}
@@ -762,6 +763,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
*/
while (i-- > dd->vnic.num_ctxt) {
deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ hfi1_rcd_put(dd->vnic.ctxt[i]);
dd->vnic.ctxt[i] = NULL;
}
goto alloc_fail;
@@ -791,6 +793,7 @@ static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
if (--dd->vnic.num_vports == 0) {
for (i = 0; i < dd->vnic.num_ctxt; i++) {
deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ hfi1_rcd_put(dd->vnic.ctxt[i]);
dd->vnic.ctxt[i] = NULL;
}
hfi1_deinit_vnic_rsm(dd);
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index 51a817d..7815d74 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -198,11 +198,16 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
goto free_desc;
tx->retry_count = 0;
- ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq);
+ ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq,
+ vnic_sdma->pkts_sent);
/* When -ECOMM, sdma callback will be called with ABORT status */
if (unlikely(ret && unlikely(ret != -ECOMM)))
goto free_desc;
+ if (!ret) {
+ vnic_sdma->pkts_sent = true;
+ iowait_starve_clear(vnic_sdma->pkts_sent, &vnic_sdma->wait);
+ }
return ret;
free_desc:
@@ -211,6 +216,8 @@ free_desc:
tx_err:
if (ret != -EBUSY)
dev_kfree_skb_any(skb);
+ else
+ vnic_sdma->pkts_sent = false;
return ret;
}
@@ -225,7 +232,8 @@ tx_err:
static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
struct iowait *wait,
struct sdma_txreq *txreq,
- unsigned int seq)
+ uint seq,
+ bool pkts_sent)
{
struct hfi1_vnic_sdma *vnic_sdma =
container_of(wait, struct hfi1_vnic_sdma, wait);
@@ -239,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list))
- list_add_tail(&vnic_sdma->wait.list, &sde->dmawait);
+ iowait_queue(pkts_sent, wait, &sde->dmawait);
write_sequnlock(&dev->iowait_lock);
return -EBUSY;
}
OpenPOWER on IntegriCloud