summaryrefslogtreecommitdiffstats
path: root/sys/dev
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev')
-rw-r--r--sys/dev/ahci/ahci.c4
-rw-r--r--sys/dev/ahci/ahci_pci.c2
-rw-r--r--sys/dev/e1000/e1000_api.c4
-rw-r--r--sys/dev/e1000/e1000_hw.h8
-rw-r--r--sys/dev/e1000/e1000_ich8lan.c17
-rw-r--r--sys/dev/e1000/e1000_ich8lan.h2
-rw-r--r--sys/dev/e1000/e1000_phy.c22
-rw-r--r--sys/dev/e1000/if_em.c6
-rw-r--r--sys/dev/filemon/filemon.c2
-rw-r--r--sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c155
-rw-r--r--sys/dev/hyperv/storvsc/hv_vstorage.h6
-rw-r--r--sys/dev/ntb/if_ntb/if_ntb.c1822
-rw-r--r--sys/dev/ntb/ntb.c462
-rw-r--r--sys/dev/ntb/ntb.h409
-rw-r--r--sys/dev/ntb/ntb_hw/ntb_hw.c1356
-rw-r--r--sys/dev/ntb/ntb_hw/ntb_hw.h125
-rw-r--r--sys/dev/ntb/ntb_hw/ntb_regs.h3
-rw-r--r--sys/dev/ntb/ntb_if.m210
-rw-r--r--sys/dev/ntb/ntb_transport.c1518
-rw-r--r--sys/dev/ntb/ntb_transport.h61
-rw-r--r--sys/dev/pci/pci_pci.c49
21 files changed, 3673 insertions, 2570 deletions
diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c
index ec42b3c..ee0e957 100644
--- a/sys/dev/ahci/ahci.c
+++ b/sys/dev/ahci/ahci.c
@@ -416,7 +416,8 @@ ahci_setup_interrupt(device_t dev)
else if (ctlr->numirqs == 1 || i >= ctlr->channels ||
(ctlr->ccc && i == ctlr->cccv))
ctlr->irqs[i].mode = AHCI_IRQ_MODE_ALL;
- else if (i == ctlr->numirqs - 1)
+ else if (ctlr->channels > ctlr->numirqs &&
+ i == ctlr->numirqs - 1)
ctlr->irqs[i].mode = AHCI_IRQ_MODE_AFTER;
else
ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE;
@@ -465,6 +466,7 @@ ahci_intr(void *data)
} else { /* AHCI_IRQ_MODE_AFTER */
unit = irq->r_irq_rid - 1;
is = ATA_INL(ctlr->r_mem, AHCI_IS);
+ is &= (0xffffffff << unit);
}
/* CCC interrupt is edge triggered. */
if (ctlr->ccc)
diff --git a/sys/dev/ahci/ahci_pci.c b/sys/dev/ahci/ahci_pci.c
index 6b6a1a6..4a44484 100644
--- a/sys/dev/ahci/ahci_pci.c
+++ b/sys/dev/ahci/ahci_pci.c
@@ -187,7 +187,7 @@ static const struct {
{0xa10f8086, 0x00, "Intel Sunrise Point (RAID)", 0},
{0x23238086, 0x00, "Intel DH89xxCC", 0},
{0x2360197b, 0x00, "JMicron JMB360", 0},
- {0x2361197b, 0x00, "JMicron JMB361", AHCI_Q_NOFORCE},
+ {0x2361197b, 0x00, "JMicron JMB361", AHCI_Q_NOFORCE | AHCI_Q_1CH},
{0x2362197b, 0x00, "JMicron JMB362", 0},
{0x2363197b, 0x00, "JMicron JMB363", AHCI_Q_NOFORCE},
{0x2365197b, 0x00, "JMicron JMB365", AHCI_Q_NOFORCE},
diff --git a/sys/dev/e1000/e1000_api.c b/sys/dev/e1000/e1000_api.c
index 28379cc..52e2609 100644
--- a/sys/dev/e1000/e1000_api.c
+++ b/sys/dev/e1000/e1000_api.c
@@ -304,6 +304,10 @@ s32 e1000_set_mac_type(struct e1000_hw *hw)
case E1000_DEV_ID_PCH_SPT_I219_LM2:
case E1000_DEV_ID_PCH_SPT_I219_V2:
case E1000_DEV_ID_PCH_LBG_I219_LM3:
+ case E1000_DEV_ID_PCH_SPT_I219_LM4:
+ case E1000_DEV_ID_PCH_SPT_I219_V4:
+ case E1000_DEV_ID_PCH_SPT_I219_LM5:
+ case E1000_DEV_ID_PCH_SPT_I219_V5:
mac->type = e1000_pch_spt;
break;
case E1000_DEV_ID_82575EB_COPPER:
diff --git a/sys/dev/e1000/e1000_hw.h b/sys/dev/e1000/e1000_hw.h
index 1792e14..e1464a7 100644
--- a/sys/dev/e1000/e1000_hw.h
+++ b/sys/dev/e1000/e1000_hw.h
@@ -142,6 +142,10 @@ struct e1000_hw;
#define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* Sunrise Point-H PCH */
#define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* Sunrise Point-H PCH */
#define E1000_DEV_ID_PCH_LBG_I219_LM3 0x15B9 /* LEWISBURG PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM4 0x15D7
+#define E1000_DEV_ID_PCH_SPT_I219_V4 0x15D8
+#define E1000_DEV_ID_PCH_SPT_I219_LM5 0x15E3
+#define E1000_DEV_ID_PCH_SPT_I219_V5 0x15D6
#define E1000_DEV_ID_82576 0x10C9
#define E1000_DEV_ID_82576_FIBER 0x10E6
#define E1000_DEV_ID_82576_SERDES 0x10E7
@@ -957,9 +961,13 @@ struct e1000_dev_spec_ich8lan {
E1000_MUTEX nvm_mutex;
E1000_MUTEX swflag_mutex;
bool nvm_k1_enabled;
+ bool disable_k1_off;
bool eee_disable;
u16 eee_lp_ability;
enum e1000_ulp_state ulp_state;
+ bool ulp_capability_disabled;
+ bool during_suspend_flow;
+ bool during_dpg_exit;
};
struct e1000_dev_spec_82575 {
diff --git a/sys/dev/e1000/e1000_ich8lan.c b/sys/dev/e1000/e1000_ich8lan.c
index ae97a8c..4c50ce2 100644
--- a/sys/dev/e1000/e1000_ich8lan.c
+++ b/sys/dev/e1000/e1000_ich8lan.c
@@ -288,7 +288,7 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw)
mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE;
E1000_WRITE_REG(hw, E1000_CTRL, mac_reg);
E1000_WRITE_FLUSH(hw);
- usec_delay(10);
+ msec_delay(1);
mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE;
E1000_WRITE_REG(hw, E1000_CTRL, mac_reg);
E1000_WRITE_FLUSH(hw);
@@ -1625,7 +1625,17 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
hw->phy.ops.write_reg_locked(hw,
I217_PLL_CLOCK_GATE_REG,
phy_reg);
- }
+
+ if (speed == SPEED_1000) {
+ hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL,
+ &phy_reg);
+
+ phy_reg |= HV_PM_CTRL_K1_CLK_REQ;
+
+ hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL,
+ phy_reg);
+ }
+ }
hw->phy.ops.release(hw);
if (ret_val)
@@ -1718,7 +1728,8 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG);
u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6);
- if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE)
+ if ((pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) &&
+ (hw->dev_spec.ich8lan.disable_k1_off == FALSE))
fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE;
else
fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE;
diff --git a/sys/dev/e1000/e1000_ich8lan.h b/sys/dev/e1000/e1000_ich8lan.h
index edc1dd1..6d81291 100644
--- a/sys/dev/e1000/e1000_ich8lan.h
+++ b/sys/dev/e1000/e1000_ich8lan.h
@@ -239,7 +239,7 @@
/* PHY Power Management Control */
#define HV_PM_CTRL PHY_REG(770, 17)
-#define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100
+#define HV_PM_CTRL_K1_CLK_REQ 0x200
#define HV_PM_CTRL_K1_ENABLE 0x4000
#define I217_PLL_CLOCK_GATE_REG PHY_REG(772, 28)
diff --git a/sys/dev/e1000/e1000_phy.c b/sys/dev/e1000/e1000_phy.c
index 847d315..9684b43 100644
--- a/sys/dev/e1000/e1000_phy.c
+++ b/sys/dev/e1000/e1000_phy.c
@@ -4146,12 +4146,13 @@ s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data)
*data = E1000_READ_REG(hw, E1000_MPHY_DATA);
/* Disable access to mPHY if it was originally disabled */
- if (locked) {
+ if (locked)
ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- }
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+ E1000_MPHY_DIS_ACCESS);
+
return E1000_SUCCESS;
}
@@ -4210,12 +4211,13 @@ s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
E1000_WRITE_REG(hw, E1000_MPHY_DATA, data);
/* Disable access to mPHY if it was originally disabled */
- if (locked) {
+ if (locked)
ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- }
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+ E1000_MPHY_DIS_ACCESS);
+
return E1000_SUCCESS;
}
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index b8e9750..7e2690e 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -193,6 +193,12 @@ static em_vendor_info_t em_vendor_info_array[] =
{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
/* required last entry */
{ 0, 0, 0, 0, 0}
};
diff --git a/sys/dev/filemon/filemon.c b/sys/dev/filemon/filemon.c
index 919af9d..26e1bc3 100644
--- a/sys/dev/filemon/filemon.c
+++ b/sys/dev/filemon/filemon.c
@@ -137,6 +137,8 @@ filemon_proc_get(struct proc *p)
{
struct filemon *filemon;
+ if (p->p_filemon == NULL)
+ return (NULL);
PROC_LOCK(p);
filemon = filemon_acquire(p->p_filemon);
PROC_UNLOCK(p);
diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
index 8f3efef..b79e10c 100644
--- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
@@ -810,6 +810,7 @@ hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
* because the fields will be used later in storvsc_io_done().
*/
request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
+ request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
@@ -1945,28 +1946,6 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
return(0);
}
-/*
- * SCSI Inquiry checks qualifier and type.
- * If qualifier is 011b, means the device server is not capable
- * of supporting a peripheral device on this logical unit, and
- * the type should be set to 1Fh.
- *
- * Return 1 if it is valid, 0 otherwise.
- */
-static inline int
-is_inquiry_valid(const struct scsi_inquiry_data *inq_data)
-{
- uint8_t type;
- if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED) {
- return (0);
- }
- type = SID_TYPE(inq_data);
- if (type == T_NODEVICE) {
- return (0);
- }
- return (1);
-}
-
/**
* @brief completion function before returning to CAM
*
@@ -1985,7 +1964,6 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
bus_dma_segment_t *ori_sglist = NULL;
int ori_sg_count = 0;
-
/* destroy bounce buffer if it is used */
if (reqp->bounce_sgl_count) {
ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
@@ -2040,88 +2018,71 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
ccb->ccb_h.status &= ~CAM_STATUS_MASK;
if (vm_srb->scsi_status == SCSI_STATUS_OK) {
const struct scsi_generic *cmd;
- /*
- * Check whether the data for INQUIRY cmd is valid or
- * not. Windows 10 and Windows 2016 send all zero
- * inquiry data to VM even for unpopulated slots.
- */
+
+ if (vm_srb->srb_status != SRB_STATUS_SUCCESS) {
+ if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) {
+ xpt_print(ccb->ccb_h.path, "invalid LUN %d\n",
+ vm_srb->lun);
+ } else {
+ xpt_print(ccb->ccb_h.path, "Unknown SRB flag: %d\n",
+ vm_srb->srb_status);
+ }
+ /*
+ * If there are errors, for example, invalid LUN,
+ * host will inform VM through SRB status.
+ */
+ ccb->ccb_h.status |= CAM_SEL_TIMEOUT;
+ } else {
+ ccb->ccb_h.status |= CAM_REQ_CMP;
+ }
+
cmd = (const struct scsi_generic *)
((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
if (cmd->opcode == INQUIRY) {
- /*
- * The host of Windows 10 or 2016 server will response
- * the inquiry request with invalid data for unexisted device:
- [0x7f 0x0 0x5 0x2 0x1f ... ]
- * But on windows 2012 R2, the response is:
- [0x7f 0x0 0x0 0x0 0x0 ]
- * That is why here wants to validate the inquiry response.
- * The validation will skip the INQUIRY whose response is short,
- * which is less than SHORT_INQUIRY_LENGTH (36).
- *
- * For more information about INQUIRY, please refer to:
- * ftp://ftp.avc-pioneer.com/Mtfuji_7/Proposal/Jun09/INQUIRY.pdf
- */
- struct scsi_inquiry_data *inq_data =
- (struct scsi_inquiry_data *)csio->data_ptr;
- uint8_t* resp_buf = (uint8_t*)csio->data_ptr;
- /* Get the buffer length reported by host */
- int resp_xfer_len = vm_srb->transfer_len;
- /* Get the available buffer length */
- int resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
- int data_len = (resp_buf_len < resp_xfer_len) ? resp_buf_len : resp_xfer_len;
- if (data_len < SHORT_INQUIRY_LENGTH) {
- ccb->ccb_h.status |= CAM_REQ_CMP;
- if (bootverbose && data_len >= 5) {
- mtx_lock(&sc->hs_lock);
- xpt_print(ccb->ccb_h.path,
- "storvsc skips the validation for short inquiry (%d)"
- " [%x %x %x %x %x]\n",
- data_len,resp_buf[0],resp_buf[1],resp_buf[2],
- resp_buf[3],resp_buf[4]);
- mtx_unlock(&sc->hs_lock);
- }
- } else if (is_inquiry_valid(inq_data) == 0) {
- ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
+ struct scsi_inquiry_data *inq_data =
+ (struct scsi_inquiry_data *)csio->data_ptr;
+ uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
+ int resp_xfer_len, resp_buf_len, data_len;
+
+ /* Get the buffer length reported by host */
+ resp_xfer_len = vm_srb->transfer_len;
+ /* Get the available buffer length */
+ resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
+ data_len = (resp_buf_len < resp_xfer_len) ?
+ resp_buf_len : resp_xfer_len;
+
if (bootverbose && data_len >= 5) {
- mtx_lock(&sc->hs_lock);
- xpt_print(ccb->ccb_h.path,
- "storvsc uninstalled invalid device"
- " [%x %x %x %x %x]\n",
- resp_buf[0],resp_buf[1],resp_buf[2],resp_buf[3],resp_buf[4]);
- mtx_unlock(&sc->hs_lock);
+ xpt_print(ccb->ccb_h.path, "storvsc inquiry "
+ "(%d) [%x %x %x %x %x ... ]\n", data_len,
+ resp_buf[0], resp_buf[1], resp_buf[2],
+ resp_buf[3], resp_buf[4]);
}
- } else {
- char vendor[16];
- cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor),
- sizeof(vendor));
- /**
- * XXX: upgrade SPC2 to SPC3 if host is WIN8 or WIN2012 R2
- * in order to support UNMAP feature
- */
- if (!strncmp(vendor,"Msft",4) &&
- SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
- (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
- vmstor_proto_version== VMSTOR_PROTOCOL_VERSION_WIN8)) {
- inq_data->version = SCSI_REV_SPC3;
- if (bootverbose) {
- mtx_lock(&sc->hs_lock);
- xpt_print(ccb->ccb_h.path,
- "storvsc upgrades SPC2 to SPC3\n");
- mtx_unlock(&sc->hs_lock);
+ if (vm_srb->srb_status == SRB_STATUS_SUCCESS &&
+ data_len > SHORT_INQUIRY_LENGTH) {
+ char vendor[16];
+
+ cam_strvis(vendor, inq_data->vendor,
+ sizeof(inq_data->vendor), sizeof(vendor));
+
+ /*
+ * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
+ * WIN2012 R2 in order to support UNMAP feature.
+ */
+ if (!strncmp(vendor, "Msft", 4) &&
+ SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
+ (vmstor_proto_version ==
+ VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
+ vmstor_proto_version ==
+ VMSTOR_PROTOCOL_VERSION_WIN8)) {
+ inq_data->version = SCSI_REV_SPC3;
+ if (bootverbose) {
+ xpt_print(ccb->ccb_h.path,
+ "storvsc upgrades "
+ "SPC2 to SPC3\n");
+ }
}
}
- ccb->ccb_h.status |= CAM_REQ_CMP;
- if (bootverbose) {
- mtx_lock(&sc->hs_lock);
- xpt_print(ccb->ccb_h.path,
- "storvsc has passed inquiry response (%d) validation\n",
- data_len);
- mtx_unlock(&sc->hs_lock);
- }
- }
- } else {
- ccb->ccb_h.status |= CAM_REQ_CMP;
}
} else {
mtx_lock(&sc->hs_lock);
diff --git a/sys/dev/hyperv/storvsc/hv_vstorage.h b/sys/dev/hyperv/storvsc/hv_vstorage.h
index f2b9480..9205e35 100644
--- a/sys/dev/hyperv/storvsc/hv_vstorage.h
+++ b/sys/dev/hyperv/storvsc/hv_vstorage.h
@@ -249,9 +249,9 @@ struct vstor_packet {
/**
* SRB Status Masks (can be combined with above status codes)
*/
-#define SRB_STATUS_QUEUE_FROZEN 0x40
-#define SRB_STATUS_AUTOSENSE_VALID 0x80
-
+#define SRB_STATUS_QUEUE_FROZEN 0x40
+#define SRB_STATUS_AUTOSENSE_VALID 0x80
+#define SRB_STATUS_INVALID_LUN 0X20
/**
* Packet flags
diff --git a/sys/dev/ntb/if_ntb/if_ntb.c b/sys/dev/ntb/if_ntb/if_ntb.c
index 7b659f7..c67ae0d 100644
--- a/sys/dev/ntb/if_ntb/if_ntb.c
+++ b/sys/dev/ntb/if_ntb/if_ntb.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
* Copyright (C) 2013 Intel Corporation
* Copyright (C) 2015 EMC Corporation
* All rights reserved.
@@ -25,21 +26,27 @@
* SUCH DAMAGE.
*/
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using a PCI-e links, providing remote memory access.
+ *
+ * This module contains a driver for simulated Ethernet device, using
+ * underlying NTB Transport device.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
-#include <sys/bitset.h>
+#include <sys/buf_ring.h>
#include <sys/bus.h>
-#include <sys/ktr.h>
#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
#include <sys/module.h>
-#include <sys/mutex.h>
-#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
@@ -48,421 +55,164 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_media.h>
#include <net/if_types.h>
+#include <net/if_media.h>
#include <net/if_var.h>
#include <net/bpf.h>
#include <net/ethernet.h>
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
#include <machine/bus.h>
-#include <machine/cpufunc.h>
-
-#include <netinet/in.h>
-#include <netinet/ip.h>
-
-#include "../ntb_hw/ntb_hw.h"
-
-/*
- * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
- * allows you to connect two systems using a PCI-e link.
- *
- * This module contains a protocol for sending and receiving messages, and
- * exposes that protocol through a simulated ethernet device called ntb.
- *
- * NOTE: Much of the code in this module is shared with Linux. Any patches may
- * be picked up and redistributed in Linux with a dual GPL/BSD license.
- */
-#define QP_SETSIZE 64
-BITSET_DEFINE(_qpset, QP_SETSIZE);
-#define test_bit(pos, addr) BIT_ISSET(QP_SETSIZE, (pos), (addr))
-#define set_bit(pos, addr) BIT_SET(QP_SETSIZE, (pos), (addr))
-#define clear_bit(pos, addr) BIT_CLR(QP_SETSIZE, (pos), (addr))
-#define ffs_bit(addr) BIT_FFS(QP_SETSIZE, (addr))
+#include "../ntb_transport.h"
#define KTR_NTB KTR_SPARE3
+#define NTB_MEDIATYPE (IFM_ETHER | IFM_AUTO | IFM_FDX)
-#define NTB_TRANSPORT_VERSION 4
-#define NTB_RX_MAX_PKTS 64
-#define NTB_RXQ_SIZE 300
-
-enum ntb_link_event {
- NTB_LINK_DOWN = 0,
- NTB_LINK_UP,
-};
+#define NTB_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
+#define NTB_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
+#define NTB_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
+ CSUM_PSEUDO_HDR | \
+ CSUM_IP_CHECKED | CSUM_IP_VALID | \
+ CSUM_SCTP_VALID)
static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb");
-static unsigned g_if_ntb_debug_level;
-SYSCTL_UINT(_hw_if_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
- &g_if_ntb_debug_level, 0, "if_ntb log level -- higher is more verbose");
-#define ntb_printf(lvl, ...) do { \
- if ((lvl) <= g_if_ntb_debug_level) { \
- if_printf(nt->ifp, __VA_ARGS__); \
- } \
-} while (0)
-
-static unsigned transport_mtu = IP_MAXPACKET + ETHER_HDR_LEN + ETHER_CRC_LEN;
-
-static uint64_t max_mw_size;
-SYSCTL_UQUAD(_hw_if_ntb, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0,
- "If enabled (non-zero), limit the size of large memory windows. "
- "Both sides of the NTB MUST set the same value here.");
-
-static unsigned max_num_clients;
-SYSCTL_UINT(_hw_if_ntb, OID_AUTO, max_num_clients, CTLFLAG_RDTUN,
- &max_num_clients, 0, "Maximum number of NTB transport clients. "
- "0 (default) - use all available NTB memory windows; "
- "positive integer N - Limit to N memory windows.");
-
-static unsigned enable_xeon_watchdog;
-SYSCTL_UINT(_hw_if_ntb, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN,
- &enable_xeon_watchdog, 0, "If non-zero, write a register every second to "
- "keep a watchdog from tearing down the NTB link");
-
-STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);
-
-typedef uint32_t ntb_q_idx_t;
-
-struct ntb_queue_entry {
- /* ntb_queue list reference */
- STAILQ_ENTRY(ntb_queue_entry) entry;
-
- /* info on data to be transferred */
- void *cb_data;
- void *buf;
- uint32_t len;
- uint32_t flags;
-
- struct ntb_transport_qp *qp;
- struct ntb_payload_header *x_hdr;
- ntb_q_idx_t index;
-};
-
-struct ntb_rx_info {
- ntb_q_idx_t entry;
-};
-
-struct ntb_transport_qp {
- struct ntb_transport_ctx *transport;
- struct ntb_softc *ntb;
-
- void *cb_data;
-
- bool client_ready;
- volatile bool link_is_up;
- uint8_t qp_num; /* Only 64 QPs are allowed. 0-63 */
-
- struct ntb_rx_info *rx_info;
- struct ntb_rx_info *remote_rx_info;
-
- void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
- void *data, int len);
- struct ntb_queue_list tx_free_q;
- struct mtx ntb_tx_free_q_lock;
- caddr_t tx_mw;
- bus_addr_t tx_mw_phys;
- ntb_q_idx_t tx_index;
- ntb_q_idx_t tx_max_entry;
- uint64_t tx_max_frame;
-
- void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
- void *data, int len);
- struct ntb_queue_list rx_post_q;
- struct ntb_queue_list rx_pend_q;
- /* ntb_rx_q_lock: synchronize access to rx_XXXX_q */
- struct mtx ntb_rx_q_lock;
- struct task rx_completion_task;
- struct task rxc_db_work;
- caddr_t rx_buff;
- ntb_q_idx_t rx_index;
- ntb_q_idx_t rx_max_entry;
- uint64_t rx_max_frame;
-
- void (*event_handler)(void *data, enum ntb_link_event status);
- struct callout link_work;
- struct callout queue_full;
- struct callout rx_full;
-
- uint64_t last_rx_no_buf;
+static unsigned g_if_ntb_num_queues = UINT_MAX;
+SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
+ &g_if_ntb_num_queues, 0, "Number of queues per interface");
- /* Stats */
- uint64_t rx_bytes;
- uint64_t rx_pkts;
- uint64_t rx_ring_empty;
- uint64_t rx_err_no_buf;
- uint64_t rx_err_oflow;
- uint64_t rx_err_ver;
- uint64_t tx_bytes;
- uint64_t tx_pkts;
- uint64_t tx_ring_full;
- uint64_t tx_err_no_buf;
-};
-
-struct ntb_queue_handlers {
- void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
- void *data, int len);
- void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
- void *data, int len);
- void (*event_handler)(void *data, enum ntb_link_event status);
-};
-
-struct ntb_transport_mw {
- vm_paddr_t phys_addr;
- size_t phys_size;
- size_t xlat_align;
- size_t xlat_align_size;
- bus_addr_t addr_limit;
- /* Tx buff is off vbase / phys_addr */
- caddr_t vbase;
- size_t xlat_size;
- size_t buff_size;
- /* Rx buff is off virt_addr / dma_addr */
- caddr_t virt_addr;
- bus_addr_t dma_addr;
-};
-
-struct ntb_transport_ctx {
- struct ntb_softc *ntb;
- struct ifnet *ifp;
- struct ntb_transport_mw mw_vec[NTB_MAX_NUM_MW];
- struct ntb_transport_qp *qp_vec;
- struct _qpset qp_bitmap;
- struct _qpset qp_bitmap_free;
- unsigned mw_count;
- unsigned qp_count;
- volatile bool link_is_up;
- struct callout link_work;
- struct callout link_watchdog;
- struct task link_cleanup;
- uint64_t bufsize;
- u_char eaddr[ETHER_ADDR_LEN];
- struct mtx tx_lock;
- struct mtx rx_lock;
-
- /* The hardcoded single queuepair in ntb_setup_interface() */
+struct ntb_net_queue {
+ struct ntb_net_ctx *sc;
+ if_t ifp;
struct ntb_transport_qp *qp;
+ struct buf_ring *br;
+ struct task tx_task;
+ struct taskqueue *tx_tq;
+ struct mtx tx_lock;
+ struct callout queue_full;
};
-static struct ntb_transport_ctx net_softc;
-
-enum {
- IF_NTB_DESC_DONE_FLAG = 1 << 0,
- IF_NTB_LINK_DOWN_FLAG = 1 << 1,
-};
-
-struct ntb_payload_header {
- ntb_q_idx_t ver;
- uint32_t len;
- uint32_t flags;
-};
-
-enum {
- /*
- * The order of this enum is part of the if_ntb remote protocol. Do
- * not reorder without bumping protocol version (and it's probably best
- * to keep the protocol in lock-step with the Linux NTB driver.
- */
- IF_NTB_VERSION = 0,
- IF_NTB_QP_LINKS,
- IF_NTB_NUM_QPS,
- IF_NTB_NUM_MWS,
- /*
- * N.B.: transport_link_work assumes MW1 enums = MW0 + 2.
- */
- IF_NTB_MW0_SZ_HIGH,
- IF_NTB_MW0_SZ_LOW,
- IF_NTB_MW1_SZ_HIGH,
- IF_NTB_MW1_SZ_LOW,
- IF_NTB_MAX_SPAD,
-
- /*
- * Some NTB-using hardware have a watchdog to work around NTB hangs; if
- * a register or doorbell isn't written every few seconds, the link is
- * torn down. Write an otherwise unused register every few seconds to
- * work around this watchdog.
- */
- IF_NTB_WATCHDOG_SPAD = 15
+struct ntb_net_ctx {
+ if_t ifp;
+ struct ifmedia media;
+ u_char eaddr[ETHER_ADDR_LEN];
+ int num_queues;
+ struct ntb_net_queue *queues;
+ int mtu;
};
-CTASSERT(IF_NTB_WATCHDOG_SPAD < XEON_SPAD_COUNT &&
- IF_NTB_WATCHDOG_SPAD < ATOM_SPAD_COUNT);
-
-#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count)
-#define NTB_QP_DEF_NUM_ENTRIES 100
-#define NTB_LINK_DOWN_TIMEOUT 10
-static int ntb_handle_module_events(struct module *m, int what, void *arg);
-static int ntb_setup_interface(void);
-static int ntb_teardown_interface(void);
+static int ntb_net_probe(device_t dev);
+static int ntb_net_attach(device_t dev);
+static int ntb_net_detach(device_t dev);
static void ntb_net_init(void *arg);
-static int ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
-static void ntb_start(struct ifnet *ifp);
+static int ntb_ifmedia_upd(struct ifnet *);
+static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
+static int ntb_ioctl(if_t ifp, u_long command, caddr_t data);
+static int ntb_transmit(if_t ifp, struct mbuf *m);
static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len);
static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len);
static void ntb_net_event_handler(void *data, enum ntb_link_event status);
-static int ntb_transport_probe(struct ntb_softc *ntb);
-static void ntb_transport_free(struct ntb_transport_ctx *);
-static void ntb_transport_init_queue(struct ntb_transport_ctx *nt,
- unsigned int qp_num);
-static void ntb_transport_free_queue(struct ntb_transport_qp *qp);
-static struct ntb_transport_qp *ntb_transport_create_queue(void *data,
- struct ntb_softc *pdev, const struct ntb_queue_handlers *handlers);
-static void ntb_transport_link_up(struct ntb_transport_qp *qp);
-static int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb,
- void *data, unsigned int len);
-static int ntb_process_tx(struct ntb_transport_qp *qp,
- struct ntb_queue_entry *entry);
-static void ntb_memcpy_tx(struct ntb_transport_qp *qp,
- struct ntb_queue_entry *entry, void *offset);
+static void ntb_handle_tx(void *arg, int pending);
static void ntb_qp_full(void *arg);
-static void ntb_transport_rxc_db(void *arg, int pending);
-static int ntb_process_rxc(struct ntb_transport_qp *qp);
-static void ntb_memcpy_rx(struct ntb_transport_qp *qp,
- struct ntb_queue_entry *entry, void *offset);
-static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp,
- void *data);
-static void ntb_complete_rxc(void *arg, int pending);
-static void ntb_transport_doorbell_callback(void *data, uint32_t vector);
-static void ntb_transport_event_callback(void *data);
-static void ntb_transport_link_work(void *arg);
-static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size);
-static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw);
-static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
- unsigned int qp_num);
-static void ntb_qp_link_work(void *arg);
-static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt);
-static void ntb_transport_link_cleanup_work(void *, int);
-static void ntb_qp_link_down(struct ntb_transport_qp *qp);
-static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp);
-static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp);
-static void ntb_transport_link_down(struct ntb_transport_qp *qp);
-static void ntb_send_link_down(struct ntb_transport_qp *qp);
-static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
- struct ntb_queue_list *list);
-static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock,
- struct ntb_queue_list *list);
-static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock,
- struct ntb_queue_list *from, struct ntb_queue_list *to);
+static void ntb_qflush(if_t ifp);
static void create_random_local_eui48(u_char *eaddr);
-static unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp);
-static void xeon_link_watchdog_hb(void *);
-
-static const struct ntb_ctx_ops ntb_transport_ops = {
- .link_event = ntb_transport_event_callback,
- .db_event = ntb_transport_doorbell_callback,
-};
-MALLOC_DEFINE(M_NTB_IF, "if_ntb", "ntb network driver");
-
-static inline void
-iowrite32(uint32_t val, void *addr)
-{
-
- bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr,
- val);
-}
-
-/* Module setup and teardown */
static int
-ntb_handle_module_events(struct module *m, int what, void *arg)
+ntb_net_probe(device_t dev)
{
- int err = 0;
- switch (what) {
- case MOD_LOAD:
- err = ntb_setup_interface();
- break;
- case MOD_UNLOAD:
- err = ntb_teardown_interface();
- break;
- default:
- err = EOPNOTSUPP;
- break;
- }
- return (err);
+ device_set_desc(dev, "NTB Network Interface");
+ return (0);
}
-static moduledata_t if_ntb_mod = {
- "if_ntb",
- ntb_handle_module_events,
- NULL
-};
-
-DECLARE_MODULE(if_ntb, if_ntb_mod, SI_SUB_KLD, SI_ORDER_ANY);
-MODULE_DEPEND(if_ntb, ntb_hw, 1, 1, 1);
-
static int
-ntb_setup_interface(void)
+ntb_net_attach(device_t dev)
{
- struct ifnet *ifp;
+ struct ntb_net_ctx *sc = device_get_softc(dev);
+ struct ntb_net_queue *q;
+ if_t ifp;
struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
ntb_net_tx_handler, ntb_net_event_handler };
- int rc;
-
- net_softc.ntb = devclass_get_softc(devclass_find("ntb_hw"), 0);
- if (net_softc.ntb == NULL) {
- printf("ntb: Cannot find devclass\n");
- return (ENXIO);
- }
+ int i;
- ifp = net_softc.ifp = if_alloc(IFT_ETHER);
+ ifp = sc->ifp = if_gethandle(IFT_ETHER);
if (ifp == NULL) {
- ntb_transport_free(&net_softc);
printf("ntb: Cannot allocate ifnet structure\n");
return (ENOMEM);
}
- if_initname(ifp, "ntb", 0);
-
- rc = ntb_transport_probe(net_softc.ntb);
- if (rc != 0) {
- printf("ntb: Cannot init transport: %d\n", rc);
- if_free(net_softc.ifp);
- return (rc);
- }
-
- net_softc.qp = ntb_transport_create_queue(ifp, net_softc.ntb,
- &handlers);
- ifp->if_init = ntb_net_init;
- ifp->if_softc = &net_softc;
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_ioctl = ntb_ioctl;
- ifp->if_start = ntb_start;
- IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
- ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN;
- IFQ_SET_READY(&ifp->if_snd);
- create_random_local_eui48(net_softc.eaddr);
- ether_ifattach(ifp, net_softc.eaddr);
- ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_JUMBO_MTU;
- ifp->if_capenable = ifp->if_capabilities;
- ifp->if_mtu = ntb_transport_max_size(net_softc.qp) - ETHER_HDR_LEN -
- ETHER_CRC_LEN;
-
- ntb_transport_link_up(net_softc.qp);
- net_softc.bufsize = ntb_transport_max_size(net_softc.qp) +
- sizeof(struct ether_header);
+ if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+ if_setdev(ifp, dev);
+
+ sc->num_queues = min(g_if_ntb_num_queues,
+ ntb_transport_queue_count(dev));
+ sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ sc->mtu = INT_MAX;
+ for (i = 0; i < sc->num_queues; i++) {
+ q = &sc->queues[i];
+ q->sc = sc;
+ q->ifp = ifp;
+ q->qp = ntb_transport_create_queue(dev, i, &handlers, q);
+ if (q->qp == NULL)
+ break;
+ sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
+ mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
+ q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
+ TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
+ q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
+ taskqueue_thread_enqueue, &q->tx_tq);
+ taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
+ device_get_nameunit(dev), i);
+ callout_init(&q->queue_full, 1);
+ }
+ sc->num_queues = i;
+ device_printf(dev, "%d queue(s)\n", sc->num_queues);
+
+ if_setinitfn(ifp, ntb_net_init);
+ if_setsoftc(ifp, sc);
+ if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+ if_setioctlfn(ifp, ntb_ioctl);
+ if_settransmitfn(ifp, ntb_transmit);
+ if_setqflushfn(ifp, ntb_qflush);
+ create_random_local_eui48(sc->eaddr);
+ ether_ifattach(ifp, sc->eaddr);
+ if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
+ IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
+ if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
+ if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN);
+
+ ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
+ ntb_ifmedia_sts);
+ ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
+ ifmedia_set(&sc->media, NTB_MEDIATYPE);
+
+ for (i = 0; i < sc->num_queues; i++)
+ ntb_transport_link_up(sc->queues[i].qp);
return (0);
}
static int
-ntb_teardown_interface(void)
+ntb_net_detach(device_t dev)
{
+ struct ntb_net_ctx *sc = device_get_softc(dev);
+ struct ntb_net_queue *q;
+ int i;
- if (net_softc.qp != NULL) {
- ntb_transport_link_down(net_softc.qp);
-
- ntb_transport_free_queue(net_softc.qp);
- ntb_transport_free(&net_softc);
- }
-
- if (net_softc.ifp != NULL) {
- ether_ifdetach(net_softc.ifp);
- if_free(net_softc.ifp);
- net_softc.ifp = NULL;
- }
-
+ for (i = 0; i < sc->num_queues; i++)
+ ntb_transport_link_down(sc->queues[i].qp);
+ ether_ifdetach(sc->ifp);
+ if_free(sc->ifp);
+ ifmedia_removeall(&sc->media);
+ for (i = 0; i < sc->num_queues; i++) {
+ q = &sc->queues[i];
+ ntb_transport_free_queue(q->qp);
+ buf_ring_free(q->br, M_DEVBUF);
+ callout_drain(&q->queue_full);
+ taskqueue_drain_all(q->tx_tq);
+ mtx_destroy(&q->tx_lock);
+ }
+ free(sc->queues, M_DEVBUF);
return (0);
}
@@ -471,1213 +221,268 @@ ntb_teardown_interface(void)
static void
ntb_net_init(void *arg)
{
- struct ntb_transport_ctx *ntb_softc = arg;
- struct ifnet *ifp = ntb_softc->ifp;
+ struct ntb_net_ctx *sc = arg;
+ if_t ifp = sc->ifp;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- ifp->if_flags |= IFF_UP;
- if_link_state_change(ifp, LINK_STATE_UP);
+ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
+ if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
+ LINK_STATE_UP : LINK_STATE_DOWN);
}
static int
-ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+ntb_ioctl(if_t ifp, u_long command, caddr_t data)
{
- struct ntb_transport_ctx *nt = ifp->if_softc;
+ struct ntb_net_ctx *sc = if_getsoftc(ifp);
struct ifreq *ifr = (struct ifreq *)data;
int error = 0;
switch (command) {
case SIOCSIFMTU:
{
- if (ifr->ifr_mtu > ntb_transport_max_size(nt->qp) -
- ETHER_HDR_LEN - ETHER_CRC_LEN) {
+ if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
error = EINVAL;
break;
}
- ifp->if_mtu = ifr->ifr_mtu;
+ if_setmtu(ifp, ifr->ifr_mtu);
break;
}
- default:
- error = ether_ioctl(ifp, command, data);
- break;
- }
- return (error);
-}
-
-
-static void
-ntb_start(struct ifnet *ifp)
-{
- struct mbuf *m_head;
- struct ntb_transport_ctx *nt = ifp->if_softc;
- int rc;
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
+ break;
- mtx_lock(&nt->tx_lock);
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- CTR0(KTR_NTB, "TX: ntb_start");
- while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
- CTR1(KTR_NTB, "TX: start mbuf %p", m_head);
- rc = ntb_transport_tx_enqueue(nt->qp, m_head, m_head,
- m_length(m_head, NULL));
- if (rc != 0) {
- CTR1(KTR_NTB,
- "TX: could not tx mbuf %p. Returning to snd q",
- m_head);
- if (rc == EAGAIN) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
- callout_reset(&nt->qp->queue_full, hz / 1000,
- ntb_qp_full, ifp);
- }
- break;
+ case SIOCSIFCAP:
+ if (ifr->ifr_reqcap & IFCAP_RXCSUM)
+ if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
+ else
+ if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
+ if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
+ if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
+ if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0);
+ } else {
+ if_setcapenablebit(ifp, 0, IFCAP_TXCSUM);
+ if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES);
+ }
+ if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
+ if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
+ else
+ if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
+ if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
+ if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
+ if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0);
+ } else {
+ if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6);
+ if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6);
}
-
- }
- mtx_unlock(&nt->tx_lock);
-}
-
-/* Network Device Callbacks */
-static void
-ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
- int len)
-{
-
- m_freem(data);
- CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
-}
-
-static void
-ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
- int len)
-{
- struct mbuf *m = data;
- struct ifnet *ifp = qp_data;
-
- CTR0(KTR_NTB, "RX: rx handler");
- (*ifp->if_input)(ifp, m);
-}
-
-static void
-ntb_net_event_handler(void *data, enum ntb_link_event status)
-{
- struct ifnet *ifp;
-
- ifp = data;
- (void)ifp;
-
- /* XXX The Linux driver munges with the carrier status here. */
-
- switch (status) {
- case NTB_LINK_DOWN:
- break;
- case NTB_LINK_UP:
break;
- default:
- panic("Bogus ntb_link_event %u\n", status);
- }
-}
-
-/* Transport Init and teardown */
-
-static void
-xeon_link_watchdog_hb(void *arg)
-{
- struct ntb_transport_ctx *nt;
-
- nt = arg;
- ntb_spad_write(nt->ntb, IF_NTB_WATCHDOG_SPAD, 0);
- callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt);
-}
-
-static int
-ntb_transport_probe(struct ntb_softc *ntb)
-{
- struct ntb_transport_ctx *nt = &net_softc;
- struct ntb_transport_mw *mw;
- uint64_t qp_bitmap;
- int rc;
- unsigned i;
-
- nt->mw_count = ntb_mw_count(ntb);
- for (i = 0; i < nt->mw_count; i++) {
- mw = &nt->mw_vec[i];
-
- rc = ntb_mw_get_range(ntb, i, &mw->phys_addr, &mw->vbase,
- &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size,
- &mw->addr_limit);
- if (rc != 0)
- goto err;
-
- mw->buff_size = 0;
- mw->xlat_size = 0;
- mw->virt_addr = NULL;
- mw->dma_addr = 0;
-
- rc = ntb_mw_set_wc(nt->ntb, i, VM_MEMATTR_WRITE_COMBINING);
- if (rc)
- ntb_printf(0, "Unable to set mw%d caching\n", i);
- }
-
- qp_bitmap = ntb_db_valid_mask(ntb);
- nt->qp_count = flsll(qp_bitmap);
- KASSERT(nt->qp_count != 0, ("bogus db bitmap"));
- nt->qp_count -= 1;
-
- if (max_num_clients != 0 && max_num_clients < nt->qp_count)
- nt->qp_count = max_num_clients;
- else if (nt->mw_count < nt->qp_count)
- nt->qp_count = nt->mw_count;
- KASSERT(nt->qp_count <= QP_SETSIZE, ("invalid qp_count"));
-
- mtx_init(&nt->tx_lock, "ntb transport tx", NULL, MTX_DEF);
- mtx_init(&nt->rx_lock, "ntb transport rx", NULL, MTX_DEF);
-
- nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_IF,
- M_WAITOK | M_ZERO);
-
- for (i = 0; i < nt->qp_count; i++) {
- set_bit(i, &nt->qp_bitmap);
- set_bit(i, &nt->qp_bitmap_free);
- ntb_transport_init_queue(nt, i);
- }
-
- callout_init(&nt->link_work, 0);
- callout_init(&nt->link_watchdog, 0);
- TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt);
-
- rc = ntb_set_ctx(ntb, nt, &ntb_transport_ops);
- if (rc != 0)
- goto err;
-
- nt->link_is_up = false;
- ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
- ntb_link_event(ntb);
-
- callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
- if (enable_xeon_watchdog != 0)
- callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt);
- return (0);
-
-err:
- free(nt->qp_vec, M_NTB_IF);
- nt->qp_vec = NULL;
- return (rc);
-}
-
-static void
-ntb_transport_free(struct ntb_transport_ctx *nt)
-{
- struct ntb_softc *ntb = nt->ntb;
- struct _qpset qp_bitmap_alloc;
- uint8_t i;
-
- ntb_transport_link_cleanup(nt);
- taskqueue_drain(taskqueue_swi, &nt->link_cleanup);
- callout_drain(&nt->link_work);
- callout_drain(&nt->link_watchdog);
-
- BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
- BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);
-
- /* Verify that all the QPs are freed */
- for (i = 0; i < nt->qp_count; i++)
- if (test_bit(i, &qp_bitmap_alloc))
- ntb_transport_free_queue(&nt->qp_vec[i]);
-
- ntb_link_disable(ntb);
- ntb_clear_ctx(ntb);
-
- for (i = 0; i < nt->mw_count; i++)
- ntb_free_mw(nt, i);
-
- free(nt->qp_vec, M_NTB_IF);
-}
-
-static void
-ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num)
-{
- struct ntb_transport_mw *mw;
- struct ntb_transport_qp *qp;
- vm_paddr_t mw_base;
- uint64_t mw_size, qp_offset;
- size_t tx_size;
- unsigned num_qps_mw, mw_num, mw_count;
-
- mw_count = nt->mw_count;
- mw_num = QP_TO_MW(nt, qp_num);
- mw = &nt->mw_vec[mw_num];
-
- qp = &nt->qp_vec[qp_num];
- qp->qp_num = qp_num;
- qp->transport = nt;
- qp->ntb = nt->ntb;
- qp->client_ready = false;
- qp->event_handler = NULL;
- ntb_qp_link_down_reset(qp);
-
- if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count)
- num_qps_mw = nt->qp_count / mw_count + 1;
- else
- num_qps_mw = nt->qp_count / mw_count;
-
- mw_base = mw->phys_addr;
- mw_size = mw->phys_size;
-
- tx_size = mw_size / num_qps_mw;
- qp_offset = tx_size * (qp_num / mw_count);
-
- qp->tx_mw = mw->vbase + qp_offset;
- KASSERT(qp->tx_mw != NULL, ("uh oh?"));
-
- /* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */
- qp->tx_mw_phys = mw_base + qp_offset;
- KASSERT(qp->tx_mw_phys != 0, ("uh oh?"));
-
- tx_size -= sizeof(struct ntb_rx_info);
- qp->rx_info = (void *)(qp->tx_mw + tx_size);
-
- /* Due to house-keeping, there must be at least 2 buffs */
- qp->tx_max_frame = qmin(tx_size / 2,
- transport_mtu + sizeof(struct ntb_payload_header));
- qp->tx_max_entry = tx_size / qp->tx_max_frame;
-
- callout_init(&qp->link_work, 0);
- callout_init(&qp->queue_full, 1);
- callout_init(&qp->rx_full, 1);
-
- mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN);
- mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN);
- TASK_INIT(&qp->rx_completion_task, 0, ntb_complete_rxc, qp);
- TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp);
-
- STAILQ_INIT(&qp->rx_post_q);
- STAILQ_INIT(&qp->rx_pend_q);
- STAILQ_INIT(&qp->tx_free_q);
- callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
-}
-
-static void
-ntb_transport_free_queue(struct ntb_transport_qp *qp)
-{
- struct ntb_queue_entry *entry;
-
- if (qp == NULL)
- return;
-
- callout_drain(&qp->link_work);
-
- ntb_db_set_mask(qp->ntb, 1ull << qp->qp_num);
- taskqueue_drain(taskqueue_swi, &qp->rxc_db_work);
- taskqueue_drain(taskqueue_swi, &qp->rx_completion_task);
-
- qp->cb_data = NULL;
- qp->rx_handler = NULL;
- qp->tx_handler = NULL;
- qp->event_handler = NULL;
-
- while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q)))
- free(entry, M_NTB_IF);
-
- while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q)))
- free(entry, M_NTB_IF);
-
- while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
- free(entry, M_NTB_IF);
-
- set_bit(qp->qp_num, &qp->transport->qp_bitmap_free);
-}
-
-/**
- * ntb_transport_create_queue - Create a new NTB transport layer queue
- * @rx_handler: receive callback function
- * @tx_handler: transmit callback function
- * @event_handler: event callback function
- *
- * Create a new NTB transport layer queue and provide the queue with a callback
- * routine for both transmit and receive. The receive callback routine will be
- * used to pass up data when the transport has received it on the queue. The
- * transmit callback routine will be called when the transport has completed the
- * transmission of the data on the queue and the data is ready to be freed.
- *
- * RETURNS: pointer to newly created ntb_queue, NULL on error.
- */
-static struct ntb_transport_qp *
-ntb_transport_create_queue(void *data, struct ntb_softc *ntb,
- const struct ntb_queue_handlers *handlers)
-{
- struct ntb_queue_entry *entry;
- struct ntb_transport_qp *qp;
- struct ntb_transport_ctx *nt;
- unsigned int free_queue;
- int i;
-
- nt = ntb_get_ctx(ntb, NULL);
- KASSERT(nt != NULL, ("bogus"));
-
- free_queue = ffs_bit(&nt->qp_bitmap);
- if (free_queue == 0)
- return (NULL);
-
- /* decrement free_queue to make it zero based */
- free_queue--;
-
- qp = &nt->qp_vec[free_queue];
- clear_bit(qp->qp_num, &nt->qp_bitmap_free);
- qp->cb_data = data;
- qp->rx_handler = handlers->rx_handler;
- qp->tx_handler = handlers->tx_handler;
- qp->event_handler = handlers->event_handler;
-
- for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
- entry = malloc(sizeof(*entry), M_NTB_IF, M_WAITOK | M_ZERO);
- entry->cb_data = nt->ifp;
- entry->buf = NULL;
- entry->len = transport_mtu;
- ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q);
- }
-
- for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
- entry = malloc(sizeof(*entry), M_NTB_IF, M_WAITOK | M_ZERO);
- ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+ default:
+ error = ether_ioctl(ifp, command, data);
+ break;
}
- ntb_db_clear(ntb, 1ull << qp->qp_num);
- ntb_db_clear_mask(ntb, 1ull << qp->qp_num);
- return (qp);
-}
-
-/**
- * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
- * @qp: NTB transport layer queue to be enabled
- *
- * Notify NTB transport layer of client readiness to use queue
- */
-static void
-ntb_transport_link_up(struct ntb_transport_qp *qp)
-{
- struct ntb_transport_ctx *nt;
-
- if (qp == NULL)
- return;
-
- qp->client_ready = true;
-
- nt = qp->transport;
- ntb_printf(2, "qp client ready\n");
-
- if (qp->transport->link_is_up)
- callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+ return (error);
}
-
-
-/* Transport Tx */
-
-/**
- * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
- * @qp: NTB transport layer queue the entry is to be enqueued on
- * @cb: per buffer pointer for callback function to use
- * @data: pointer to data buffer that will be sent
- * @len: length of the data buffer
- *
- * Enqueue a new transmit buffer onto the transport queue from which a NTB
- * payload will be transmitted. This assumes that a lock is being held to
- * serialize access to the qp.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
static int
-ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
- unsigned int len)
+ntb_ifmedia_upd(struct ifnet *ifp)
{
- struct ntb_queue_entry *entry;
- int rc;
+ struct ntb_net_ctx *sc = if_getsoftc(ifp);
+ struct ifmedia *ifm = &sc->media;
- if (qp == NULL || !qp->link_is_up || len == 0) {
- CTR0(KTR_NTB, "TX: link not up");
+ if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
return (EINVAL);
- }
-
- entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
- if (entry == NULL) {
- CTR0(KTR_NTB, "TX: could not get entry from tx_free_q");
- qp->tx_err_no_buf++;
- return (EBUSY);
- }
- CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry);
-
- entry->cb_data = cb;
- entry->buf = data;
- entry->len = len;
- entry->flags = 0;
-
- rc = ntb_process_tx(qp, entry);
- if (rc != 0) {
- ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
- CTR1(KTR_NTB,
- "TX: process_tx failed. Returning entry %p to tx_free_q",
- entry);
- }
- return (rc);
-}
-
-static int
-ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
-{
- void *offset;
-
- offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
- CTR3(KTR_NTB,
- "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u",
- qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry);
- if (qp->tx_index == qp->remote_rx_info->entry) {
- CTR0(KTR_NTB, "TX: ring full");
- qp->tx_ring_full++;
- return (EAGAIN);
- }
-
- if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
- if (qp->tx_handler != NULL)
- qp->tx_handler(qp, qp->cb_data, entry->buf,
- EIO);
- else
- m_freem(entry->buf);
-
- entry->buf = NULL;
- ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
- CTR1(KTR_NTB,
- "TX: frame too big. returning entry %p to tx_free_q",
- entry);
- return (0);
- }
- CTR2(KTR_NTB, "TX: copying entry %p to offset %p", entry, offset);
- ntb_memcpy_tx(qp, entry, offset);
-
- qp->tx_index++;
- qp->tx_index %= qp->tx_max_entry;
-
- qp->tx_pkts++;
return (0);
}
static void
-ntb_memcpy_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
- void *offset)
-{
- struct ntb_payload_header *hdr;
-
- /* This piece is from Linux' ntb_async_tx() */
- hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame -
- sizeof(struct ntb_payload_header));
- entry->x_hdr = hdr;
- iowrite32(entry->len, &hdr->len);
- iowrite32(qp->tx_pkts, &hdr->ver);
-
- /* This piece is ntb_memcpy_tx() */
- CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset);
- if (entry->buf != NULL) {
- m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset);
-
- /*
- * Ensure that the data is fully copied before setting the
- * flags
- */
- wmb();
- }
-
- /* The rest is ntb_tx_copy_callback() */
- iowrite32(entry->flags | IF_NTB_DESC_DONE_FLAG, &hdr->flags);
- CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr);
-
- ntb_peer_db_set(qp->ntb, 1ull << qp->qp_num);
-
- /*
- * The entry length can only be zero if the packet is intended to be a
- * "link down" or similar. Since no payload is being sent in these
- * cases, there is nothing to add to the completion queue.
- */
- if (entry->len > 0) {
- qp->tx_bytes += entry->len;
-
- if (qp->tx_handler)
- qp->tx_handler(qp, qp->cb_data, entry->buf,
- entry->len);
- else
- m_freem(entry->buf);
- entry->buf = NULL;
- }
-
- CTR3(KTR_NTB,
- "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning "
- "to tx_free_q", entry, hdr->ver, hdr->flags);
- ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
-}
-
-static void
-ntb_qp_full(void *arg)
+ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
+ struct ntb_net_ctx *sc = if_getsoftc(ifp);
- CTR0(KTR_NTB, "TX: qp_full callout");
- ntb_start(arg);
+ ifmr->ifm_status = IFM_AVALID;
+ ifmr->ifm_active = NTB_MEDIATYPE;
+ if (ntb_transport_link_query(sc->queues[0].qp))
+ ifmr->ifm_status |= IFM_ACTIVE;
}
-/* Transport Rx */
static void
-ntb_transport_rxc_db(void *arg, int pending __unused)
+ntb_transmit_locked(struct ntb_net_queue *q)
{
- struct ntb_transport_qp *qp = arg;
- ntb_q_idx_t i;
- int rc;
-
- /*
- * Limit the number of packets processed in a single interrupt to
- * provide fairness to others
- */
- CTR0(KTR_NTB, "RX: transport_rx");
- mtx_lock(&qp->transport->rx_lock);
- for (i = 0; i < qp->rx_max_entry; i++) {
- rc = ntb_process_rxc(qp);
+ if_t ifp = q->ifp;
+ struct mbuf *m;
+ int rc, len;
+ short mflags;
+
+ CTR0(KTR_NTB, "TX: ntb_transmit_locked");
+ while ((m = drbr_peek(ifp, q->br)) != NULL) {
+ CTR1(KTR_NTB, "TX: start mbuf %p", m);
+ if_etherbpfmtap(ifp, m);
+ len = m->m_pkthdr.len;
+ mflags = m->m_flags;
+ rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
if (rc != 0) {
- CTR0(KTR_NTB, "RX: process_rxc failed");
+ CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
+ if (rc == EAGAIN) {
+ drbr_putback(ifp, q->br, m);
+ callout_reset_sbt(&q->queue_full,
+ SBT_1MS / 4, SBT_1MS / 4,
+ ntb_qp_full, q, 0);
+ } else {
+ m_freem(m);
+ drbr_advance(ifp, q->br);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ }
break;
}
- }
- mtx_unlock(&qp->transport->rx_lock);
-
- if (i == qp->rx_max_entry)
- taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
- else if ((ntb_db_read(qp->ntb) & (1ull << qp->qp_num)) != 0) {
- /* If db is set, clear it and read it back to commit clear. */
- ntb_db_clear(qp->ntb, 1ull << qp->qp_num);
- (void)ntb_db_read(qp->ntb);
-
- /*
- * An interrupt may have arrived between finishing
- * ntb_process_rxc and clearing the doorbell bit: there might
- * be some more work to do.
- */
- taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
+ drbr_advance(ifp, q->br);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+ if (mflags & M_MCAST)
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
}
}
static int
-ntb_process_rxc(struct ntb_transport_qp *qp)
+ntb_transmit(if_t ifp, struct mbuf *m)
{
- struct ntb_payload_header *hdr;
- struct ntb_queue_entry *entry;
- caddr_t offset;
-
- offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
- hdr = (void *)(offset + qp->rx_max_frame -
- sizeof(struct ntb_payload_header));
-
- CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index);
- if ((hdr->flags & IF_NTB_DESC_DONE_FLAG) == 0) {
- CTR0(KTR_NTB, "RX: hdr not done");
- qp->rx_ring_empty++;
- return (EAGAIN);
- }
-
- if ((hdr->flags & IF_NTB_LINK_DOWN_FLAG) != 0) {
- CTR0(KTR_NTB, "RX: link down");
- ntb_qp_link_down(qp);
- hdr->flags = 0;
- return (EAGAIN);
- }
-
- if (hdr->ver != (uint32_t)qp->rx_pkts) {
- CTR2(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). "
- "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts);
- qp->rx_err_ver++;
- return (EIO);
- }
-
- entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
- if (entry == NULL) {
- qp->rx_err_no_buf++;
- CTR0(KTR_NTB, "RX: No entries in rx_pend_q");
- return (EAGAIN);
- }
- callout_stop(&qp->rx_full);
- CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry);
-
- entry->x_hdr = hdr;
- entry->index = qp->rx_index;
-
- if (hdr->len > entry->len) {
- CTR2(KTR_NTB, "RX: len too long. Wanted %ju got %ju",
- (uintmax_t)hdr->len, (uintmax_t)entry->len);
- qp->rx_err_oflow++;
-
- entry->len = -EIO;
- entry->flags |= IF_NTB_DESC_DONE_FLAG;
+ struct ntb_net_ctx *sc = if_getsoftc(ifp);
+ struct ntb_net_queue *q;
+ int error, i;
- taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task);
- } else {
- qp->rx_bytes += hdr->len;
- qp->rx_pkts++;
-
- CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts);
-
- entry->len = hdr->len;
-
- ntb_memcpy_rx(qp, entry, offset);
- }
-
- qp->rx_index++;
- qp->rx_index %= qp->rx_max_entry;
+ CTR0(KTR_NTB, "TX: ntb_transmit");
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ i = m->m_pkthdr.flowid % sc->num_queues;
+ else
+ i = curcpu % sc->num_queues;
+ q = &sc->queues[i];
+
+ error = drbr_enqueue(ifp, q->br, m);
+ if (error)
+ return (error);
+
+ if (mtx_trylock(&q->tx_lock)) {
+ ntb_transmit_locked(q);
+ mtx_unlock(&q->tx_lock);
+ } else
+ taskqueue_enqueue(q->tx_tq, &q->tx_task);
return (0);
}
static void
-ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
- void *offset)
+ntb_handle_tx(void *arg, int pending)
{
- struct ifnet *ifp = entry->cb_data;
- unsigned int len = entry->len;
- struct mbuf *m;
-
- CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset);
- m = m_devget(offset, len, 0, ifp, NULL);
- m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID;
+ struct ntb_net_queue *q = arg;
- entry->buf = (void *)m;
-
- /* Ensure that the data is globally visible before clearing the flag */
- wmb();
-
- CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, m);
- ntb_rx_copy_callback(qp, entry);
+ mtx_lock(&q->tx_lock);
+ ntb_transmit_locked(q);
+ mtx_unlock(&q->tx_lock);
}
-static inline void
-ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data)
+static void
+ntb_qp_full(void *arg)
{
- struct ntb_queue_entry *entry;
+ struct ntb_net_queue *q = arg;
- entry = data;
- entry->flags |= IF_NTB_DESC_DONE_FLAG;
- taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task);
+ CTR0(KTR_NTB, "TX: qp_full callout");
+ if (ntb_transport_tx_free_entry(q->qp) > 0)
+ taskqueue_enqueue(q->tx_tq, &q->tx_task);
+ else
+ callout_schedule_sbt(&q->queue_full,
+ SBT_1MS / 4, SBT_1MS / 4, 0);
}
static void
-ntb_complete_rxc(void *arg, int pending)
+ntb_qflush(if_t ifp)
{
- struct ntb_transport_qp *qp = arg;
- struct ntb_queue_entry *entry;
+ struct ntb_net_ctx *sc = if_getsoftc(ifp);
+ struct ntb_net_queue *q;
struct mbuf *m;
- unsigned len;
-
- CTR0(KTR_NTB, "RX: rx_completion_task");
-
- mtx_lock_spin(&qp->ntb_rx_q_lock);
-
- while (!STAILQ_EMPTY(&qp->rx_post_q)) {
- entry = STAILQ_FIRST(&qp->rx_post_q);
- if ((entry->flags & IF_NTB_DESC_DONE_FLAG) == 0)
- break;
-
- entry->x_hdr->flags = 0;
- iowrite32(entry->index, &qp->rx_info->entry);
-
- STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry);
-
- len = entry->len;
- m = entry->buf;
-
- /*
- * Re-initialize queue_entry for reuse; rx_handler takes
- * ownership of the mbuf.
- */
- entry->buf = NULL;
- entry->len = transport_mtu;
- entry->cb_data = qp->transport->ifp;
-
- STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry);
-
- mtx_unlock_spin(&qp->ntb_rx_q_lock);
+ int i;
- CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m);
- if (qp->rx_handler != NULL && qp->client_ready)
- qp->rx_handler(qp, qp->cb_data, m, len);
- else
+ for (i = 0; i < sc->num_queues; i++) {
+ q = &sc->queues[i];
+ mtx_lock(&q->tx_lock);
+ while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
m_freem(m);
-
- mtx_lock_spin(&qp->ntb_rx_q_lock);
- }
-
- mtx_unlock_spin(&qp->ntb_rx_q_lock);
-}
-
-static void
-ntb_transport_doorbell_callback(void *data, uint32_t vector)
-{
- struct ntb_transport_ctx *nt = data;
- struct ntb_transport_qp *qp;
- struct _qpset db_bits;
- uint64_t vec_mask;
- unsigned qp_num;
-
- BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &db_bits);
- BIT_NAND(QP_SETSIZE, &db_bits, &nt->qp_bitmap_free);
-
- vec_mask = ntb_db_vector_mask(nt->ntb, vector);
- while (vec_mask != 0) {
- qp_num = ffsll(vec_mask) - 1;
-
- if (test_bit(qp_num, &db_bits)) {
- qp = &nt->qp_vec[qp_num];
- taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
- }
-
- vec_mask &= ~(1ull << qp_num);
- }
-}
-
-/* Link Event handler */
-static void
-ntb_transport_event_callback(void *data)
-{
- struct ntb_transport_ctx *nt = data;
-
- if (ntb_link_is_up(nt->ntb, NULL, NULL)) {
- ntb_printf(1, "HW link up\n");
- callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
- } else {
- ntb_printf(1, "HW link down\n");
- taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup);
+ mtx_unlock(&q->tx_lock);
}
+ if_qflush(ifp);
}
-/* Link bring up */
+/* Network Device Callbacks */
static void
-ntb_transport_link_work(void *arg)
-{
- struct ntb_transport_ctx *nt = arg;
- struct ntb_softc *ntb = nt->ntb;
- struct ntb_transport_qp *qp;
- uint64_t val64, size;
- uint32_t val;
- unsigned i;
- int rc;
-
- /* send the local info, in the opposite order of the way we read it */
- for (i = 0; i < nt->mw_count; i++) {
- size = nt->mw_vec[i].phys_size;
-
- if (max_mw_size != 0 && size > max_mw_size)
- size = max_mw_size;
-
- ntb_peer_spad_write(ntb, IF_NTB_MW0_SZ_HIGH + (i * 2),
- size >> 32);
- ntb_peer_spad_write(ntb, IF_NTB_MW0_SZ_LOW + (i * 2), size);
- }
-
- ntb_peer_spad_write(ntb, IF_NTB_NUM_MWS, nt->mw_count);
-
- ntb_peer_spad_write(ntb, IF_NTB_NUM_QPS, nt->qp_count);
-
- ntb_peer_spad_write(ntb, IF_NTB_VERSION, NTB_TRANSPORT_VERSION);
-
- /* Query the remote side for its info */
- val = 0;
- ntb_spad_read(ntb, IF_NTB_VERSION, &val);
- if (val != NTB_TRANSPORT_VERSION)
- goto out;
-
- ntb_spad_read(ntb, IF_NTB_NUM_QPS, &val);
- if (val != nt->qp_count)
- goto out;
-
- ntb_spad_read(ntb, IF_NTB_NUM_MWS, &val);
- if (val != nt->mw_count)
- goto out;
-
- for (i = 0; i < nt->mw_count; i++) {
- ntb_spad_read(ntb, IF_NTB_MW0_SZ_HIGH + (i * 2), &val);
- val64 = (uint64_t)val << 32;
-
- ntb_spad_read(ntb, IF_NTB_MW0_SZ_LOW + (i * 2), &val);
- val64 |= val;
-
- rc = ntb_set_mw(nt, i, val64);
- if (rc != 0)
- goto free_mws;
- }
-
- nt->link_is_up = true;
- ntb_printf(1, "transport link up\n");
-
- for (i = 0; i < nt->qp_count; i++) {
- qp = &nt->qp_vec[i];
-
- ntb_transport_setup_qp_mw(nt, i);
-
- if (qp->client_ready)
- callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
- }
-
- return;
-
-free_mws:
- for (i = 0; i < nt->mw_count; i++)
- ntb_free_mw(nt, i);
-out:
- if (ntb_link_is_up(ntb, NULL, NULL))
- callout_reset(&nt->link_work,
- NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt);
-}
-
-static int
-ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size)
+ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
+ int len)
{
- struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
- size_t xlat_size, buff_size;
- int rc;
-
- if (size == 0)
- return (EINVAL);
-
- xlat_size = roundup(size, mw->xlat_align_size);
- buff_size = xlat_size;
-
- /* No need to re-setup */
- if (mw->xlat_size == xlat_size)
- return (0);
-
- if (mw->buff_size != 0)
- ntb_free_mw(nt, num_mw);
-
- /* Alloc memory for receiving data. Must be aligned */
- mw->xlat_size = xlat_size;
- mw->buff_size = buff_size;
-
- mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_IF, M_ZERO, 0,
- mw->addr_limit, mw->xlat_align, 0);
- if (mw->virt_addr == NULL) {
- ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n",
- mw->buff_size, mw->xlat_size);
- mw->xlat_size = 0;
- mw->buff_size = 0;
- return (ENOMEM);
- }
- /* TODO: replace with bus_space_* functions */
- mw->dma_addr = vtophys(mw->virt_addr);
-
- /*
- * Ensure that the allocation from contigmalloc is aligned as
- * requested. XXX: This may not be needed -- brought in for parity
- * with the Linux driver.
- */
- if (mw->dma_addr % mw->xlat_align != 0) {
- ntb_printf(0,
- "DMA memory 0x%jx not aligned to BAR size 0x%zx\n",
- (uintmax_t)mw->dma_addr, size);
- ntb_free_mw(nt, num_mw);
- return (ENOMEM);
- }
-
- /* Notify HW the memory location of the receive buffer */
- rc = ntb_mw_set_trans(nt->ntb, num_mw, mw->dma_addr, mw->xlat_size);
- if (rc) {
- ntb_printf(0, "Unable to set mw%d translation\n", num_mw);
- ntb_free_mw(nt, num_mw);
- return (rc);
- }
- return (0);
+ m_freem(data);
+ CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
}
static void
-ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
+ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
+ int len)
{
- struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+ struct ntb_net_queue *q = qp_data;
+ struct ntb_net_ctx *sc = q->sc;
+ struct mbuf *m = data;
+ if_t ifp = q->ifp;
+ uint16_t proto;
- if (mw->virt_addr == NULL)
+ CTR1(KTR_NTB, "RX: rx handler (%d)", len);
+ if (len < 0) {
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return;
-
- ntb_mw_clear_trans(nt->ntb, num_mw);
- contigfree(mw->virt_addr, mw->xlat_size, M_NTB_IF);
- mw->xlat_size = 0;
- mw->buff_size = 0;
- mw->virt_addr = NULL;
-}
-
-static int
-ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num)
-{
- struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
- struct ntb_transport_mw *mw;
- void *offset;
- ntb_q_idx_t i;
- size_t rx_size;
- unsigned num_qps_mw, mw_num, mw_count;
-
- mw_count = nt->mw_count;
- mw_num = QP_TO_MW(nt, qp_num);
- mw = &nt->mw_vec[mw_num];
-
- if (mw->virt_addr == NULL)
- return (ENOMEM);
-
- if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count)
- num_qps_mw = nt->qp_count / mw_count + 1;
- else
- num_qps_mw = nt->qp_count / mw_count;
-
- rx_size = mw->xlat_size / num_qps_mw;
- qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count);
- rx_size -= sizeof(struct ntb_rx_info);
-
- qp->remote_rx_info = (void*)(qp->rx_buff + rx_size);
-
- /* Due to house-keeping, there must be at least 2 buffs */
- qp->rx_max_frame = qmin(rx_size / 2,
- transport_mtu + sizeof(struct ntb_payload_header));
- qp->rx_max_entry = rx_size / qp->rx_max_frame;
- qp->rx_index = 0;
-
- qp->remote_rx_info->entry = qp->rx_max_entry - 1;
-
- /* Set up the hdr offsets with 0s */
- for (i = 0; i < qp->rx_max_entry; i++) {
- offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) -
- sizeof(struct ntb_payload_header));
- memset(offset, 0, sizeof(struct ntb_payload_header));
}
- qp->rx_pkts = 0;
- qp->tx_pkts = 0;
- qp->tx_index = 0;
-
- return (0);
-}
-
-static void
-ntb_qp_link_work(void *arg)
-{
- struct ntb_transport_qp *qp = arg;
- struct ntb_softc *ntb = qp->ntb;
- struct ntb_transport_ctx *nt = qp->transport;
- uint32_t val, dummy;
-
- ntb_spad_read(ntb, IF_NTB_QP_LINKS, &val);
-
- ntb_peer_spad_write(ntb, IF_NTB_QP_LINKS, val | (1ull << qp->qp_num));
-
- /* query remote spad for qp ready bits */
- ntb_peer_spad_read(ntb, IF_NTB_QP_LINKS, &dummy);
-
- /* See if the remote side is up */
- if ((val & (1ull << qp->qp_num)) != 0) {
- ntb_printf(2, "qp link up\n");
- qp->link_is_up = true;
-
- if (qp->event_handler != NULL)
- qp->event_handler(qp->cb_data, NTB_LINK_UP);
-
- taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
- } else if (nt->link_is_up)
- callout_reset(&qp->link_work,
- NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
-}
-
-/* Link down event*/
-static void
-ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
-{
- struct ntb_transport_qp *qp;
- struct _qpset qp_bitmap_alloc;
- unsigned i;
-
- BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
- BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);
-
- /* Pass along the info to any clients */
- for (i = 0; i < nt->qp_count; i++)
- if (test_bit(i, &qp_bitmap_alloc)) {
- qp = &nt->qp_vec[i];
- ntb_qp_link_cleanup(qp);
- callout_drain(&qp->link_work);
- }
-
- if (!nt->link_is_up)
- callout_drain(&nt->link_work);
-
- /*
- * The scratchpad registers keep the values if the remote side
- * goes down, blast them now to give them a sane value the next
- * time they are accessed
- */
- for (i = 0; i < IF_NTB_MAX_SPAD; i++)
- ntb_spad_write(nt->ntb, i, 0);
-}
-
-static void
-ntb_transport_link_cleanup_work(void *arg, int pending __unused)
-{
-
- ntb_transport_link_cleanup(arg);
-}
-
-static void
-ntb_qp_link_down(struct ntb_transport_qp *qp)
-{
-
- ntb_qp_link_cleanup(qp);
-}
-
-static void
-ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
-{
-
- qp->link_is_up = false;
-
- qp->tx_index = qp->rx_index = 0;
- qp->tx_bytes = qp->rx_bytes = 0;
- qp->tx_pkts = qp->rx_pkts = 0;
-
- qp->rx_ring_empty = 0;
- qp->tx_ring_full = 0;
-
- qp->rx_err_no_buf = qp->tx_err_no_buf = 0;
- qp->rx_err_oflow = qp->rx_err_ver = 0;
-}
-
-static void
-ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
-{
- struct ntb_transport_ctx *nt = qp->transport;
-
- callout_drain(&qp->link_work);
- ntb_qp_link_down_reset(qp);
-
- if (qp->event_handler != NULL)
- qp->event_handler(qp->cb_data, NTB_LINK_DOWN);
-
- if (nt->link_is_up)
- callout_reset(&qp->link_work,
- NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
-}
-
-/* Link commanded down */
-/**
- * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
- * @qp: NTB transport layer queue to be disabled
- *
- * Notify NTB transport layer of client's desire to no longer receive data on
- * transport queue specified. It is the client's responsibility to ensure all
- * entries on queue are purged or otherwise handled appropriately.
- */
-static void
-ntb_transport_link_down(struct ntb_transport_qp *qp)
-{
- uint32_t val;
-
- if (qp == NULL)
- return;
-
- qp->client_ready = false;
-
- ntb_spad_read(qp->ntb, IF_NTB_QP_LINKS, &val);
-
- ntb_peer_spad_write(qp->ntb, IF_NTB_QP_LINKS,
- val & ~(1 << qp->qp_num));
-
- if (qp->link_is_up)
- ntb_send_link_down(qp);
- else
- callout_drain(&qp->link_work);
-}
-
-static void
-ntb_send_link_down(struct ntb_transport_qp *qp)
-{
- struct ntb_queue_entry *entry;
- int i, rc;
-
- if (!qp->link_is_up)
- return;
-
- for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
- entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
- if (entry != NULL)
+ m->m_pkthdr.rcvif = ifp;
+ if (sc->num_queues > 1) {
+ m->m_pkthdr.flowid = q - sc->queues;
+ M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
+ }
+ if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
+ m_copydata(m, 12, 2, (void *)&proto);
+ switch (ntohs(proto)) {
+ case ETHERTYPE_IP:
+ if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
+ m->m_pkthdr.csum_data = 0xffff;
+ m->m_pkthdr.csum_flags = NTB_CSUM_SET;
+ }
+ break;
+ case ETHERTYPE_IPV6:
+ if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) {
+ m->m_pkthdr.csum_data = 0xffff;
+ m->m_pkthdr.csum_flags = NTB_CSUM_SET;
+ }
break;
- pause("NTB Wait for link down", hz / 10);
+ }
}
-
- if (entry == NULL)
- return;
-
- entry->cb_data = NULL;
- entry->buf = NULL;
- entry->len = 0;
- entry->flags = IF_NTB_LINK_DOWN_FLAG;
-
- mtx_lock(&qp->transport->tx_lock);
- rc = ntb_process_tx(qp, entry);
- if (rc != 0)
- printf("ntb: Failed to send link down\n");
- mtx_unlock(&qp->transport->tx_lock);
-
- ntb_qp_link_down_reset(qp);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_input(ifp, m);
}
-
-/* List Management */
-
static void
-ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
- struct ntb_queue_list *list)
-{
-
- mtx_lock_spin(lock);
- STAILQ_INSERT_TAIL(list, entry, entry);
- mtx_unlock_spin(lock);
-}
-
-static struct ntb_queue_entry *
-ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list)
-{
- struct ntb_queue_entry *entry;
-
- mtx_lock_spin(lock);
- if (STAILQ_EMPTY(list)) {
- entry = NULL;
- goto out;
- }
- entry = STAILQ_FIRST(list);
- STAILQ_REMOVE_HEAD(list, entry);
-out:
- mtx_unlock_spin(lock);
-
- return (entry);
-}
-
-static struct ntb_queue_entry *
-ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from,
- struct ntb_queue_list *to)
+ntb_net_event_handler(void *data, enum ntb_link_event status)
{
- struct ntb_queue_entry *entry;
+ struct ntb_net_queue *q = data;
+ int new_state;
- mtx_lock_spin(lock);
- if (STAILQ_EMPTY(from)) {
- entry = NULL;
- goto out;
+ switch (status) {
+ case NTB_LINK_DOWN:
+ new_state = LINK_STATE_DOWN;
+ break;
+ case NTB_LINK_UP:
+ new_state = LINK_STATE_UP;
+ break;
+ default:
+ new_state = LINK_STATE_UNKNOWN;
+ break;
}
- entry = STAILQ_FIRST(from);
- STAILQ_REMOVE_HEAD(from, entry);
- STAILQ_INSERT_TAIL(to, entry, entry);
-
-out:
- mtx_unlock_spin(lock);
- return (entry);
+ if_link_state_change(q->ifp, new_state);
}
/* Helper functions */
@@ -1688,27 +493,24 @@ static void
create_random_local_eui48(u_char *eaddr)
{
static uint8_t counter = 0;
- uint32_t seed = ticks;
eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
- memcpy(&eaddr[1], &seed, sizeof(uint32_t));
+ arc4rand(&eaddr[1], 4, 0);
eaddr[5] = counter++;
}
-/**
- * ntb_transport_max_size - Query the max payload size of a qp
- * @qp: NTB transport layer queue to be queried
- *
- * Query the maximum payload size permissible on the given qp
- *
- * RETURNS: the max payload size of a qp
- */
-static unsigned int
-ntb_transport_max_size(struct ntb_transport_qp *qp)
-{
-
- if (qp == NULL)
- return (0);
+static device_method_t ntb_net_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, ntb_net_probe),
+ DEVMETHOD(device_attach, ntb_net_attach),
+ DEVMETHOD(device_detach, ntb_net_detach),
+ DEVMETHOD_END
+};
- return (qp->tx_max_frame - sizeof(struct ntb_payload_header));
-}
+devclass_t ntb_net_devclass;
+static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
+ sizeof(struct ntb_net_ctx));
+DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass,
+ NULL, NULL);
+MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
+MODULE_VERSION(if_ntb, 1);
diff --git a/sys/dev/ntb/ntb.c b/sys/dev/ntb/ntb.c
new file mode 100644
index 0000000..44c0c61
--- /dev/null
+++ b/sys/dev/ntb/ntb.c
@@ -0,0 +1,462 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+
+#include "ntb.h"
+
+devclass_t ntb_hw_devclass;
+SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
+
+struct ntb_child {
+ device_t dev;
+ int enabled;
+ int mwoff;
+ int mwcnt;
+ int spadoff;
+ int spadcnt;
+ int dboff;
+ int dbmask;
+ void *ctx;
+ const struct ntb_ctx_ops *ctx_ops;
+ struct rmlock ctx_lock;
+ struct ntb_child *next;
+};
+
+int
+ntb_register_device(device_t dev)
+{
+ struct ntb_child **cpp = device_get_softc(dev);
+ struct ntb_child *nc;
+ int i, mw, mwu, mwt, spad, spadu, spadt, db, dbu, dbt;
+ char cfg[128] = "";
+ char buf[32];
+ char *n, *np, *c, *p, *name;
+
+ mwu = 0;
+ mwt = NTB_MW_COUNT(dev);
+ spadu = 0;
+ spadt = NTB_SPAD_COUNT(dev);
+ dbu = 0;
+ dbt = flsll(NTB_DB_VALID_MASK(dev));
+
+ device_printf(dev, "%d memory windows, %d scratchpads, "
+ "%d doorbells\n", mwt, spadt, dbt);
+
+ snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev),
+ device_get_unit(dev));
+ TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg));
+ n = cfg;
+ i = 0;
+ while ((c = strsep(&n, ",")) != NULL) {
+ np = c;
+ name = strsep(&np, ":");
+ if (name != NULL && name[0] == 0)
+ name = NULL;
+ p = strsep(&np, ":");
+ mw = (p && p[0] != 0) ? strtol(p, NULL, 10) : mwt - mwu;
+ p = strsep(&np, ":");
+ spad = (p && p[0] != 0) ? strtol(p, NULL, 10) : spadt - spadu;
+ db = (np && np[0] != 0) ? strtol(np, NULL, 10) : dbt - dbu;
+
+ if (mw > mwt - mwu || spad > spadt - spadu || db > dbt - dbu) {
+ device_printf(dev, "Not enough resources for config\n");
+ break;
+ }
+
+ nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO);
+ nc->mwoff = mwu;
+ nc->mwcnt = mw;
+ nc->spadoff = spadu;
+ nc->spadcnt = spad;
+ nc->dboff = dbu;
+ nc->dbmask = (db == 0) ? 0 : (0xffffffffffffffff >> (64 - db));
+ rm_init(&nc->ctx_lock, "ntb ctx");
+ nc->dev = device_add_child(dev, name, -1);
+ if (nc->dev == NULL) {
+ ntb_unregister_device(dev);
+ return (ENOMEM);
+ }
+ device_set_ivars(nc->dev, nc);
+ *cpp = nc;
+ cpp = &nc->next;
+
+ if (bootverbose) {
+ device_printf(dev, "%d \"%s\":", i, name);
+ if (mw > 0) {
+ printf(" memory windows %d", mwu);
+ if (mw > 1)
+ printf("-%d", mwu + mw - 1);
+ }
+ if (spad > 0) {
+ printf(" scratchpads %d", spadu);
+ if (spad > 1)
+ printf("-%d", spadu + spad - 1);
+ }
+ if (db > 0) {
+ printf(" doorbells %d", dbu);
+ if (db > 1)
+ printf("-%d", dbu + db - 1);
+ }
+ printf("\n");
+ }
+
+ mwu += mw;
+ spadu += spad;
+ dbu += db;
+ i++;
+ }
+
+ bus_generic_attach(dev);
+ return (0);
+}
+
+int
+ntb_unregister_device(device_t dev)
+{
+ struct ntb_child **cpp = device_get_softc(dev);
+ struct ntb_child *nc;
+ int error = 0;
+
+ while ((nc = *cpp) != NULL) {
+ *cpp = (*cpp)->next;
+ error = device_delete_child(dev, nc->dev);
+ if (error)
+ break;
+ rm_destroy(&nc->ctx_lock);
+ free(nc, M_DEVBUF);
+ }
+ return (error);
+}
+
+void
+ntb_link_event(device_t dev)
+{
+ struct ntb_child **cpp = device_get_softc(dev);
+ struct ntb_child *nc;
+ struct rm_priotracker ctx_tracker;
+
+ for (nc = *cpp; nc != NULL; nc = nc->next) {
+ rm_rlock(&nc->ctx_lock, &ctx_tracker);
+ if (nc->ctx_ops != NULL && nc->ctx_ops->link_event != NULL)
+ nc->ctx_ops->link_event(nc->ctx);
+ rm_runlock(&nc->ctx_lock, &ctx_tracker);
+ }
+}
+
+void
+ntb_db_event(device_t dev, uint32_t vec)
+{
+ struct ntb_child **cpp = device_get_softc(dev);
+ struct ntb_child *nc;
+ struct rm_priotracker ctx_tracker;
+
+ for (nc = *cpp; nc != NULL; nc = nc->next) {
+ rm_rlock(&nc->ctx_lock, &ctx_tracker);
+ if (nc->ctx_ops != NULL && nc->ctx_ops->db_event != NULL)
+ nc->ctx_ops->db_event(nc->ctx, vec);
+ rm_runlock(&nc->ctx_lock, &ctx_tracker);
+ }
+}
+
+bool
+ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width)
+{
+
+ return (NTB_LINK_IS_UP(device_get_parent(ntb), speed, width));
+}
+
+int
+ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+ struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev));
+ struct ntb_child *nc1;
+
+ for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) {
+ if (nc1->enabled) {
+ nc->enabled = 1;
+ return (0);
+ }
+ }
+ nc->enabled = 1;
+ return (NTB_LINK_ENABLE(device_get_parent(ntb), speed, width));
+}
+
+int
+ntb_link_disable(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+ struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev));
+ struct ntb_child *nc1;
+
+ if (!nc->enabled)
+ return (0);
+ nc->enabled = 0;
+ for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) {
+ if (nc1->enabled)
+ return (0);
+ }
+ return (NTB_LINK_DISABLE(device_get_parent(ntb)));
+}
+
+bool
+ntb_link_enabled(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (nc->enabled && NTB_LINK_ENABLED(device_get_parent(ntb)));
+}
+
+int
+ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ if (ctx == NULL || ctx_ops == NULL)
+ return (EINVAL);
+
+ rm_wlock(&nc->ctx_lock);
+ if (nc->ctx_ops != NULL) {
+ rm_wunlock(&nc->ctx_lock);
+ return (EINVAL);
+ }
+ nc->ctx = ctx;
+ nc->ctx_ops = ctx_ops;
+ rm_wunlock(&nc->ctx_lock);
+
+ return (0);
+}
+
+void *
+ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ KASSERT(nc->ctx != NULL && nc->ctx_ops != NULL, ("bogus"));
+ if (ctx_ops != NULL)
+ *ctx_ops = nc->ctx_ops;
+ return (nc->ctx);
+}
+
+void
+ntb_clear_ctx(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ rm_wlock(&nc->ctx_lock);
+ nc->ctx = NULL;
+ nc->ctx_ops = NULL;
+ rm_wunlock(&nc->ctx_lock);
+}
+
+uint8_t
+ntb_mw_count(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (nc->mwcnt);
+}
+
+int
+ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base,
+ caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
+ bus_addr_t *plimit)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_MW_GET_RANGE(device_get_parent(ntb), mw_idx + nc->mwoff,
+ base, vbase, size, align, align_size, plimit));
+}
+
+int
+ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr, size_t size)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_MW_SET_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff,
+ addr, size));
+}
+
+int
+ntb_mw_clear_trans(device_t ntb, unsigned mw_idx)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_MW_CLEAR_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff));
+}
+
+int
+ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_MW_GET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode));
+}
+
+int
+ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_MW_SET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode));
+}
+
+uint8_t
+ntb_spad_count(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (nc->spadcnt);
+}
+
+void
+ntb_spad_clear(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+ unsigned i;
+
+ for (i = 0; i < nc->spadcnt; i++)
+ NTB_SPAD_WRITE(device_get_parent(ntb), i + nc->spadoff, 0);
+}
+
+int
+ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff, val));
+}
+
+int
+ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff, val));
+}
+
+int
+ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_PEER_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff,
+ val));
+}
+
+int
+ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_PEER_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff,
+ val));
+}
+
+uint64_t
+ntb_db_valid_mask(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (nc->dbmask);
+}
+
+int
+ntb_db_vector_count(device_t ntb)
+{
+
+ return (NTB_DB_VECTOR_COUNT(device_get_parent(ntb)));
+}
+
+uint64_t
+ntb_db_vector_mask(device_t ntb, uint32_t vector)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return ((NTB_DB_VECTOR_MASK(device_get_parent(ntb), vector)
+ >> nc->dboff) & nc->dbmask);
+}
+
+int
+ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size)
+{
+
+ return (NTB_PEER_DB_ADDR(device_get_parent(ntb), db_addr, db_size));
+}
+
+void
+ntb_db_clear(device_t ntb, uint64_t bits)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_DB_CLEAR(device_get_parent(ntb), bits << nc->dboff));
+}
+
+void
+ntb_db_clear_mask(device_t ntb, uint64_t bits)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_DB_CLEAR_MASK(device_get_parent(ntb), bits << nc->dboff));
+}
+
+uint64_t
+ntb_db_read(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return ((NTB_DB_READ(device_get_parent(ntb)) >> nc->dboff)
+ & nc->dbmask);
+}
+
+void
+ntb_db_set_mask(device_t ntb, uint64_t bits)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_DB_SET_MASK(device_get_parent(ntb), bits << nc->dboff));
+}
+
+void
+ntb_peer_db_set(device_t ntb, uint64_t bits)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_PEER_DB_SET(device_get_parent(ntb), bits << nc->dboff));
+}
+
+MODULE_VERSION(ntb, 1);
diff --git a/sys/dev/ntb/ntb.h b/sys/dev/ntb/ntb.h
new file mode 100644
index 0000000..8593c65
--- /dev/null
+++ b/sys/dev/ntb/ntb.h
@@ -0,0 +1,409 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NTB_H_
+#define _NTB_H_
+
+#include "ntb_if.h"
+
+extern devclass_t ntb_hw_devclass;
+SYSCTL_DECL(_hw_ntb);
+
+int ntb_register_device(device_t ntb);
+int ntb_unregister_device(device_t ntb);
+
+/*
+ * ntb_link_event() - notify driver context of a change in link status
+ * @ntb: NTB device context
+ *
+ * Notify the driver context that the link status may have changed. The driver
+ * should call intb_link_is_up() to get the current status.
+ */
+void ntb_link_event(device_t ntb);
+
+/*
+ * ntb_db_event() - notify driver context of a doorbell event
+ * @ntb: NTB device context
+ * @vector: Interrupt vector number
+ *
+ * Notify the driver context of a doorbell event. If hardware supports
+ * multiple interrupt vectors for doorbells, the vector number indicates which
+ * vector received the interrupt. The vector number is relative to the first
+ * vector used for doorbells, starting at zero, and must be less than
+ * ntb_db_vector_count(). The driver may call ntb_db_read() to check which
+ * doorbell bits need service, and ntb_db_vector_mask() to determine which of
+ * those bits are associated with the vector number.
+ */
+void ntb_db_event(device_t ntb, uint32_t vec);
+
+/*
+ * ntb_link_is_up() - get the current ntb link state
+ * @ntb: NTB device context
+ * @speed: OUT - The link speed expressed as PCIe generation number
+ * @width: OUT - The link width expressed as the number of PCIe lanes
+ *
+ * RETURNS: true or false based on the hardware link state
+ */
+bool ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width);
+
+/*
+ * ntb_link_enable() - enable the link on the secondary side of the ntb
+ * @ntb: NTB device context
+ * @max_speed: The maximum link speed expressed as PCIe generation number[0]
+ * @max_width: The maximum link width expressed as the number of PCIe lanes[0]
+ *
+ * Enable the link on the secondary side of the ntb. This can only be done
+ * from the primary side of the ntb in primary or b2b topology. The ntb device
+ * should train the link to its maximum speed and width, or the requested speed
+ * and width, whichever is smaller, if supported.
+ *
+ * Return: Zero on success, otherwise an error number.
+ *
+ * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
+ * and width input will be ignored.
+ */
+int ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width);
+
+/*
+ * ntb_link_disable() - disable the link on the secondary side of the ntb
+ * @ntb: NTB device context
+ *
+ * Disable the link on the secondary side of the ntb. This can only be done
+ * from the primary side of the ntb in primary or b2b topology. The ntb device
+ * should disable the link. Returning from this call must indicate that a
+ * barrier has passed, though with no more writes may pass in either direction
+ * across the link, except if this call returns an error number.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_link_disable(device_t ntb);
+
+/*
+ * get enable status of the link on the secondary side of the ntb
+ */
+bool ntb_link_enabled(device_t ntb);
+
+/*
+ * ntb_set_ctx() - associate a driver context with an ntb device
+ * @ntb: NTB device context
+ * @ctx: Driver context
+ * @ctx_ops: Driver context operations
+ *
+ * Associate a driver context and operations with a ntb device. The context is
+ * provided by the client driver, and the driver may associate a different
+ * context with each ntb device.
+ *
+ * Return: Zero if the context is associated, otherwise an error number.
+ */
+int ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops);
+
+/*
+ * ntb_set_ctx() - get a driver context associated with an ntb device
+ * @ntb: NTB device context
+ * @ctx_ops: Driver context operations
+ *
+ * Get a driver context and operations associated with a ntb device.
+ */
+void * ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops);
+
+/*
+ * ntb_clear_ctx() - disassociate any driver context from an ntb device
+ * @ntb: NTB device context
+ *
+ * Clear any association that may exist between a driver context and the ntb
+ * device.
+ */
+void ntb_clear_ctx(device_t ntb);
+
+/*
+ * ntb_mw_count() - Get the number of memory windows available for KPI
+ * consumers.
+ *
+ * (Excludes any MW wholly reserved for register access.)
+ */
+uint8_t ntb_mw_count(device_t ntb);
+
+/*
+ * ntb_mw_get_range() - get the range of a memory window
+ * @ntb: NTB device context
+ * @idx: Memory window number
+ * @base: OUT - the base address for mapping the memory window
+ * @size: OUT - the size for mapping the memory window
+ * @align: OUT - the base alignment for translating the memory window
+ * @align_size: OUT - the size alignment for translating the memory window
+ *
+ * Get the range of a memory window. NULL may be given for any output
+ * parameter if the value is not needed. The base and size may be used for
+ * mapping the memory window, to access the peer memory. The alignment and
+ * size may be used for translating the memory window, for the peer to access
+ * memory on the local system.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base,
+ caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
+ bus_addr_t *plimit);
+
+/*
+ * ntb_mw_set_trans() - set the translation of a memory window
+ * @ntb: NTB device context
+ * @idx: Memory window number
+ * @addr: The dma address local memory to expose to the peer
+ * @size: The size of the local memory to expose to the peer
+ *
+ * Set the translation of a memory window. The peer may access local memory
+ * through the window starting at the address, up to the size. The address
+ * must be aligned to the alignment specified by ntb_mw_get_range(). The size
+ * must be aligned to the size alignment specified by ntb_mw_get_range(). The
+ * address must be below the plimit specified by ntb_mw_get_range() (i.e. for
+ * 32-bit BARs).
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr,
+ size_t size);
+
+/*
+ * ntb_mw_clear_trans() - clear the translation of a memory window
+ * @ntb: NTB device context
+ * @idx: Memory window number
+ *
+ * Clear the translation of a memory window. The peer may no longer access
+ * local memory through the window.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_clear_trans(device_t ntb, unsigned mw_idx);
+
+/*
+ * ntb_mw_get_wc - Get the write-combine status of a memory window
+ *
+ * Returns: Zero on success, setting *wc; otherwise an error number (e.g. if
+ * idx is an invalid memory window).
+ *
+ * Mode is a VM_MEMATTR_* type.
+ */
+int ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode);
+
+/*
+ * ntb_mw_set_wc - Set the write-combine status of a memory window
+ *
+ * If 'mode' matches the current status, this does nothing and succeeds. Mode
+ * is a VM_MEMATTR_* type.
+ *
+ * Returns: Zero on success, setting the caching attribute on the virtual
+ * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid
+ * memory window, or if changing the caching attribute fails).
+ */
+int ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode);
+
+/*
+ * ntb_spad_count() - get the total scratch regs usable
+ * @ntb: pointer to ntb_softc instance
+ *
+ * This function returns the max 32bit scratchpad registers usable by the
+ * upper layer.
+ *
+ * RETURNS: total number of scratch pad registers available
+ */
+uint8_t ntb_spad_count(device_t ntb);
+
+/*
+ * ntb_get_max_spads() - zero local scratch registers
+ * @ntb: pointer to ntb_softc instance
+ *
+ * This functions overwrites all local scratchpad registers with zeroes.
+ */
+void ntb_spad_clear(device_t ntb);
+
+/*
+ * ntb_spad_write() - write to the secondary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register. The register resides on the secondary (external) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val);
+
+/*
+ * ntb_spad_read() - read from the primary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to scratchpad register, 0 based
+ * @val: pointer to 32bit integer for storing the register value
+ *
+ * This function allows reading of the 32bit scratchpad register on
+ * the primary (internal) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val);
+
+/*
+ * ntb_peer_spad_write() - write to the secondary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register. The register resides on the secondary (external) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val);
+
+/*
+ * ntb_peer_spad_read() - read from the primary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to scratchpad register, 0 based
+ * @val: pointer to 32bit integer for storing the register value
+ *
+ * This function allows reading of the 32bit scratchpad register on
+ * the primary (internal) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val);
+
+/*
+ * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
+ * @ntb: NTB device context
+ *
+ * Hardware may support different number or arrangement of doorbell bits.
+ *
+ * Return: A mask of doorbell bits supported by the ntb.
+ */
+uint64_t ntb_db_valid_mask(device_t ntb);
+
+/*
+ * ntb_db_vector_count() - get the number of doorbell interrupt vectors
+ * @ntb: NTB device context.
+ *
+ * Hardware may support different number of interrupt vectors.
+ *
+ * Return: The number of doorbell interrupt vectors.
+ */
+int ntb_db_vector_count(device_t ntb);
+
+/*
+ * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
+ * @ntb: NTB device context
+ * @vector: Doorbell vector number
+ *
+ * Each interrupt vector may have a different number or arrangement of bits.
+ *
+ * Return: A mask of doorbell bits serviced by a vector.
+ */
+uint64_t ntb_db_vector_mask(device_t ntb, uint32_t vector);
+
+/*
+ * ntb_peer_db_addr() - address and size of the peer doorbell register
+ * @ntb: NTB device context.
+ * @db_addr: OUT - The address of the peer doorbell register.
+ * @db_size: OUT - The number of bytes to write the peer doorbell register.
+ *
+ * Return the address of the peer doorbell register. This may be used, for
+ * example, by drivers that offload memory copy operations to a dma engine.
+ * The drivers may wish to ring the peer doorbell at the completion of memory
+ * copy operations. For efficiency, and to simplify ordering of operations
+ * between the dma memory copies and the ringing doorbell, the driver may
+ * append one additional dma memory copy with the doorbell register as the
+ * destination, after the memory copy operations.
+ *
+ * Return: Zero on success, otherwise an error number.
+ *
+ * Note that writing the peer doorbell via a memory window will *not* generate
+ * an interrupt on the remote host; that must be done separately.
+ */
+int ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size);
+
+/*
+ * ntb_db_clear() - clear bits in the local doorbell register
+ * @ntb: NTB device context.
+ * @db_bits: Doorbell bits to clear.
+ *
+ * Clear bits in the local doorbell register, arming the bits for the next
+ * doorbell.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+void ntb_db_clear(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_db_clear_mask() - clear bits in the local doorbell mask
+ * @ntb: NTB device context.
+ * @db_bits: Doorbell bits to clear.
+ *
+ * Clear bits in the local doorbell mask register, allowing doorbell interrupts
+ * from being generated for those doorbell bits. If a doorbell bit is already
+ * set at the time the mask is cleared, and the corresponding mask bit is
+ * changed from set to clear, then the ntb driver must ensure that
+ * ntb_db_event() is called. If the hardware does not generate the interrupt
+ * on clearing the mask bit, then the driver must call ntb_db_event() anyway.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+void ntb_db_clear_mask(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_db_read() - read the local doorbell register
+ * @ntb: NTB device context.
+ *
+ * Read the local doorbell register, and return the bits that are set.
+ *
+ * Return: The bits currently set in the local doorbell register.
+ */
+uint64_t ntb_db_read(device_t ntb);
+
+/*
+ * ntb_db_set_mask() - set bits in the local doorbell mask
+ * @ntb: NTB device context.
+ * @db_bits: Doorbell mask bits to set.
+ *
+ * Set bits in the local doorbell mask register, preventing doorbell interrupts
+ * from being generated for those doorbell bits. Bits that were already set
+ * must remain set.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+void ntb_db_set_mask(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_peer_db_set() - Set the doorbell on the secondary/external side
+ * @ntb: pointer to ntb_softc instance
+ * @bit: doorbell bits to ring
+ *
+ * This function allows triggering of a doorbell on the secondary/external
+ * side that will initiate an interrupt on the remote host
+ */
+void ntb_peer_db_set(device_t ntb, uint64_t bits);
+
+#endif /* _NTB_H_ */
diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.c b/sys/dev/ntb/ntb_hw/ntb_hw.c
index c1381f0..dac3699 100644
--- a/sys/dev/ntb/ntb_hw/ntb_hw.c
+++ b/sys/dev/ntb/ntb_hw/ntb_hw.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
* Copyright (C) 2013 Intel Corporation
* Copyright (C) 2015 EMC Corporation
* All rights reserved.
@@ -25,6 +26,16 @@
* SUCH DAMAGE.
*/
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using a PCI-e links, providing remote memory access.
+ *
+ * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -33,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
+#include <sys/interrupt.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
@@ -50,19 +62,7 @@ __FBSDID("$FreeBSD$");
#include <dev/pci/pcivar.h>
#include "ntb_regs.h"
-#include "ntb_hw.h"
-
-/*
- * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
- * allows you to connect two systems using a PCI-e link.
- *
- * This module contains the hardware abstraction layer for the NTB. It allows
- * you to send and receive interrupts, map the memory windows and send and
- * receive messages in the scratch-pad registers.
- *
- * NOTE: Much of the code in this module is shared with Linux. Any patches may
- * be picked up and redistributed in Linux with a dual GPL/BSD license.
- */
+#include "../ntb.h"
#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)
@@ -70,8 +70,6 @@ __FBSDID("$FreeBSD$");
#define ATOM_LINK_RECOVERY_TIME 500 /* ms */
#define BAR_HIGH_MASK (~((1ull << 12) - 1))
-#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
-
#define NTB_MSIX_VER_GUARD 0xaabbccdd
#define NTB_MSIX_RECEIVED 0xe0f0e0f0
@@ -122,8 +120,8 @@ enum {
};
/* Device features and workarounds */
-#define HAS_FEATURE(feature) \
- ((ntb->features & (feature)) != 0)
+#define HAS_FEATURE(ntb, feature) \
+ (((ntb)->features & (feature)) != 0)
struct ntb_hw_info {
uint32_t device_id;
@@ -202,6 +200,9 @@ struct ntb_msix_data {
};
struct ntb_softc {
+ /* ntb.c context. Do not move! Must go first! */
+ void *ntb_store;
+
device_t device;
enum ntb_device_type type;
uint32_t features;
@@ -220,13 +221,7 @@ struct ntb_softc {
struct callout heartbeat_timer;
struct callout lr_timer;
- void *ntb_ctx;
- const struct ntb_ctx_ops *ctx_ops;
struct ntb_vec *msix_vec;
-#define CTX_LOCK(sc) mtx_lock(&(sc)->ctx_lock)
-#define CTX_UNLOCK(sc) mtx_unlock(&(sc)->ctx_lock)
-#define CTX_ASSERT(sc,f) mtx_assert(&(sc)->ctx_lock, (f))
- struct mtx ctx_lock;
uint32_t ppd;
enum ntb_conn_type conn_type;
@@ -258,6 +253,7 @@ struct ntb_softc {
uint64_t db_valid_mask;
uint64_t db_link_mask;
uint64_t db_mask;
+ uint64_t fake_db_bell; /* NTB_SB01BASE_LOCKUP*/
int last_ts; /* ticks @ last irq */
@@ -287,61 +283,74 @@ bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
}
#endif
-#define ntb_bar_read(SIZE, bar, offset) \
+#define intel_ntb_bar_read(SIZE, bar, offset) \
bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
ntb->bar_info[(bar)].pci_bus_handle, (offset))
-#define ntb_bar_write(SIZE, bar, offset, val) \
+#define intel_ntb_bar_write(SIZE, bar, offset, val) \
bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
-#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
-#define ntb_reg_write(SIZE, offset, val) \
- ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
-#define ntb_mw_read(SIZE, offset) \
- ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
-#define ntb_mw_write(SIZE, offset, val) \
- ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
+#define intel_ntb_reg_read(SIZE, offset) \
+ intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
+#define intel_ntb_reg_write(SIZE, offset, val) \
+ intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
+#define intel_ntb_mw_read(SIZE, offset) \
+ intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
+ offset)
+#define intel_ntb_mw_write(SIZE, offset, val) \
+ intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
offset, val)
-static int ntb_probe(device_t device);
-static int ntb_attach(device_t device);
-static int ntb_detach(device_t device);
-static unsigned ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
-static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
+static int intel_ntb_probe(device_t device);
+static int intel_ntb_attach(device_t device);
+static int intel_ntb_detach(device_t device);
+static uint64_t intel_ntb_db_valid_mask(device_t dev);
+static void intel_ntb_spad_clear(device_t dev);
+static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
+static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
+ enum ntb_width *width);
+static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
+ enum ntb_width width);
+static int intel_ntb_link_disable(device_t dev);
+static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
+static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);
+
+static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
+static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
uint32_t *base, uint32_t *xlat, uint32_t *lmt);
-static int ntb_map_pci_bars(struct ntb_softc *ntb);
-static int ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
+static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
+static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
vm_memattr_t);
static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
const char *);
static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
static int map_memory_window_bar(struct ntb_softc *ntb,
struct ntb_pci_bar_info *bar);
-static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
-static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
-static int ntb_init_isr(struct ntb_softc *ntb);
-static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
-static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
-static void ntb_teardown_interrupts(struct ntb_softc *ntb);
-static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
-static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
+static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
+static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
+static int intel_ntb_init_isr(struct ntb_softc *ntb);
+static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
+static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
+static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
+static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
+static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
static void ndev_vec_isr(void *arg);
static void ndev_irq_isr(void *arg);
static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
-static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
-static void ntb_free_msix_vec(struct ntb_softc *ntb);
-static void ntb_get_msix_info(struct ntb_softc *ntb);
-static void ntb_exchange_msix(void *);
-static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
-static void ntb_detect_max_mw(struct ntb_softc *ntb);
-static int ntb_detect_xeon(struct ntb_softc *ntb);
-static int ntb_detect_atom(struct ntb_softc *ntb);
-static int ntb_xeon_init_dev(struct ntb_softc *ntb);
-static int ntb_atom_init_dev(struct ntb_softc *ntb);
-static void ntb_teardown_xeon(struct ntb_softc *ntb);
+static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
+static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
+static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
+static void intel_ntb_exchange_msix(void *);
+static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
+static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
+static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
+static int intel_ntb_detect_atom(struct ntb_softc *ntb);
+static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
+static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
+static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
enum ntb_bar regbar);
@@ -351,18 +360,16 @@ static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
enum ntb_bar idx);
static int xeon_setup_b2b_mw(struct ntb_softc *,
const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
-static int xeon_setup_msix_bar(struct ntb_softc *);
static inline bool link_is_up(struct ntb_softc *ntb);
static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
static inline bool atom_link_is_err(struct ntb_softc *ntb);
-static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
-static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
+static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
+static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
static void atom_link_hb(void *arg);
-static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
static void recover_atom_link(void *arg);
-static bool ntb_poll_link(struct ntb_softc *ntb);
+static bool intel_ntb_poll_link(struct ntb_softc *ntb);
static void save_bar_parameters(struct ntb_pci_bar_info *bar);
-static void ntb_sysctl_init(struct ntb_softc *);
+static void intel_ntb_sysctl_init(struct ntb_softc *);
static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
@@ -372,7 +379,7 @@ static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
static unsigned g_ntb_hw_debug_level;
SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
&g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
-#define ntb_printf(lvl, ...) do { \
+#define intel_ntb_printf(lvl, ...) do { \
if ((lvl) <= g_ntb_hw_debug_level) { \
device_printf(ntb->device, __VA_ARGS__); \
} \
@@ -395,7 +402,7 @@ SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
"UC-: " __XSTRING(_NTB_PAT_UCM));
static inline vm_memattr_t
-ntb_pat_flags(void)
+intel_ntb_pat_flags(void)
{
switch (g_ntb_mw_pat) {
@@ -421,7 +428,7 @@ ntb_pat_flags(void)
* anywhere better yet.
*/
static inline const char *
-ntb_vm_memattr_to_str(vm_memattr_t pat)
+intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
{
switch (pat) {
@@ -442,7 +449,7 @@ ntb_vm_memattr_to_str(vm_memattr_t pat)
}
}
-static int g_ntb_msix_idx = 0;
+static int g_ntb_msix_idx = 1;
SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
0, "Use this memory window to access the peer MSIX message complex on "
"certain Xeon-based NTB systems, as a workaround for a hardware errata. "
@@ -457,6 +464,18 @@ SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
"available memory window. Both sides of the NTB MUST set the same "
"value here! (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
+/* Hardware owns the low 16 bits of features. */
+#define NTB_BAR_SIZE_4K (1 << 0)
+#define NTB_SDOORBELL_LOCKUP (1 << 1)
+#define NTB_SB01BASE_LOCKUP (1 << 2)
+#define NTB_B2BDOORBELL_BIT14 (1 << 3)
+/* Software/configuration owns the top 16 bits. */
+#define NTB_SPLIT_BAR (1ull << 16)
+
+#define NTB_FEATURES_STR \
+ "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
+ "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
+
static struct ntb_hw_info pci_ids[] = {
/* XXX: PS/SS IDs left out until they are supported. */
{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
@@ -597,35 +616,15 @@ SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
*/
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
-static device_method_t ntb_pci_methods[] = {
- /* Device interface */
- DEVMETHOD(device_probe, ntb_probe),
- DEVMETHOD(device_attach, ntb_attach),
- DEVMETHOD(device_detach, ntb_detach),
- DEVMETHOD_END
-};
-
-static driver_t ntb_pci_driver = {
- "ntb_hw",
- ntb_pci_methods,
- sizeof(struct ntb_softc),
-};
-
-static devclass_t ntb_devclass;
-DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
-MODULE_VERSION(ntb_hw, 1);
-
-SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
-
/*
* OS <-> Driver linkage functions
*/
static int
-ntb_probe(device_t device)
+intel_ntb_probe(device_t device)
{
struct ntb_hw_info *p;
- p = ntb_get_device_info(pci_get_devid(device));
+ p = intel_ntb_get_device_info(pci_get_devid(device));
if (p == NULL)
return (ENXIO);
@@ -634,14 +633,14 @@ ntb_probe(device_t device)
}
static int
-ntb_attach(device_t device)
+intel_ntb_attach(device_t device)
{
struct ntb_softc *ntb;
struct ntb_hw_info *p;
int error;
- ntb = DEVICE2SOFTC(device);
- p = ntb_get_device_info(pci_get_devid(device));
+ ntb = device_get_softc(device);
+ p = intel_ntb_get_device_info(pci_get_devid(device));
ntb->device = device;
ntb->type = p->type;
@@ -654,47 +653,52 @@ ntb_attach(device_t device)
callout_init(&ntb->lr_timer, 1);
callout_init(&ntb->peer_msix_work, 1);
mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
- mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_DEF);
if (ntb->type == NTB_ATOM)
- error = ntb_detect_atom(ntb);
+ error = intel_ntb_detect_atom(ntb);
else
- error = ntb_detect_xeon(ntb);
+ error = intel_ntb_detect_xeon(ntb);
if (error != 0)
goto out;
- ntb_detect_max_mw(ntb);
+ intel_ntb_detect_max_mw(ntb);
pci_enable_busmaster(ntb->device);
- error = ntb_map_pci_bars(ntb);
+ error = intel_ntb_map_pci_bars(ntb);
if (error != 0)
goto out;
if (ntb->type == NTB_ATOM)
- error = ntb_atom_init_dev(ntb);
+ error = intel_ntb_atom_init_dev(ntb);
else
- error = ntb_xeon_init_dev(ntb);
+ error = intel_ntb_xeon_init_dev(ntb);
if (error != 0)
goto out;
- ntb_spad_clear(ntb);
+ intel_ntb_spad_clear(device);
+
+ intel_ntb_poll_link(ntb);
- ntb_poll_link(ntb);
+ intel_ntb_sysctl_init(ntb);
- ntb_sysctl_init(ntb);
+ /* Attach children to this controller */
+ error = ntb_register_device(device);
out:
if (error != 0)
- ntb_detach(device);
+ intel_ntb_detach(device);
return (error);
}
static int
-ntb_detach(device_t device)
+intel_ntb_detach(device_t device)
{
struct ntb_softc *ntb;
- ntb = DEVICE2SOFTC(device);
+ ntb = device_get_softc(device);
+
+ /* Detach & delete all children */
+ ntb_unregister_device(device);
if (ntb->self_reg != NULL) {
DB_MASK_LOCK(ntb);
@@ -706,13 +710,12 @@ ntb_detach(device_t device)
callout_drain(&ntb->peer_msix_work);
pci_disable_busmaster(ntb->device);
if (ntb->type == NTB_XEON)
- ntb_teardown_xeon(ntb);
- ntb_teardown_interrupts(ntb);
+ intel_ntb_teardown_xeon(ntb);
+ intel_ntb_teardown_interrupts(ntb);
mtx_destroy(&ntb->db_mask_lock);
- mtx_destroy(&ntb->ctx_lock);
- ntb_unmap_pci_bar(ntb);
+ intel_ntb_unmap_pci_bar(ntb);
return (0);
}
@@ -721,7 +724,7 @@ ntb_detach(device_t device)
* Driver internal routines
*/
static inline enum ntb_bar
-ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
+intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
{
KASSERT(mw < ntb->mw_count,
@@ -736,7 +739,7 @@ bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
{
/* XXX This assertion could be stronger. */
KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
- return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
+ return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
}
static inline void
@@ -777,7 +780,7 @@ bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
}
static int
-ntb_map_pci_bars(struct ntb_softc *ntb)
+intel_ntb_map_pci_bars(struct ntb_softc *ntb)
{
int rc;
@@ -802,7 +805,7 @@ ntb_map_pci_bars(struct ntb_softc *ntb)
ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
- if (!HAS_FEATURE(NTB_SPLIT_BAR))
+ if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
goto out;
ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
@@ -876,7 +879,7 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
* but the PCI driver does not honor the size in this call, so we have
* to modify it after the fact.
*/
- if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
+ if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
if (bar->pci_resource_id == PCIR_BAR(2))
bar_size_bits = pci_read_config(ntb->device,
XEON_PBAR23SZ_OFFSET, 1);
@@ -903,7 +906,7 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
* Optionally, mark MW BARs as anything other than UC to improve
* performance.
*/
- mapmode = ntb_pat_flags();
+ mapmode = intel_ntb_pat_flags();
if (mapmode == bar->map_mode)
return (0);
@@ -916,7 +919,7 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
(char *)bar->vbase + bar->size - 1,
(void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
- ntb_vm_memattr_to_str(mapmode));
+ intel_ntb_vm_memattr_to_str(mapmode));
} else
device_printf(ntb->device,
"Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
@@ -924,13 +927,13 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
(char *)bar->vbase + bar->size - 1,
(void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
- ntb_vm_memattr_to_str(mapmode), rc);
+ intel_ntb_vm_memattr_to_str(mapmode), rc);
/* Proceed anyway */
return (0);
}
static void
-ntb_unmap_pci_bar(struct ntb_softc *ntb)
+intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
{
struct ntb_pci_bar_info *current_bar;
int i;
@@ -945,7 +948,7 @@ ntb_unmap_pci_bar(struct ntb_softc *ntb)
}
static int
-ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
+intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
{
uint32_t i;
int rc;
@@ -998,7 +1001,7 @@ SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
* round-robin fashion.
*/
static int
-ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
+intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
{
u_int *vectors;
uint32_t i;
@@ -1018,7 +1021,7 @@ ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
}
static int
-ntb_init_isr(struct ntb_softc *ntb)
+intel_ntb_init_isr(struct ntb_softc *ntb)
{
uint32_t desired_vectors, num_vectors;
int rc;
@@ -1044,7 +1047,7 @@ ntb_init_isr(struct ntb_softc *ntb)
num_vectors--;
if (rc == 0 && num_vectors < desired_vectors) {
- rc = ntb_remap_msix(ntb->device, desired_vectors,
+ rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
num_vectors);
if (rc == 0)
num_vectors = desired_vectors;
@@ -1057,7 +1060,7 @@ ntb_init_isr(struct ntb_softc *ntb)
num_vectors = 1;
if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
device_printf(ntb->device,
"Errata workaround does not support MSI or INTX\n");
return (EINVAL);
@@ -1065,32 +1068,30 @@ ntb_init_isr(struct ntb_softc *ntb)
ntb->db_vec_count = 1;
ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
- rc = ntb_setup_legacy_interrupt(ntb);
+ rc = intel_ntb_setup_legacy_interrupt(ntb);
} else {
if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
- HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+ HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
device_printf(ntb->device,
"Errata workaround expects %d doorbell bits\n",
XEON_NONLINK_DB_MSIX_BITS);
return (EINVAL);
}
- ntb_create_msix_vec(ntb, num_vectors);
- rc = ntb_setup_msix(ntb, num_vectors);
- if (rc == 0 && HAS_FEATURE(NTB_SB01BASE_LOCKUP))
- ntb_get_msix_info(ntb);
+ intel_ntb_create_msix_vec(ntb, num_vectors);
+ rc = intel_ntb_setup_msix(ntb, num_vectors);
}
if (rc != 0) {
device_printf(ntb->device,
"Error allocating interrupts: %d\n", rc);
- ntb_free_msix_vec(ntb);
+ intel_ntb_free_msix_vec(ntb);
}
return (rc);
}
static int
-ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
+intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
{
int rc;
@@ -1117,7 +1118,7 @@ ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
}
static void
-ntb_teardown_interrupts(struct ntb_softc *ntb)
+intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
{
struct ntb_int_info *current_int;
int i;
@@ -1133,7 +1134,7 @@ ntb_teardown_interrupts(struct ntb_softc *ntb)
rman_get_rid(current_int->res), current_int->res);
}
- ntb_free_msix_vec(ntb);
+ intel_ntb_free_msix_vec(ntb);
pci_release_msi(ntb->device);
}
@@ -1146,11 +1147,11 @@ db_ioread(struct ntb_softc *ntb, uint64_t regoff)
{
if (ntb->type == NTB_ATOM)
- return (ntb_reg_read(8, regoff));
+ return (intel_ntb_reg_read(8, regoff));
KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
- return (ntb_reg_read(2, regoff));
+ return (intel_ntb_reg_read(2, regoff));
}
static inline void
@@ -1172,89 +1173,78 @@ db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{
if (ntb->type == NTB_ATOM) {
- ntb_reg_write(8, regoff, val);
+ intel_ntb_reg_write(8, regoff, val);
return;
}
KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
- ntb_reg_write(2, regoff, (uint16_t)val);
+ intel_ntb_reg_write(2, regoff, (uint16_t)val);
}
-void
-ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
+static void
+intel_ntb_db_set_mask(device_t dev, uint64_t bits)
{
-
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
- return;
+ struct ntb_softc *ntb = device_get_softc(dev);
DB_MASK_LOCK(ntb);
ntb->db_mask |= bits;
- db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+ if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
+ db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
DB_MASK_UNLOCK(ntb);
}
-void
-ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
+static void
+intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
+ uint64_t ibits;
+ int i;
KASSERT((bits & ~ntb->db_valid_mask) == 0,
("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
(uintmax_t)(bits & ~ntb->db_valid_mask),
(uintmax_t)ntb->db_valid_mask));
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
- return;
-
DB_MASK_LOCK(ntb);
+ ibits = ntb->fake_db_bell & ntb->db_mask & bits;
ntb->db_mask &= ~bits;
- db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+ /* Simulate fake interrupts if unmasked DB bits are set. */
+ for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+ if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
+ swi_sched(ntb->int_info[i].tag, 0);
+ }
+ } else {
+ db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+ }
DB_MASK_UNLOCK(ntb);
}
-uint64_t
-ntb_db_read(struct ntb_softc *ntb)
+static uint64_t
+intel_ntb_db_read(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
- uint64_t res;
- unsigned i;
-
- res = 0;
- for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
- if (ntb->msix_vec[i].masked != 0)
- res |= ntb_db_vector_mask(ntb, i);
- }
- return (res);
- }
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
+ return (ntb->fake_db_bell);
return (db_ioread(ntb, ntb->self_reg->db_bell));
}
-void
-ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
+static void
+intel_ntb_db_clear(device_t dev, uint64_t bits)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
KASSERT((bits & ~ntb->db_valid_mask) == 0,
("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
(uintmax_t)(bits & ~ntb->db_valid_mask),
(uintmax_t)ntb->db_valid_mask));
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
- unsigned i;
-
- for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
- if ((bits & ntb_db_vector_mask(ntb, i)) != 0) {
- DB_MASK_LOCK(ntb);
- if (ntb->msix_vec[i].masked != 0) {
- /* XXX These need a public API. */
-#if 0
- pci_unmask_msix(ntb->device, i);
-#endif
- ntb->msix_vec[i].masked = 0;
- }
- DB_MASK_UNLOCK(ntb);
- }
- }
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+ DB_MASK_LOCK(ntb);
+ ntb->fake_db_bell &= ~bits;
+ DB_MASK_UNLOCK(ntb);
return;
}
@@ -1262,43 +1252,59 @@ ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
}
static inline uint64_t
-ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
+intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
{
uint64_t shift, mask;
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+ /*
+ * Remap vectors in custom way to make at least first
+ * three doorbells to not generate stray events.
+ * This breaks Linux compatibility (if one existed)
+ * when more then one DB is used (not by if_ntb).
+ */
+ if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
+ return (1 << db_vector);
+ if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
+ return (0x7ffc);
+ }
+
shift = ntb->db_vec_shift;
mask = (1ull << shift) - 1;
return (mask << (shift * db_vector));
}
static void
-ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
+intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
{
uint64_t vec_mask;
ntb->last_ts = ticks;
- vec_mask = ntb_vec_mask(ntb, vec);
+ vec_mask = intel_ntb_vec_mask(ntb, vec);
if ((vec_mask & ntb->db_link_mask) != 0) {
- if (ntb_poll_link(ntb))
- ntb_link_event(ntb);
+ if (intel_ntb_poll_link(ntb))
+ ntb_link_event(ntb->device);
}
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
(vec_mask & ntb->db_link_mask) == 0) {
DB_MASK_LOCK(ntb);
- if (ntb->msix_vec[vec].masked == 0) {
- /* XXX These need a public API. */
-#if 0
- pci_mask_msix(ntb->device, vec);
-#endif
- ntb->msix_vec[vec].masked = 1;
- }
+
+ /* Do not report same DB events again if not cleared yet. */
+ vec_mask &= ~ntb->fake_db_bell;
+
+ /* Update our internal doorbell register. */
+ ntb->fake_db_bell |= vec_mask;
+
+ /* Do not report masked DB events. */
+ vec_mask &= ~ntb->db_mask;
+
DB_MASK_UNLOCK(ntb);
}
if ((vec_mask & ntb->db_valid_mask) != 0)
- ntb_db_event(ntb, vec);
+ ntb_db_event(ntb->device, vec);
}
static void
@@ -1306,18 +1312,18 @@ ndev_vec_isr(void *arg)
{
struct ntb_vec *nvec = arg;
- ntb_interrupt(nvec->ntb, nvec->num);
+ intel_ntb_interrupt(nvec->ntb, nvec->num);
}
static void
ndev_irq_isr(void *arg)
{
/* If we couldn't set up MSI-X, we only have the one vector. */
- ntb_interrupt(arg, 0);
+ intel_ntb_interrupt(arg, 0);
}
static int
-ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
+intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
{
uint32_t i;
@@ -1332,7 +1338,7 @@ ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
}
static void
-ntb_free_msix_vec(struct ntb_softc *ntb)
+intel_ntb_free_msix_vec(struct ntb_softc *ntb)
{
if (ntb->msix_vec == NULL)
@@ -1343,7 +1349,7 @@ ntb_free_msix_vec(struct ntb_softc *ntb)
}
static void
-ntb_get_msix_info(struct ntb_softc *ntb)
+intel_ntb_get_msix_info(struct ntb_softc *ntb)
{
struct pci_devinfo *dinfo;
struct pcicfg_msix *msix;
@@ -1352,8 +1358,6 @@ ntb_get_msix_info(struct ntb_softc *ntb)
dinfo = device_get_ivars(ntb->device);
msix = &dinfo->cfg.msix;
- laddr = data = 0;
-
CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
@@ -1361,7 +1365,7 @@ ntb_get_msix_info(struct ntb_softc *ntb)
laddr = bus_read_4(msix->msix_table_res, offset +
PCI_MSIX_ENTRY_LOWER_ADDR);
- ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
+ intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
@@ -1370,14 +1374,14 @@ ntb_get_msix_info(struct ntb_softc *ntb)
data = bus_read_4(msix->msix_table_res, offset +
PCI_MSIX_ENTRY_DATA);
- ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
+ intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
ntb->msix_data[i].nmd_data = data;
}
}
static struct ntb_hw_info *
-ntb_get_device_info(uint32_t device_id)
+intel_ntb_get_device_info(uint32_t device_id)
{
struct ntb_hw_info *ep = pci_ids;
@@ -1390,15 +1394,15 @@ ntb_get_device_info(uint32_t device_id)
}
static void
-ntb_teardown_xeon(struct ntb_softc *ntb)
+intel_ntb_teardown_xeon(struct ntb_softc *ntb)
{
if (ntb->reg != NULL)
- ntb_link_disable(ntb);
+ intel_ntb_link_disable(ntb->device);
}
static void
-ntb_detect_max_mw(struct ntb_softc *ntb)
+intel_ntb_detect_max_mw(struct ntb_softc *ntb)
{
if (ntb->type == NTB_ATOM) {
@@ -1406,14 +1410,14 @@ ntb_detect_max_mw(struct ntb_softc *ntb)
return;
}
- if (HAS_FEATURE(NTB_SPLIT_BAR))
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
else
ntb->mw_count = XEON_SNB_MW_COUNT;
}
static int
-ntb_detect_xeon(struct ntb_softc *ntb)
+intel_ntb_detect_xeon(struct ntb_softc *ntb)
{
uint8_t ppd, conn_type;
@@ -1428,11 +1432,21 @@ ntb_detect_xeon(struct ntb_softc *ntb)
if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
ntb->features |= NTB_SPLIT_BAR;
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
+ !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+ device_printf(ntb->device,
+ "Can not apply SB01BASE_LOCKUP workaround "
+ "with split BARs disabled!\n");
+ device_printf(ntb->device,
+ "Expect system hangs under heavy NTB traffic!\n");
+ ntb->features &= ~NTB_SB01BASE_LOCKUP;
+ }
+
/*
* SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
* errata workaround; only do one at a time.
*/
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
ntb->features &= ~NTB_SDOORBELL_LOCKUP;
conn_type = ppd & XEON_PPD_CONN_TYPE;
@@ -1451,7 +1465,7 @@ ntb_detect_xeon(struct ntb_softc *ntb)
}
static int
-ntb_detect_atom(struct ntb_softc *ntb)
+intel_ntb_detect_atom(struct ntb_softc *ntb)
{
uint32_t ppd, conn_type;
@@ -1476,7 +1490,7 @@ ntb_detect_atom(struct ntb_softc *ntb)
}
static int
-ntb_xeon_init_dev(struct ntb_softc *ntb)
+intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
{
int rc;
@@ -1497,15 +1511,16 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
ntb->peer_reg = &xeon_b2b_reg;
ntb->xlat_reg = &xeon_sec_xlat;
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+ ntb->fake_db_bell = 0;
ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
ntb->mw_count;
- ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
+ intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
g_ntb_msix_idx, ntb->msix_mw_idx);
- rc = ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
+ rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
VM_MEMATTR_UNCACHEABLE);
KASSERT(rc == 0, ("shouldn't fail"));
- } else if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
+ } else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
/*
* There is a Xeon hardware errata related to writes to SDOORBELL or
* B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
@@ -1515,12 +1530,12 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
*/
ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
ntb->mw_count;
- ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
+ intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
g_ntb_mw_idx, ntb->b2b_mw_idx);
- rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
+ rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
VM_MEMATTR_UNCACHEABLE);
KASSERT(rc == 0, ("shouldn't fail"));
- } else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
+ } else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
/*
* HW Errata on bit 14 of b2bdoorbell register. Writes will not be
* mirrored to the remote system. Shrink the number of bits by one,
@@ -1543,7 +1558,7 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
return (rc);
/* Enable Bus Master and Memory Space on the secondary side */
- ntb_reg_write(2, XEON_SPCICMD_OFFSET,
+ intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
/*
@@ -1554,16 +1569,12 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
DB_MASK_UNLOCK(ntb);
- rc = xeon_setup_msix_bar(ntb);
- if (rc != 0)
- return (rc);
-
- rc = ntb_init_isr(ntb);
+ rc = intel_ntb_init_isr(ntb);
return (rc);
}
static int
-ntb_atom_init_dev(struct ntb_softc *ntb)
+intel_ntb_atom_init_dev(struct ntb_softc *ntb)
{
int error;
@@ -1590,15 +1601,15 @@ ntb_atom_init_dev(struct ntb_softc *ntb)
configure_atom_secondary_side_bars(ntb);
/* Enable Bus Master and Memory Space on the secondary side */
- ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
+ intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
- error = ntb_init_isr(ntb);
+ error = intel_ntb_init_isr(ntb);
if (error != 0)
return (error);
/* Initiate PCI-E link training */
- ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+ intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
@@ -1611,19 +1622,19 @@ configure_atom_secondary_side_bars(struct ntb_softc *ntb)
{
if (ntb->dev_type == NTB_DEV_USD) {
- ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
+ intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
XEON_B2B_BAR2_ADDR64);
- ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
+ intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
XEON_B2B_BAR4_ADDR64);
- ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
- ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
+ intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
+ intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
} else {
- ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
+ intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
XEON_B2B_BAR2_ADDR64);
- ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
+ intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
XEON_B2B_BAR4_ADDR64);
- ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
- ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
+ intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
+ intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
}
}
@@ -1649,7 +1660,7 @@ xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
struct ntb_pci_bar_info *bar;
uint8_t bar_sz;
- if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
+ if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
return;
bar = &ntb->bar_info[idx];
@@ -1673,28 +1684,28 @@ xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
uint32_t base_reg, lmt_reg;
bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
- if (idx == regbar)
- bar_addr += ntb->b2b_off;
+ if (idx == regbar) {
+ if (ntb->b2b_off)
+ bar_addr += ntb->b2b_off;
+ else
+ bar_addr = 0;
+ }
- /*
- * Set limit registers first to avoid an errata where setting the base
- * registers locks the limit registers.
- */
if (!bar_is_64bit(ntb, idx)) {
- ntb_reg_write(4, lmt_reg, bar_addr);
- reg_val = ntb_reg_read(4, lmt_reg);
+ intel_ntb_reg_write(4, base_reg, bar_addr);
+ reg_val = intel_ntb_reg_read(4, base_reg);
(void)reg_val;
- ntb_reg_write(4, base_reg, bar_addr);
- reg_val = ntb_reg_read(4, base_reg);
+ intel_ntb_reg_write(4, lmt_reg, bar_addr);
+ reg_val = intel_ntb_reg_read(4, lmt_reg);
(void)reg_val;
} else {
- ntb_reg_write(8, lmt_reg, bar_addr);
- reg_val = ntb_reg_read(8, lmt_reg);
+ intel_ntb_reg_write(8, base_reg, bar_addr);
+ reg_val = intel_ntb_reg_read(8, base_reg);
(void)reg_val;
- ntb_reg_write(8, base_reg, bar_addr);
- reg_val = ntb_reg_read(8, base_reg);
+ intel_ntb_reg_write(8, lmt_reg, bar_addr);
+ reg_val = intel_ntb_reg_read(8, lmt_reg);
(void)reg_val;
}
}
@@ -1705,30 +1716,17 @@ xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
struct ntb_pci_bar_info *bar;
bar = &ntb->bar_info[idx];
- if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
- ntb_reg_write(4, bar->pbarxlat_off, base_addr);
- base_addr = ntb_reg_read(4, bar->pbarxlat_off);
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
+ intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
+ base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
} else {
- ntb_reg_write(8, bar->pbarxlat_off, base_addr);
- base_addr = ntb_reg_read(8, bar->pbarxlat_off);
+ intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
+ base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
}
(void)base_addr;
}
static int
-xeon_setup_msix_bar(struct ntb_softc *ntb)
-{
- enum ntb_bar bar_num;
-
- if (!HAS_FEATURE(NTB_SB01BASE_LOCKUP))
- return (0);
-
- bar_num = ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
- ntb->peer_lapic_bar = &ntb->bar_info[bar_num];
- return (0);
-}
-
-static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
const struct ntb_b2b_addr *peer_addr)
{
@@ -1742,7 +1740,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
b2b_bar_num = NTB_CONFIG_BAR;
ntb->b2b_off = 0;
} else {
- b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
+ b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
("invalid b2b mw bar"));
@@ -1773,7 +1771,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
bar_addr = addr->bar0_addr;
else if (b2b_bar_num == NTB_B2B_BAR_1)
bar_addr = addr->bar2_addr64;
- else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
+ else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
bar_addr = addr->bar4_addr64;
else if (b2b_bar_num == NTB_B2B_BAR_2)
bar_addr = addr->bar4_addr32;
@@ -1782,7 +1780,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
else
KASSERT(false, ("invalid bar"));
- ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
+ intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
/*
* Other SBARs are normally hit by the PBAR xlat, except for the b2b
@@ -1793,7 +1791,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
*/
xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
b2b_bar_num);
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
NTB_B2B_BAR_2, b2b_bar_num);
xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
@@ -1803,56 +1801,41 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
NTB_B2B_BAR_2, b2b_bar_num);
/* Zero incoming translation addrs */
- ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
- ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
-
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
- size_t size, xlatoffset;
+ intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
+ intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
- switch (ntb_mw_to_bar(ntb, ntb->msix_mw_idx)) {
- case NTB_B2B_BAR_1:
- size = 8;
- xlatoffset = XEON_SBAR2XLAT_OFFSET;
- break;
- case NTB_B2B_BAR_2:
- xlatoffset = XEON_SBAR4XLAT_OFFSET;
- if (HAS_FEATURE(NTB_SPLIT_BAR))
- size = 4;
- else
- size = 8;
- break;
- case NTB_B2B_BAR_3:
- xlatoffset = XEON_SBAR5XLAT_OFFSET;
- size = 4;
- break;
- default:
- KASSERT(false, ("Bogus msix mw idx: %u",
- ntb->msix_mw_idx));
- return (EINVAL);
- }
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+ uint32_t xlat_reg, lmt_reg;
+ enum ntb_bar bar_num;
/*
* We point the chosen MSIX MW BAR xlat to remote LAPIC for
* workaround
*/
- if (size == 4) {
- ntb_reg_write(4, xlatoffset, MSI_INTEL_ADDR_BASE);
- ntb->msix_xlat = ntb_reg_read(4, xlatoffset);
+ bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
+ bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
+ if (bar_is_64bit(ntb, bar_num)) {
+ intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
+ ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
+ intel_ntb_reg_write(8, lmt_reg, 0);
} else {
- ntb_reg_write(8, xlatoffset, MSI_INTEL_ADDR_BASE);
- ntb->msix_xlat = ntb_reg_read(8, xlatoffset);
+ intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
+ ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
+ intel_ntb_reg_write(4, lmt_reg, 0);
}
+
+ ntb->peer_lapic_bar = &ntb->bar_info[bar_num];
}
- (void)ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
- (void)ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
+ (void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
+ (void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
/* Zero outgoing translation limits (whole bar size windows) */
- ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
- ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
+ intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
+ intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
/* Set outgoing translation offsets */
xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
} else
@@ -1864,7 +1847,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
bar_addr = peer_addr->bar0_addr;
else if (b2b_bar_num == NTB_B2B_BAR_1)
bar_addr = peer_addr->bar2_addr64;
- else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
+ else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
bar_addr = peer_addr->bar4_addr64;
else if (b2b_bar_num == NTB_B2B_BAR_2)
bar_addr = peer_addr->bar4_addr32;
@@ -1877,8 +1860,8 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
* B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
* at a time.
*/
- ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
- ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
+ intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
+ intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
return (0);
}
@@ -1897,7 +1880,7 @@ link_is_up(struct ntb_softc *ntb)
if (ntb->type == NTB_XEON)
return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
- !HAS_FEATURE(NTB_SB01BASE_LOCKUP)));
+ !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));
KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
@@ -1910,11 +1893,11 @@ atom_link_is_err(struct ntb_softc *ntb)
KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
- status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
return (true);
- status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
}
@@ -1937,8 +1920,8 @@ atom_link_hb(void *arg)
goto out;
}
- if (ntb_poll_link(ntb))
- ntb_link_event(ntb);
+ if (intel_ntb_poll_link(ntb))
+ ntb_link_event(ntb->device);
if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
/* Link is down with error, proceed with recovery */
@@ -1956,166 +1939,47 @@ atom_perform_link_restart(struct ntb_softc *ntb)
uint32_t status;
/* Driver resets the NTB ModPhy lanes - magic! */
- ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
- ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
- ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
- ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
+ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
+ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
+ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
+ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
/* Driver waits 100ms to allow the NTB ModPhy to settle */
pause("ModPhy", hz / 10);
/* Clear AER Errors, write to clear */
- status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
status &= PCIM_AER_COR_REPLAY_ROLLOVER;
- ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
+ intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
/* Clear unexpected electrical idle event in LTSSM, write to clear */
- status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
- ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
+ intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
/* Clear DeSkew Buffer error, write to clear */
- status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
status |= ATOM_DESKEWSTS_DBERR;
- ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
+ intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
- status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
status &= ATOM_IBIST_ERR_OFLOW;
- ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
+ intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
/* Releases the NTB state machine to allow the link to retrain */
- status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
- ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
-}
-
-/*
- * ntb_set_ctx() - associate a driver context with an ntb device
- * @ntb: NTB device context
- * @ctx: Driver context
- * @ctx_ops: Driver context operations
- *
- * Associate a driver context and operations with a ntb device. The context is
- * provided by the client driver, and the driver may associate a different
- * context with each ntb device.
- *
- * Return: Zero if the context is associated, otherwise an error number.
- */
-int
-ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
-{
-
- if (ctx == NULL || ops == NULL)
- return (EINVAL);
- if (ntb->ctx_ops != NULL)
- return (EINVAL);
-
- CTX_LOCK(ntb);
- if (ntb->ctx_ops != NULL) {
- CTX_UNLOCK(ntb);
- return (EINVAL);
- }
- ntb->ntb_ctx = ctx;
- ntb->ctx_ops = ops;
- CTX_UNLOCK(ntb);
-
- return (0);
-}
-
-/*
- * It is expected that this will only be used from contexts where the ctx_lock
- * is not needed to protect ntb_ctx lifetime.
- */
-void *
-ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
-{
-
- KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
- if (ops != NULL)
- *ops = ntb->ctx_ops;
- return (ntb->ntb_ctx);
-}
-
-/*
- * ntb_clear_ctx() - disassociate any driver context from an ntb device
- * @ntb: NTB device context
- *
- * Clear any association that may exist between a driver context and the ntb
- * device.
- */
-void
-ntb_clear_ctx(struct ntb_softc *ntb)
-{
-
- CTX_LOCK(ntb);
- ntb->ntb_ctx = NULL;
- ntb->ctx_ops = NULL;
- CTX_UNLOCK(ntb);
-}
-
-/*
- * ntb_link_event() - notify driver context of a change in link status
- * @ntb: NTB device context
- *
- * Notify the driver context that the link status may have changed. The driver
- * should call ntb_link_is_up() to get the current status.
- */
-void
-ntb_link_event(struct ntb_softc *ntb)
-{
-
- CTX_LOCK(ntb);
- if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
- ntb->ctx_ops->link_event(ntb->ntb_ctx);
- CTX_UNLOCK(ntb);
+ intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
}
-/*
- * ntb_db_event() - notify driver context of a doorbell event
- * @ntb: NTB device context
- * @vector: Interrupt vector number
- *
- * Notify the driver context of a doorbell event. If hardware supports
- * multiple interrupt vectors for doorbells, the vector number indicates which
- * vector received the interrupt. The vector number is relative to the first
- * vector used for doorbells, starting at zero, and must be less than
- * ntb_db_vector_count(). The driver may call ntb_db_read() to check which
- * doorbell bits need service, and ntb_db_vector_mask() to determine which of
- * those bits are associated with the vector number.
- */
-static void
-ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
-{
-
- CTX_LOCK(ntb);
- if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
- ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
- CTX_UNLOCK(ntb);
-}
-
-/*
- * ntb_link_enable() - enable the link on the secondary side of the ntb
- * @ntb: NTB device context
- * @max_speed: The maximum link speed expressed as PCIe generation number[0]
- * @max_width: The maximum link width expressed as the number of PCIe lanes[0]
- *
- * Enable the link on the secondary side of the ntb. This can only be done
- * from the primary side of the ntb in primary or b2b topology. The ntb device
- * should train the link to its maximum speed and width, or the requested speed
- * and width, whichever is smaller, if supported.
- *
- * Return: Zero on success, otherwise an error number.
- *
- * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
- * and width input will be ignored.
- */
-int
-ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
- enum ntb_width w __unused)
+static int
+intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
+ enum ntb_width width __unused)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
uint32_t cntl;
- ntb_printf(2, "%s\n", __func__);
+ intel_ntb_printf(2, "%s\n", __func__);
if (ntb->type == NTB_ATOM) {
pci_write_config(ntb->device, NTB_PPD_OFFSET,
@@ -2124,57 +1988,47 @@ ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
}
if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
- ntb_link_event(ntb);
+ ntb_link_event(dev);
return (0);
}
- cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+ cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
- if (HAS_FEATURE(NTB_SPLIT_BAR))
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
- ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
+ intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
return (0);
}
-/*
- * ntb_link_disable() - disable the link on the secondary side of the ntb
- * @ntb: NTB device context
- *
- * Disable the link on the secondary side of the ntb. This can only be done
- * from the primary side of the ntb in primary or b2b topology. The ntb device
- * should disable the link. Returning from this call must indicate that a
- * barrier has passed, though with no more writes may pass in either direction
- * across the link, except if this call returns an error number.
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_link_disable(struct ntb_softc *ntb)
+static int
+intel_ntb_link_disable(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
uint32_t cntl;
- ntb_printf(2, "%s\n", __func__);
+ intel_ntb_printf(2, "%s\n", __func__);
if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
- ntb_link_event(ntb);
+ ntb_link_event(dev);
return (0);
}
- cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+ cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
- if (HAS_FEATURE(NTB_SPLIT_BAR))
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
- ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
+ intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
return (0);
}
-bool
-ntb_link_enabled(struct ntb_softc *ntb)
+static bool
+intel_ntb_link_enabled(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
uint32_t cntl;
if (ntb->type == NTB_ATOM) {
@@ -2185,7 +2039,7 @@ ntb_link_enabled(struct ntb_softc *ntb)
if (ntb->conn_type == NTB_CONN_TRANSPARENT)
return (true);
- cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+ cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
}
@@ -2210,11 +2064,11 @@ recover_atom_link(void *arg)
if (atom_link_is_err(ntb))
goto retry;
- status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
+ status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
goto out;
- status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
+ status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
width = NTB_LNK_STA_WIDTH(status32);
speed = status32 & NTB_LINK_SPEED_MASK;
@@ -2237,18 +2091,18 @@ retry:
* Polls the HW link status register(s); returns true if something has changed.
*/
static bool
-ntb_poll_link(struct ntb_softc *ntb)
+intel_ntb_poll_link(struct ntb_softc *ntb)
{
uint32_t ntb_cntl;
uint16_t reg_val;
if (ntb->type == NTB_ATOM) {
- ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+ ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
if (ntb_cntl == ntb->ntb_ctl)
return (false);
ntb->ntb_ctl = ntb_cntl;
- ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
+ ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
} else {
db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
@@ -2258,11 +2112,11 @@ ntb_poll_link(struct ntb_softc *ntb)
ntb->lnk_sta = reg_val;
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
if (_xeon_link_is_up(ntb)) {
if (!ntb->peer_msix_good) {
callout_reset(&ntb->peer_msix_work, 0,
- ntb_exchange_msix, ntb);
+ intel_ntb_exchange_msix, ntb);
return (false);
}
} else {
@@ -2275,7 +2129,7 @@ ntb_poll_link(struct ntb_softc *ntb)
}
static inline enum ntb_speed
-ntb_link_sta_speed(struct ntb_softc *ntb)
+intel_ntb_link_sta_speed(struct ntb_softc *ntb)
{
if (!link_is_up(ntb))
@@ -2284,7 +2138,7 @@ ntb_link_sta_speed(struct ntb_softc *ntb)
}
static inline enum ntb_width
-ntb_link_sta_width(struct ntb_softc *ntb)
+intel_ntb_link_sta_width(struct ntb_softc *ntb)
{
if (!link_is_up(ntb))
@@ -2306,7 +2160,7 @@ SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
#define NTB_REGFLAGS_MASK (NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
static void
-ntb_sysctl_init(struct ntb_softc *ntb)
+intel_ntb_sysctl_init(struct ntb_softc *ntb)
{
struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
struct sysctl_ctx_list *ctx;
@@ -2405,7 +2259,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
sysctl_handle_register, "QU", "Incoming XLAT23 register");
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
@@ -2425,7 +2279,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_64 | ntb->xlat_reg->bar2_limit,
sysctl_handle_register, "QU", "Incoming LMT23 register");
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_32 | ntb->xlat_reg->bar4_limit,
@@ -2516,7 +2370,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
sysctl_handle_register, "QU", "Outgoing XLAT23 register");
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
@@ -2536,7 +2390,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
sysctl_handle_register, "QU", "Outgoing LMT23 register");
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
@@ -2560,7 +2414,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_64 | ntb->xlat_reg->bar2_base,
sysctl_handle_register, "QU", "Secondary BAR23 base register");
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_32 | ntb->xlat_reg->bar4_base,
@@ -2583,13 +2437,10 @@ ntb_sysctl_init(struct ntb_softc *ntb)
static int
sysctl_handle_features(SYSCTL_HANDLER_ARGS)
{
- struct ntb_softc *ntb;
+ struct ntb_softc *ntb = arg1;
struct sbuf sb;
int error;
- error = 0;
- ntb = arg1;
-
sbuf_new_for_sysctl(&sb, NULL, 256, req);
sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
@@ -2604,14 +2455,11 @@ sysctl_handle_features(SYSCTL_HANDLER_ARGS)
static int
sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
{
- struct ntb_softc *ntb;
+ struct ntb_softc *ntb = arg1;
unsigned old, new;
int error;
- error = 0;
- ntb = arg1;
-
- old = ntb_link_enabled(ntb);
+ old = intel_ntb_link_enabled(ntb->device);
error = SYSCTL_OUT(req, &old, sizeof(old));
if (error != 0 || req->newptr == NULL)
@@ -2621,31 +2469,28 @@ sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
if (error != 0)
return (error);
- ntb_printf(0, "Admin set interface state to '%sabled'\n",
+ intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
(new != 0)? "en" : "dis");
if (new != 0)
- error = ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+ error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
else
- error = ntb_link_disable(ntb);
+ error = intel_ntb_link_disable(ntb->device);
return (error);
}
static int
sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
{
- struct ntb_softc *ntb;
+ struct ntb_softc *ntb = arg1;
struct sbuf sb;
enum ntb_speed speed;
enum ntb_width width;
int error;
- error = 0;
- ntb = arg1;
-
sbuf_new_for_sysctl(&sb, NULL, 32, req);
- if (ntb_link_is_up(ntb, &speed, &width))
+ if (intel_ntb_link_is_up(ntb->device, &speed, &width))
sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
(unsigned)speed, (unsigned)width);
else
@@ -2662,14 +2507,11 @@ sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
static int
sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
{
- struct ntb_softc *ntb;
+ struct ntb_softc *ntb = arg1;
unsigned res;
int error;
- error = 0;
- ntb = arg1;
-
- res = ntb_link_is_up(ntb, NULL, NULL);
+ res = intel_ntb_link_is_up(ntb->device, NULL, NULL);
error = SYSCTL_OUT(req, &res, sizeof(res));
if (error || !req->newptr)
@@ -2708,28 +2550,28 @@ sysctl_handle_register(SYSCTL_HANDLER_ARGS)
if (pci)
umv = pci_read_config(ntb->device, reg, 8);
else
- umv = ntb_reg_read(8, reg);
+ umv = intel_ntb_reg_read(8, reg);
outsz = sizeof(uint64_t);
break;
case NTB_REG_32:
if (pci)
umv = pci_read_config(ntb->device, reg, 4);
else
- umv = ntb_reg_read(4, reg);
+ umv = intel_ntb_reg_read(4, reg);
outsz = sizeof(uint32_t);
break;
case NTB_REG_16:
if (pci)
umv = pci_read_config(ntb->device, reg, 2);
else
- umv = ntb_reg_read(2, reg);
+ umv = intel_ntb_reg_read(2, reg);
outsz = sizeof(uint16_t);
break;
case NTB_REG_8:
if (pci)
umv = pci_read_config(ntb->device, reg, 1);
else
- umv = ntb_reg_read(1, reg);
+ umv = intel_ntb_reg_read(1, reg);
outsz = sizeof(uint8_t);
break;
default:
@@ -2749,7 +2591,7 @@ sysctl_handle_register(SYSCTL_HANDLER_ARGS)
}
static unsigned
-ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
+intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
{
if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
@@ -2763,8 +2605,21 @@ ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
return (uidx);
}
+#ifndef EARLY_AP_STARTUP
+static int msix_ready;
+
+static void
+intel_ntb_msix_ready(void *arg __unused)
+{
+
+ msix_ready = 1;
+}
+SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
+ intel_ntb_msix_ready, NULL);
+#endif
+
static void
-ntb_exchange_msix(void *ctx)
+intel_ntb_exchange_msix(void *ctx)
{
struct ntb_softc *ntb;
uint32_t val;
@@ -2777,42 +2632,50 @@ ntb_exchange_msix(void *ctx)
if (ntb->peer_msix_done)
goto msix_done;
+#ifndef EARLY_AP_STARTUP
+ /* Block MSIX negotiation until SMP started and IRQ reshuffled. */
+ if (!msix_ready)
+ goto reschedule;
+#endif
+
+ intel_ntb_get_msix_info(ntb);
for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
- ntb_peer_spad_write(ntb, NTB_MSIX_DATA0 + i,
+ intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
ntb->msix_data[i].nmd_data);
- ntb_peer_spad_write(ntb, NTB_MSIX_OFS0 + i,
+ intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
}
- ntb_peer_spad_write(ntb, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
+ intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
- ntb_spad_read(ntb, NTB_MSIX_GUARD, &val);
+ intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
if (val != NTB_MSIX_VER_GUARD)
goto reschedule;
for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
- ntb_spad_read(ntb, NTB_MSIX_DATA0 + i, &val);
- ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
+ intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
+ intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
ntb->peer_msix_data[i].nmd_data = val;
- ntb_spad_read(ntb, NTB_MSIX_OFS0 + i, &val);
- ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
+ intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
+ intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
ntb->peer_msix_data[i].nmd_ofs = val;
}
ntb->peer_msix_done = true;
msix_done:
- ntb_peer_spad_write(ntb, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
- ntb_spad_read(ntb, NTB_MSIX_DONE, &val);
+ intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
+ intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
if (val != NTB_MSIX_RECEIVED)
goto reschedule;
+ intel_ntb_spad_clear(ntb->device);
ntb->peer_msix_good = true;
/* Give peer time to see our NTB_MSIX_RECEIVED. */
goto reschedule;
msix_good:
- ntb_poll_link(ntb);
- ntb_link_event(ntb);
+ intel_ntb_poll_link(ntb);
+ ntb_link_event(ntb->device);
return;
reschedule:
@@ -2820,40 +2683,27 @@ reschedule:
if (_xeon_link_is_up(ntb)) {
callout_reset(&ntb->peer_msix_work,
hz * (ntb->peer_msix_good ? 2 : 1) / 100,
- ntb_exchange_msix, ntb);
+ intel_ntb_exchange_msix, ntb);
} else
- ntb_spad_clear(ntb);
+ intel_ntb_spad_clear(ntb->device);
}
/*
* Public API to the rest of the OS
*/
-/**
- * ntb_get_max_spads() - get the total scratch regs usable
- * @ntb: pointer to ntb_softc instance
- *
- * This function returns the max 32bit scratchpad registers usable by the
- * upper layer.
- *
- * RETURNS: total number of scratch pad registers available
- */
-uint8_t
-ntb_get_max_spads(struct ntb_softc *ntb)
+static uint8_t
+intel_ntb_spad_count(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
return (ntb->spad_count);
}
-/*
- * ntb_mw_count() - Get the number of memory windows available for KPI
- * consumers.
- *
- * (Excludes any MW wholly reserved for register access.)
- */
-uint8_t
-ntb_mw_count(struct ntb_softc *ntb)
+static uint8_t
+intel_ntb_mw_count(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
uint8_t res;
res = ntb->mw_count;
@@ -2864,25 +2714,15 @@ ntb_mw_count(struct ntb_softc *ntb)
return (res);
}
-/**
- * ntb_spad_write() - write to the secondary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to the scratchpad register, 0 based
- * @val: the data value to put into the register
- *
- * This function allows writing of a 32bit value to the indexed scratchpad
- * register. The register resides on the secondary (external) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
+static int
+intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (idx >= ntb->spad_count)
return (EINVAL);
- ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
+ intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
return (0);
}
@@ -2890,122 +2730,77 @@ ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
/*
* Zeros the local scratchpad.
*/
-void
-ntb_spad_clear(struct ntb_softc *ntb)
+static void
+intel_ntb_spad_clear(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
unsigned i;
for (i = 0; i < ntb->spad_count; i++)
- ntb_spad_write(ntb, i, 0);
+ intel_ntb_spad_write(dev, i, 0);
}
-/**
- * ntb_spad_read() - read from the primary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to scratchpad register, 0 based
- * @val: pointer to 32bit integer for storing the register value
- *
- * This function allows reading of the 32bit scratchpad register on
- * the primary (internal) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
+static int
+intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (idx >= ntb->spad_count)
return (EINVAL);
- *val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
+ *val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
return (0);
}
-/**
- * ntb_peer_spad_write() - write to the secondary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to the scratchpad register, 0 based
- * @val: the data value to put into the register
- *
- * This function allows writing of a 32bit value to the indexed scratchpad
- * register. The register resides on the secondary (external) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
+static int
+intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (idx >= ntb->spad_count)
return (EINVAL);
- if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
- ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
+ if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
+ intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
else
- ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
+ intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
return (0);
}
-/**
- * ntb_peer_spad_read() - read from the primary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to scratchpad register, 0 based
- * @val: pointer to 32bit integer for storing the register value
- *
- * This function allows reading of the 32bit scratchpad register on
- * the primary (internal) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
+static int
+intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (idx >= ntb->spad_count)
return (EINVAL);
- if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
- *val = ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
+ if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
+ *val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
else
- *val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
+ *val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
return (0);
}
-/*
- * ntb_mw_get_range() - get the range of a memory window
- * @ntb: NTB device context
- * @idx: Memory window number
- * @base: OUT - the base address for mapping the memory window
- * @size: OUT - the size for mapping the memory window
- * @align: OUT - the base alignment for translating the memory window
- * @align_size: OUT - the size alignment for translating the memory window
- *
- * Get the range of a memory window. NULL may be given for any output
- * parameter if the value is not needed. The base and size may be used for
- * mapping the memory window, to access the peer memory. The alignment and
- * size may be used for translating the memory window, for the peer to access
- * memory on the local system.
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
+static int
+intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
bus_addr_t *plimit)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
struct ntb_pci_bar_info *bar;
bus_addr_t limit;
size_t bar_b2b_off;
enum ntb_bar bar_num;
- if (mw_idx >= ntb_mw_count(ntb))
+ if (mw_idx >= intel_ntb_mw_count(dev))
return (EINVAL);
- mw_idx = ntb_user_mw_to_idx(ntb, mw_idx);
+ mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);
- bar_num = ntb_mw_to_bar(ntb, mw_idx);
+ bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
bar = &ntb->bar_info[bar_num];
bar_b2b_off = 0;
if (mw_idx == ntb->b2b_mw_idx) {
@@ -3034,37 +2829,21 @@ ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
return (0);
}
-/*
- * ntb_mw_set_trans() - set the translation of a memory window
- * @ntb: NTB device context
- * @idx: Memory window number
- * @addr: The dma address local memory to expose to the peer
- * @size: The size of the local memory to expose to the peer
- *
- * Set the translation of a memory window. The peer may access local memory
- * through the window starting at the address, up to the size. The address
- * must be aligned to the alignment specified by ntb_mw_get_range(). The size
- * must be aligned to the size alignment specified by ntb_mw_get_range(). The
- * address must be below the plimit specified by ntb_mw_get_range() (i.e. for
- * 32-bit BARs).
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
- size_t size)
+static int
+intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
struct ntb_pci_bar_info *bar;
uint64_t base, limit, reg_val;
size_t bar_size, mw_size;
uint32_t base_reg, xlat_reg, limit_reg;
enum ntb_bar bar_num;
- if (idx >= ntb_mw_count(ntb))
+ if (idx >= intel_ntb_mw_count(dev))
return (EINVAL);
- idx = ntb_user_mw_to_idx(ntb, idx);
+ idx = intel_ntb_user_mw_to_idx(ntb, idx);
- bar_num = ntb_mw_to_bar(ntb, idx);
+ bar_num = intel_ntb_mw_to_bar(ntb, idx);
bar = &ntb->bar_info[bar_num];
bar_size = bar->size;
@@ -3084,25 +2863,25 @@ ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
limit = 0;
if (bar_is_64bit(ntb, bar_num)) {
- base = ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
+ base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
if (limit_reg != 0 && size != mw_size)
limit = base + size;
/* Set and verify translation address */
- ntb_reg_write(8, xlat_reg, addr);
- reg_val = ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
+ intel_ntb_reg_write(8, xlat_reg, addr);
+ reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
if (reg_val != addr) {
- ntb_reg_write(8, xlat_reg, 0);
+ intel_ntb_reg_write(8, xlat_reg, 0);
return (EIO);
}
/* Set and verify the limit */
- ntb_reg_write(8, limit_reg, limit);
- reg_val = ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
+ intel_ntb_reg_write(8, limit_reg, limit);
+ reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
if (reg_val != limit) {
- ntb_reg_write(8, limit_reg, base);
- ntb_reg_write(8, xlat_reg, 0);
+ intel_ntb_reg_write(8, limit_reg, base);
+ intel_ntb_reg_write(8, xlat_reg, 0);
return (EIO);
}
} else {
@@ -3113,98 +2892,72 @@ ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
if (((addr + size) & UINT32_MAX) != (addr + size))
return (ERANGE);
- base = ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
+ base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
if (limit_reg != 0 && size != mw_size)
limit = base + size;
/* Set and verify translation address */
- ntb_reg_write(4, xlat_reg, addr);
- reg_val = ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
+ intel_ntb_reg_write(4, xlat_reg, addr);
+ reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
if (reg_val != addr) {
- ntb_reg_write(4, xlat_reg, 0);
+ intel_ntb_reg_write(4, xlat_reg, 0);
return (EIO);
}
/* Set and verify the limit */
- ntb_reg_write(4, limit_reg, limit);
- reg_val = ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
+ intel_ntb_reg_write(4, limit_reg, limit);
+ reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
if (reg_val != limit) {
- ntb_reg_write(4, limit_reg, base);
- ntb_reg_write(4, xlat_reg, 0);
+ intel_ntb_reg_write(4, limit_reg, base);
+ intel_ntb_reg_write(4, xlat_reg, 0);
return (EIO);
}
}
return (0);
}
-/*
- * ntb_mw_clear_trans() - clear the translation of a memory window
- * @ntb: NTB device context
- * @idx: Memory window number
- *
- * Clear the translation of a memory window. The peer may no longer access
- * local memory through the window.
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
+static int
+intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
{
- return (ntb_mw_set_trans(ntb, mw_idx, 0, 0));
+ return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
}
-/*
- * ntb_mw_get_wc - Get the write-combine status of a memory window
- *
- * Returns: Zero on success, setting *wc; otherwise an error number (e.g. if
- * idx is an invalid memory window).
- *
- * Mode is a VM_MEMATTR_* type.
- */
-int
-ntb_mw_get_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t *mode)
+static int
+intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
struct ntb_pci_bar_info *bar;
- if (idx >= ntb_mw_count(ntb))
+ if (idx >= intel_ntb_mw_count(dev))
return (EINVAL);
- idx = ntb_user_mw_to_idx(ntb, idx);
+ idx = intel_ntb_user_mw_to_idx(ntb, idx);
- bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
+ bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
*mode = bar->map_mode;
return (0);
}
-/*
- * ntb_mw_set_wc - Set the write-combine status of a memory window
- *
- * If 'mode' matches the current status, this does nothing and succeeds. Mode
- * is a VM_MEMATTR_* type.
- *
- * Returns: Zero on success, setting the caching attribute on the virtual
- * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid
- * memory window, or if changing the caching attribute fails).
- */
-int
-ntb_mw_set_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
+static int
+intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
- if (idx >= ntb_mw_count(ntb))
+ if (idx >= intel_ntb_mw_count(dev))
return (EINVAL);
- idx = ntb_user_mw_to_idx(ntb, idx);
- return (ntb_mw_set_wc_internal(ntb, idx, mode));
+ idx = intel_ntb_user_mw_to_idx(ntb, idx);
+ return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
}
static int
-ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
+intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
{
struct ntb_pci_bar_info *bar;
int rc;
- bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
+ bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
if (bar->map_mode == mode)
return (0);
@@ -3215,26 +2968,19 @@ ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
return (rc);
}
-/**
- * ntb_peer_db_set() - Set the doorbell on the secondary/external side
- * @ntb: pointer to ntb_softc instance
- * @bit: doorbell bits to ring
- *
- * This function allows triggering of a doorbell on the secondary/external
- * side that will initiate an interrupt on the remote host
- */
-void
-ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
+static void
+intel_ntb_peer_db_set(device_t dev, uint64_t bit)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
struct ntb_pci_bar_info *lapic;
unsigned i;
lapic = ntb->peer_lapic_bar;
for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
- if ((bit & ntb_db_vector_mask(ntb, i)) != 0)
+ if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0)
bus_space_write_4(lapic->pci_bus_tag,
lapic->pci_bus_handle,
ntb->peer_msix_data[i].nmd_ofs,
@@ -3243,99 +2989,76 @@ ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
return;
}
- if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
- ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
+ if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
+ intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
return;
}
db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
}
-/*
- * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
- * as well as the size of the register (via *sz_out).
- *
- * This function allows a caller using I/OAT DMA to chain the remote doorbell
- * ring to its memory window write.
- *
- * Note that writing the peer doorbell via a memory window will *not* generate
- * an interrupt on the remote host; that must be done separately.
- */
-bus_addr_t
-ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
+static int
+intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
struct ntb_pci_bar_info *bar;
uint64_t regoff;
- KASSERT(sz_out != NULL, ("must be non-NULL"));
+ KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));
- if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
+ if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
bar = &ntb->bar_info[NTB_CONFIG_BAR];
regoff = ntb->peer_reg->db_bell;
} else {
KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
("invalid b2b idx"));
- bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
+ bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
regoff = XEON_PDOORBELL_OFFSET;
}
KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
- *sz_out = ntb->reg->db_size;
/* HACK: Specific to current x86 bus implementation. */
- return ((uint64_t)bar->pci_bus_handle + regoff);
+ *db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
+ *db_size = ntb->reg->db_size;
+ return (0);
}
-/*
- * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
- * @ntb: NTB device context
- *
- * Hardware may support different number or arrangement of doorbell bits.
- *
- * Return: A mask of doorbell bits supported by the ntb.
- */
-uint64_t
-ntb_db_valid_mask(struct ntb_softc *ntb)
+static uint64_t
+intel_ntb_db_valid_mask(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
return (ntb->db_valid_mask);
}
-/*
- * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
- * @ntb: NTB device context
- * @vector: Doorbell vector number
- *
- * Each interrupt vector may have a different number or arrangement of bits.
- *
- * Return: A mask of doorbell bits serviced by a vector.
- */
-uint64_t
-ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
+static int
+intel_ntb_db_vector_count(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
+
+ return (ntb->db_vec_count);
+}
+
+static uint64_t
+intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
+{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (vector > ntb->db_vec_count)
return (0);
- return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
+ return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
}
-/**
- * ntb_link_is_up() - get the current ntb link state
- * @ntb: NTB device context
- * @speed: OUT - The link speed expressed as PCIe generation number
- * @width: OUT - The link width expressed as the number of PCIe lanes
- *
- * RETURNS: true or false based on the hardware link state
- */
-bool
-ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
- enum ntb_width *width)
+static bool
+intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (speed != NULL)
- *speed = ntb_link_sta_speed(ntb);
+ *speed = intel_ntb_link_sta_speed(ntb);
if (width != NULL)
- *width = ntb_link_sta_width(ntb);
+ *width = intel_ntb_link_sta_width(ntb);
return (link_is_up(ntb));
}
@@ -3350,17 +3073,42 @@ save_bar_parameters(struct ntb_pci_bar_info *bar)
bar->vbase = rman_get_virtual(bar->pci_resource);
}
-device_t
-ntb_get_device(struct ntb_softc *ntb)
-{
-
- return (ntb->device);
-}
-
-/* Export HW-specific errata information. */
-bool
-ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
-{
+static device_method_t ntb_intel_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, intel_ntb_probe),
+ DEVMETHOD(device_attach, intel_ntb_attach),
+ DEVMETHOD(device_detach, intel_ntb_detach),
+ /* NTB interface */
+ DEVMETHOD(ntb_link_is_up, intel_ntb_link_is_up),
+ DEVMETHOD(ntb_link_enable, intel_ntb_link_enable),
+ DEVMETHOD(ntb_link_disable, intel_ntb_link_disable),
+ DEVMETHOD(ntb_link_enabled, intel_ntb_link_enabled),
+ DEVMETHOD(ntb_mw_count, intel_ntb_mw_count),
+ DEVMETHOD(ntb_mw_get_range, intel_ntb_mw_get_range),
+ DEVMETHOD(ntb_mw_set_trans, intel_ntb_mw_set_trans),
+ DEVMETHOD(ntb_mw_clear_trans, intel_ntb_mw_clear_trans),
+ DEVMETHOD(ntb_mw_get_wc, intel_ntb_mw_get_wc),
+ DEVMETHOD(ntb_mw_set_wc, intel_ntb_mw_set_wc),
+ DEVMETHOD(ntb_spad_count, intel_ntb_spad_count),
+ DEVMETHOD(ntb_spad_clear, intel_ntb_spad_clear),
+ DEVMETHOD(ntb_spad_write, intel_ntb_spad_write),
+ DEVMETHOD(ntb_spad_read, intel_ntb_spad_read),
+ DEVMETHOD(ntb_peer_spad_write, intel_ntb_peer_spad_write),
+ DEVMETHOD(ntb_peer_spad_read, intel_ntb_peer_spad_read),
+ DEVMETHOD(ntb_db_valid_mask, intel_ntb_db_valid_mask),
+ DEVMETHOD(ntb_db_vector_count, intel_ntb_db_vector_count),
+ DEVMETHOD(ntb_db_vector_mask, intel_ntb_db_vector_mask),
+ DEVMETHOD(ntb_db_clear, intel_ntb_db_clear),
+ DEVMETHOD(ntb_db_clear_mask, intel_ntb_db_clear_mask),
+ DEVMETHOD(ntb_db_read, intel_ntb_db_read),
+ DEVMETHOD(ntb_db_set_mask, intel_ntb_db_set_mask),
+ DEVMETHOD(ntb_peer_db_addr, intel_ntb_peer_db_addr),
+ DEVMETHOD(ntb_peer_db_set, intel_ntb_peer_db_set),
+ DEVMETHOD_END
+};
- return (HAS_FEATURE(feature));
-}
+static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
+ sizeof(struct ntb_softc));
+DRIVER_MODULE(ntb_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
+MODULE_DEPEND(ntb_intel, ntb, 1, 1, 1);
+MODULE_VERSION(ntb_intel, 1);
diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.h b/sys/dev/ntb/ntb_hw/ntb_hw.h
deleted file mode 100644
index f05acda..0000000
--- a/sys/dev/ntb/ntb_hw/ntb_hw.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*-
- * Copyright (C) 2013 Intel Corporation
- * Copyright (C) 2015 EMC Corporation
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _NTB_HW_H_
-#define _NTB_HW_H_
-
-struct ntb_softc;
-
-#define NTB_MAX_NUM_MW 3
-
-enum ntb_speed {
- NTB_SPEED_AUTO = -1,
- NTB_SPEED_NONE = 0,
- NTB_SPEED_GEN1 = 1,
- NTB_SPEED_GEN2 = 2,
- NTB_SPEED_GEN3 = 3,
-};
-
-enum ntb_width {
- NTB_WIDTH_AUTO = -1,
- NTB_WIDTH_NONE = 0,
- NTB_WIDTH_1 = 1,
- NTB_WIDTH_2 = 2,
- NTB_WIDTH_4 = 4,
- NTB_WIDTH_8 = 8,
- NTB_WIDTH_12 = 12,
- NTB_WIDTH_16 = 16,
- NTB_WIDTH_32 = 32,
-};
-
-SYSCTL_DECL(_hw_ntb);
-
-typedef void (*ntb_db_callback)(void *data, uint32_t vector);
-typedef void (*ntb_event_callback)(void *data);
-
-struct ntb_ctx_ops {
- ntb_event_callback link_event;
- ntb_db_callback db_event;
-};
-
-device_t ntb_get_device(struct ntb_softc *);
-
-bool ntb_link_is_up(struct ntb_softc *, enum ntb_speed *, enum ntb_width *);
-void ntb_link_event(struct ntb_softc *);
-int ntb_link_enable(struct ntb_softc *, enum ntb_speed, enum ntb_width);
-int ntb_link_disable(struct ntb_softc *);
-bool ntb_link_enabled(struct ntb_softc *);
-
-int ntb_set_ctx(struct ntb_softc *, void *, const struct ntb_ctx_ops *);
-void *ntb_get_ctx(struct ntb_softc *, const struct ntb_ctx_ops **);
-void ntb_clear_ctx(struct ntb_softc *);
-
-uint8_t ntb_mw_count(struct ntb_softc *);
-int ntb_mw_get_range(struct ntb_softc *, unsigned mw_idx, vm_paddr_t *base,
- caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
- bus_addr_t *plimit);
-int ntb_mw_set_trans(struct ntb_softc *, unsigned mw_idx, bus_addr_t, size_t);
-int ntb_mw_clear_trans(struct ntb_softc *, unsigned mw_idx);
-
-int ntb_mw_get_wc(struct ntb_softc *, unsigned mw_idx, vm_memattr_t *mode);
-int ntb_mw_set_wc(struct ntb_softc *, unsigned mw_idx, vm_memattr_t mode);
-
-uint8_t ntb_get_max_spads(struct ntb_softc *ntb);
-void ntb_spad_clear(struct ntb_softc *ntb);
-int ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val);
-int ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val);
-int ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx,
- uint32_t val);
-int ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx,
- uint32_t *val);
-
-uint64_t ntb_db_valid_mask(struct ntb_softc *);
-uint64_t ntb_db_vector_mask(struct ntb_softc *, uint32_t vector);
-bus_addr_t ntb_get_peer_db_addr(struct ntb_softc *, vm_size_t *sz_out);
-
-void ntb_db_clear(struct ntb_softc *, uint64_t bits);
-void ntb_db_clear_mask(struct ntb_softc *, uint64_t bits);
-uint64_t ntb_db_read(struct ntb_softc *);
-void ntb_db_set_mask(struct ntb_softc *, uint64_t bits);
-void ntb_peer_db_set(struct ntb_softc *, uint64_t bits);
-
-#define XEON_SPAD_COUNT 16
-#define ATOM_SPAD_COUNT 16
-
-/* Hardware owns the low 16 bits of features. */
-#define NTB_BAR_SIZE_4K (1 << 0)
-#define NTB_SDOORBELL_LOCKUP (1 << 1)
-#define NTB_SB01BASE_LOCKUP (1 << 2)
-#define NTB_B2BDOORBELL_BIT14 (1 << 3)
-/* Software/configuration owns the top 16 bits. */
-#define NTB_SPLIT_BAR (1ull << 16)
-
-#define NTB_FEATURES_STR \
- "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
- "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
-
-bool ntb_has_feature(struct ntb_softc *, uint32_t);
-
-#endif /* _NTB_HW_H_ */
diff --git a/sys/dev/ntb/ntb_hw/ntb_regs.h b/sys/dev/ntb/ntb_hw/ntb_regs.h
index fb445d7..a037736 100644
--- a/sys/dev/ntb/ntb_hw/ntb_regs.h
+++ b/sys/dev/ntb/ntb_hw/ntb_regs.h
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
* Copyright (C) 2013 Intel Corporation
* Copyright (C) 2015 EMC Corporation
* All rights reserved.
@@ -76,6 +77,7 @@
#define XEON_SDBMSK_OFFSET 0x0066
#define XEON_USMEMMISS_OFFSET 0x0070
#define XEON_SPAD_OFFSET 0x0080
+#define XEON_SPAD_COUNT 16
#define XEON_SPADSEMA4_OFFSET 0x00c0
#define XEON_WCCNTRL_OFFSET 0x00e0
#define XEON_UNCERRSTS_OFFSET 0x014c
@@ -104,6 +106,7 @@
#define ATOM_NTBCNTL_OFFSET 0x0060
#define ATOM_EBDF_OFFSET 0x0064
#define ATOM_SPAD_OFFSET 0x0080
+#define ATOM_SPAD_COUNT 16
#define ATOM_SPADSEMA_OFFSET 0x00c0
#define ATOM_STKYSPAD_OFFSET 0x00c4
#define ATOM_PBAR2XLAT_OFFSET 0x8008
diff --git a/sys/dev/ntb/ntb_if.m b/sys/dev/ntb/ntb_if.m
new file mode 100644
index 0000000..d8ca227
--- /dev/null
+++ b/sys/dev/ntb/ntb_if.m
@@ -0,0 +1,210 @@
+#-
+# Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+#include <sys/bus.h>
+#include <machine/bus.h>
+
+INTERFACE ntb;
+
+HEADER {
+ enum ntb_speed {
+ NTB_SPEED_AUTO = -1,
+ NTB_SPEED_NONE = 0,
+ NTB_SPEED_GEN1 = 1,
+ NTB_SPEED_GEN2 = 2,
+ NTB_SPEED_GEN3 = 3,
+ };
+
+ enum ntb_width {
+ NTB_WIDTH_AUTO = -1,
+ NTB_WIDTH_NONE = 0,
+ NTB_WIDTH_1 = 1,
+ NTB_WIDTH_2 = 2,
+ NTB_WIDTH_4 = 4,
+ NTB_WIDTH_8 = 8,
+ NTB_WIDTH_12 = 12,
+ NTB_WIDTH_16 = 16,
+ NTB_WIDTH_32 = 32,
+ };
+
+ typedef void (*ntb_db_callback)(void *data, uint32_t vector);
+ typedef void (*ntb_event_callback)(void *data);
+ struct ntb_ctx_ops {
+ ntb_event_callback link_event;
+ ntb_db_callback db_event;
+ };
+};
+
+METHOD bool link_is_up {
+ device_t ntb;
+ enum ntb_speed *speed;
+ enum ntb_width *width;
+};
+
+METHOD int link_enable {
+ device_t ntb;
+ enum ntb_speed speed;
+ enum ntb_width width;
+};
+
+METHOD int link_disable {
+ device_t ntb;
+};
+
+METHOD bool link_enabled {
+ device_t ntb;
+};
+
+METHOD int set_ctx {
+ device_t ntb;
+ void *ctx;
+ const struct ntb_ctx_ops *ctx_ops;
+};
+
+METHOD void * get_ctx {
+ device_t ntb;
+ const struct ntb_ctx_ops **ctx_ops;
+};
+
+METHOD void clear_ctx {
+ device_t ntb;
+};
+
+METHOD uint8_t mw_count {
+ device_t ntb;
+};
+
+METHOD int mw_get_range {
+ device_t ntb;
+ unsigned mw_idx;
+ vm_paddr_t *base;
+ caddr_t *vbase;
+ size_t *size;
+ size_t *align;
+ size_t *align_size;
+ bus_addr_t *plimit;
+};
+
+METHOD int mw_set_trans {
+ device_t ntb;
+ unsigned mw_idx;
+ bus_addr_t addr;
+ size_t size;
+};
+
+METHOD int mw_clear_trans {
+ device_t ntb;
+ unsigned mw_idx;
+};
+
+METHOD int mw_get_wc {
+ device_t ntb;
+ unsigned mw_idx;
+ vm_memattr_t *mode;
+};
+
+METHOD int mw_set_wc {
+ device_t ntb;
+ unsigned mw_idx;
+ vm_memattr_t mode;
+};
+
+METHOD uint8_t spad_count {
+ device_t ntb;
+};
+
+METHOD void spad_clear {
+ device_t ntb;
+};
+
+METHOD int spad_write {
+ device_t ntb;
+ unsigned int idx;
+ uint32_t val;
+};
+
+METHOD int spad_read {
+ device_t ntb;
+ unsigned int idx;
+ uint32_t *val;
+};
+
+METHOD int peer_spad_write {
+ device_t ntb;
+ unsigned int idx;
+ uint32_t val;
+};
+
+METHOD int peer_spad_read {
+ device_t ntb;
+ unsigned int idx;
+ uint32_t *val;
+};
+
+METHOD uint64_t db_valid_mask {
+ device_t ntb;
+};
+
+METHOD int db_vector_count {
+ device_t ntb;
+};
+
+METHOD uint64_t db_vector_mask {
+ device_t ntb;
+ uint32_t vector;
+};
+
+METHOD int peer_db_addr {
+ device_t ntb;
+ bus_addr_t *db_addr;
+ vm_size_t *db_size;
+};
+
+METHOD void db_clear {
+ device_t ntb;
+ uint64_t bits;
+};
+
+METHOD void db_clear_mask {
+ device_t ntb;
+ uint64_t bits;
+};
+
+METHOD uint64_t db_read {
+ device_t ntb;
+};
+
+METHOD void db_set_mask {
+ device_t ntb;
+ uint64_t bits;
+};
+
+METHOD void peer_db_set {
+ device_t ntb;
+ uint64_t bits;
+};
diff --git a/sys/dev/ntb/ntb_transport.c b/sys/dev/ntb/ntb_transport.c
new file mode 100644
index 0000000..c7bc4da
--- /dev/null
+++ b/sys/dev/ntb/ntb_transport.c
@@ -0,0 +1,1518 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * Copyright (C) 2013 Intel Corporation
+ * Copyright (C) 2015 EMC Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using PCI-e links, providing remote memory access.
+ *
+ * This module contains a transport for sending and receiving messages by
+ * writing to remote memory window(s) provided by underlying NTB device.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+
+#include "ntb.h"
+#include "ntb_transport.h"
+
+#define KTR_NTB KTR_SPARE3
+
+#define NTB_TRANSPORT_VERSION 4
+
+static SYSCTL_NODE(_hw, OID_AUTO, ntb_transport, CTLFLAG_RW, 0, "ntb_transport");
+
+static unsigned g_ntb_transport_debug_level;
+SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, debug_level, CTLFLAG_RWTUN,
+ &g_ntb_transport_debug_level, 0,
+ "ntb_transport log level -- higher is more verbose");
+#define ntb_printf(lvl, ...) do { \
+ if ((lvl) <= g_ntb_transport_debug_level) { \
+ printf(__VA_ARGS__); \
+ } \
+} while (0)
+
+static unsigned transport_mtu = 0x10000;
+
+static uint64_t max_mw_size;
+SYSCTL_UQUAD(_hw_ntb_transport, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0,
+ "If enabled (non-zero), limit the size of large memory windows. "
+ "Both sides of the NTB MUST set the same value here.");
+
+static unsigned enable_xeon_watchdog;
+SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN,
+ &enable_xeon_watchdog, 0, "If non-zero, write a register every second to "
+ "keep a watchdog from tearing down the NTB link");
+
+STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);
+
+typedef uint32_t ntb_q_idx_t;
+
+struct ntb_queue_entry {
+ /* ntb_queue list reference */
+ STAILQ_ENTRY(ntb_queue_entry) entry;
+
+ /* info on data to be transferred */
+ void *cb_data;
+ void *buf;
+ uint32_t len;
+ uint32_t flags;
+
+ struct ntb_transport_qp *qp;
+ struct ntb_payload_header *x_hdr;
+ ntb_q_idx_t index;
+};
+
+struct ntb_rx_info {
+ ntb_q_idx_t entry;
+};
+
+struct ntb_transport_qp {
+ struct ntb_transport_ctx *transport;
+ device_t dev;
+
+ void *cb_data;
+
+ bool client_ready;
+ volatile bool link_is_up;
+ uint8_t qp_num; /* Only 64 QPs are allowed. 0-63 */
+
+ struct ntb_rx_info *rx_info;
+ struct ntb_rx_info *remote_rx_info;
+
+ void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+ void *data, int len);
+ struct ntb_queue_list tx_free_q;
+ struct mtx ntb_tx_free_q_lock;
+ caddr_t tx_mw;
+ bus_addr_t tx_mw_phys;
+ ntb_q_idx_t tx_index;
+ ntb_q_idx_t tx_max_entry;
+ uint64_t tx_max_frame;
+
+ void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+ void *data, int len);
+ struct ntb_queue_list rx_post_q;
+ struct ntb_queue_list rx_pend_q;
+ /* ntb_rx_q_lock: synchronize access to rx_XXXX_q */
+ struct mtx ntb_rx_q_lock;
+ struct task rxc_db_work;
+ struct taskqueue *rxc_tq;
+ caddr_t rx_buff;
+ ntb_q_idx_t rx_index;
+ ntb_q_idx_t rx_max_entry;
+ uint64_t rx_max_frame;
+
+ void (*event_handler)(void *data, enum ntb_link_event status);
+ struct callout link_work;
+ struct callout rx_full;
+
+ uint64_t last_rx_no_buf;
+
+ /* Stats */
+ uint64_t rx_bytes;
+ uint64_t rx_pkts;
+ uint64_t rx_ring_empty;
+ uint64_t rx_err_no_buf;
+ uint64_t rx_err_oflow;
+ uint64_t rx_err_ver;
+ uint64_t tx_bytes;
+ uint64_t tx_pkts;
+ uint64_t tx_ring_full;
+ uint64_t tx_err_no_buf;
+
+ struct mtx tx_lock;
+};
+
+struct ntb_transport_mw {
+ vm_paddr_t phys_addr;
+ size_t phys_size;
+ size_t xlat_align;
+ size_t xlat_align_size;
+ bus_addr_t addr_limit;
+ /* Tx buff is off vbase / phys_addr */
+ caddr_t vbase;
+ size_t xlat_size;
+ size_t buff_size;
+ /* Rx buff is off virt_addr / dma_addr */
+ caddr_t virt_addr;
+ bus_addr_t dma_addr;
+};
+
+struct ntb_transport_child {
+ device_t dev;
+ int qpoff;
+ int qpcnt;
+ struct ntb_transport_child *next;
+};
+
+struct ntb_transport_ctx {
+ device_t dev;
+ struct ntb_transport_child *child;
+ struct ntb_transport_mw *mw_vec;
+ struct ntb_transport_qp *qp_vec;
+ unsigned mw_count;
+ unsigned qp_count;
+ uint64_t qp_bitmap;
+ volatile bool link_is_up;
+ struct callout link_work;
+ struct callout link_watchdog;
+ struct task link_cleanup;
+};
+
+enum {
+ NTBT_DESC_DONE_FLAG = 1 << 0,
+ NTBT_LINK_DOWN_FLAG = 1 << 1,
+};
+
+struct ntb_payload_header {
+ ntb_q_idx_t ver;
+ uint32_t len;
+ uint32_t flags;
+};
+
+enum {
+ /*
+ * The order of this enum is part of the remote protocol. Do not
+ * reorder without bumping protocol version (and it's probably best
+	 * to keep the protocol in lock-step with the Linux NTB driver).
+ */
+ NTBT_VERSION = 0,
+ NTBT_QP_LINKS,
+ NTBT_NUM_QPS,
+ NTBT_NUM_MWS,
+ /*
+ * N.B.: transport_link_work assumes MW1 enums = MW0 + 2.
+ */
+ NTBT_MW0_SZ_HIGH,
+ NTBT_MW0_SZ_LOW,
+ NTBT_MW1_SZ_HIGH,
+ NTBT_MW1_SZ_LOW,
+
+ /*
+ * Some NTB-using hardware have a watchdog to work around NTB hangs; if
+ * a register or doorbell isn't written every few seconds, the link is
+ * torn down. Write an otherwise unused register every few seconds to
+ * work around this watchdog.
+ */
+ NTBT_WATCHDOG_SPAD = 15
+};
+
+#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count)
+#define NTB_QP_DEF_NUM_ENTRIES 100
+#define NTB_LINK_DOWN_TIMEOUT 10
+
+static int ntb_transport_probe(device_t dev);
+static int ntb_transport_attach(device_t dev);
+static int ntb_transport_detach(device_t dev);
+static void ntb_transport_init_queue(struct ntb_transport_ctx *nt,
+ unsigned int qp_num);
+static int ntb_process_tx(struct ntb_transport_qp *qp,
+ struct ntb_queue_entry *entry);
+static void ntb_transport_rxc_db(void *arg, int pending);
+static int ntb_process_rxc(struct ntb_transport_qp *qp);
+static void ntb_memcpy_rx(struct ntb_transport_qp *qp,
+ struct ntb_queue_entry *entry, void *offset);
+static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp,
+ void *data);
+static void ntb_complete_rxc(struct ntb_transport_qp *qp);
+static void ntb_transport_doorbell_callback(void *data, uint32_t vector);
+static void ntb_transport_event_callback(void *data);
+static void ntb_transport_link_work(void *arg);
+static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size);
+static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw);
+static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
+ unsigned int qp_num);
+static void ntb_qp_link_work(void *arg);
+static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt);
+static void ntb_transport_link_cleanup_work(void *, int);
+static void ntb_qp_link_down(struct ntb_transport_qp *qp);
+static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp);
+static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp);
+static void ntb_send_link_down(struct ntb_transport_qp *qp);
+static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
+ struct ntb_queue_list *list);
+static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock,
+ struct ntb_queue_list *list);
+static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock,
+ struct ntb_queue_list *from, struct ntb_queue_list *to);
+static void xeon_link_watchdog_hb(void *);
+
+static const struct ntb_ctx_ops ntb_transport_ops = {
+ .link_event = ntb_transport_event_callback,
+ .db_event = ntb_transport_doorbell_callback,
+};
+
+MALLOC_DEFINE(M_NTB_T, "ntb_transport", "ntb transport driver");
+
+static inline void
+iowrite32(uint32_t val, void *addr)
+{
+
+ bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr,
+ val);
+}
+
+/* Transport Init and teardown */
+
+static void
+xeon_link_watchdog_hb(void *arg)
+{
+ struct ntb_transport_ctx *nt;
+
+ nt = arg;
+ ntb_spad_write(nt->dev, NTBT_WATCHDOG_SPAD, 0);
+ callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt);
+}
+
+static int
+ntb_transport_probe(device_t dev)
+{
+
+ device_set_desc(dev, "NTB Transport");
+ return (0);
+}
+
+static int
+ntb_transport_attach(device_t dev)
+{
+ struct ntb_transport_ctx *nt = device_get_softc(dev);
+ struct ntb_transport_child **cpp = &nt->child;
+ struct ntb_transport_child *nc;
+ struct ntb_transport_mw *mw;
+ uint64_t db_bitmap;
+ int rc, i, db_count, spad_count, qp, qpu, qpo, qpt;
+ char cfg[128] = "";
+ char buf[32];
+ char *n, *np, *c, *name;
+
+ nt->dev = dev;
+ nt->mw_count = ntb_mw_count(dev);
+ spad_count = ntb_spad_count(dev);
+ db_bitmap = ntb_db_valid_mask(dev);
+ db_count = flsll(db_bitmap);
+ KASSERT(db_bitmap == (1 << db_count) - 1,
+ ("Doorbells are not sequential (%jx).\n", db_bitmap));
+
+ device_printf(dev, "%d memory windows, %d scratchpads, "
+ "%d doorbells\n", nt->mw_count, spad_count, db_count);
+
+ if (nt->mw_count == 0) {
+ device_printf(dev, "At least 1 memory window required.\n");
+ return (ENXIO);
+ }
+ if (spad_count < 6) {
+ device_printf(dev, "At least 6 scratchpads required.\n");
+ return (ENXIO);
+ }
+ if (spad_count < 4 + 2 * nt->mw_count) {
+ nt->mw_count = (spad_count - 4) / 2;
+ device_printf(dev, "Scratchpads enough only for %d "
+ "memory windows.\n", nt->mw_count);
+ }
+ if (db_bitmap == 0) {
+ device_printf(dev, "At least one doorbell required.\n");
+ return (ENXIO);
+ }
+
+ nt->mw_vec = malloc(nt->mw_count * sizeof(*nt->mw_vec), M_NTB_T,
+ M_WAITOK | M_ZERO);
+ for (i = 0; i < nt->mw_count; i++) {
+ mw = &nt->mw_vec[i];
+
+ rc = ntb_mw_get_range(dev, i, &mw->phys_addr, &mw->vbase,
+ &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size,
+ &mw->addr_limit);
+ if (rc != 0)
+ goto err;
+
+ mw->buff_size = 0;
+ mw->xlat_size = 0;
+ mw->virt_addr = NULL;
+ mw->dma_addr = 0;
+
+ rc = ntb_mw_set_wc(dev, i, VM_MEMATTR_WRITE_COMBINING);
+ if (rc)
+ ntb_printf(0, "Unable to set mw%d caching\n", i);
+ }
+
+ qpu = 0;
+ qpo = imin(db_count, nt->mw_count);
+ qpt = db_count;
+
+ snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev),
+ device_get_unit(dev));
+ TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg));
+ n = cfg;
+ i = 0;
+ while ((c = strsep(&n, ",")) != NULL) {
+ np = c;
+ name = strsep(&np, ":");
+ if (name != NULL && name[0] == 0)
+ name = NULL;
+ qp = (np && np[0] != 0) ? strtol(np, NULL, 10) : qpo - qpu;
+ if (qp <= 0)
+ qp = 1;
+
+ if (qp > qpt - qpu) {
+ device_printf(dev, "Not enough resources for config\n");
+ break;
+ }
+
+ nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO);
+ nc->qpoff = qpu;
+ nc->qpcnt = qp;
+ nc->dev = device_add_child(dev, name, -1);
+ if (nc->dev == NULL) {
+ device_printf(dev, "Can not add child.\n");
+ break;
+ }
+ device_set_ivars(nc->dev, nc);
+ *cpp = nc;
+ cpp = &nc->next;
+
+ if (bootverbose) {
+ device_printf(dev, "%d \"%s\": queues %d",
+ i, name, qpu);
+ if (qp > 1)
+ printf("-%d", qpu + qp - 1);
+ printf("\n");
+ }
+
+ qpu += qp;
+ i++;
+ }
+ nt->qp_count = qpu;
+
+ nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_T,
+ M_WAITOK | M_ZERO);
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_init_queue(nt, i);
+
+ callout_init(&nt->link_work, 0);
+ callout_init(&nt->link_watchdog, 0);
+ TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt);
+
+ rc = ntb_set_ctx(dev, nt, &ntb_transport_ops);
+ if (rc != 0)
+ goto err;
+
+ nt->link_is_up = false;
+ ntb_link_enable(dev, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+
+ if (enable_xeon_watchdog != 0)
+ callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt);
+
+ bus_generic_attach(dev);
+ return (0);
+
+err:
+ free(nt->qp_vec, M_NTB_T);
+ free(nt->mw_vec, M_NTB_T);
+ return (rc);
+}
+
+static int
+ntb_transport_detach(device_t dev)
+{
+ struct ntb_transport_ctx *nt = device_get_softc(dev);
+ struct ntb_transport_child **cpp = &nt->child;
+ struct ntb_transport_child *nc;
+ int error = 0, i;
+
+ while ((nc = *cpp) != NULL) {
+ *cpp = (*cpp)->next;
+ error = device_delete_child(dev, nc->dev);
+ if (error)
+ break;
+ free(nc, M_DEVBUF);
+ }
+ KASSERT(nt->qp_bitmap == 0,
+ ("Some queues not freed on detach (%jx)", nt->qp_bitmap));
+
+ ntb_transport_link_cleanup(nt);
+ taskqueue_drain(taskqueue_swi, &nt->link_cleanup);
+ callout_drain(&nt->link_work);
+ callout_drain(&nt->link_watchdog);
+
+ ntb_link_disable(dev);
+ ntb_clear_ctx(dev);
+
+ for (i = 0; i < nt->mw_count; i++)
+ ntb_free_mw(nt, i);
+
+ free(nt->qp_vec, M_NTB_T);
+ free(nt->mw_vec, M_NTB_T);
+ return (0);
+}
+
+int
+ntb_transport_queue_count(device_t dev)
+{
+ struct ntb_transport_child *nc = device_get_ivars(dev);
+
+ return (nc->qpcnt);
+}
+
+static void
+ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num)
+{
+ struct ntb_transport_mw *mw;
+ struct ntb_transport_qp *qp;
+ vm_paddr_t mw_base;
+ uint64_t mw_size, qp_offset;
+ size_t tx_size;
+ unsigned num_qps_mw, mw_num, mw_count;
+
+ mw_count = nt->mw_count;
+ mw_num = QP_TO_MW(nt, qp_num);
+ mw = &nt->mw_vec[mw_num];
+
+ qp = &nt->qp_vec[qp_num];
+ qp->qp_num = qp_num;
+ qp->transport = nt;
+ qp->dev = nt->dev;
+ qp->client_ready = false;
+ qp->event_handler = NULL;
+ ntb_qp_link_down_reset(qp);
+
+ if (mw_num < nt->qp_count % mw_count)
+ num_qps_mw = nt->qp_count / mw_count + 1;
+ else
+ num_qps_mw = nt->qp_count / mw_count;
+
+ mw_base = mw->phys_addr;
+ mw_size = mw->phys_size;
+
+ tx_size = mw_size / num_qps_mw;
+ qp_offset = tx_size * (qp_num / mw_count);
+
+ qp->tx_mw = mw->vbase + qp_offset;
+ KASSERT(qp->tx_mw != NULL, ("uh oh?"));
+
+ /* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */
+ qp->tx_mw_phys = mw_base + qp_offset;
+ KASSERT(qp->tx_mw_phys != 0, ("uh oh?"));
+
+ tx_size -= sizeof(struct ntb_rx_info);
+ qp->rx_info = (void *)(qp->tx_mw + tx_size);
+
+	/* Due to house-keeping, there must be at least two buffers */
+ qp->tx_max_frame = qmin(transport_mtu, tx_size / 2);
+ qp->tx_max_entry = tx_size / qp->tx_max_frame;
+
+ callout_init(&qp->link_work, 0);
+ callout_init(&qp->rx_full, 1);
+
+ mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN);
+ mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN);
+ mtx_init(&qp->tx_lock, "ntb transport tx", NULL, MTX_DEF);
+ TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp);
+ qp->rxc_tq = taskqueue_create("ntbt_rx", M_WAITOK,
+ taskqueue_thread_enqueue, &qp->rxc_tq);
+ taskqueue_start_threads(&qp->rxc_tq, 1, PI_NET, "%s rx%d",
+ device_get_nameunit(nt->dev), qp_num);
+
+ STAILQ_INIT(&qp->rx_post_q);
+ STAILQ_INIT(&qp->rx_pend_q);
+ STAILQ_INIT(&qp->tx_free_q);
+}
+
+void
+ntb_transport_free_queue(struct ntb_transport_qp *qp)
+{
+ struct ntb_transport_ctx *nt = qp->transport;
+ struct ntb_queue_entry *entry;
+
+ callout_drain(&qp->link_work);
+
+ ntb_db_set_mask(qp->dev, 1ull << qp->qp_num);
+ taskqueue_drain_all(qp->rxc_tq);
+ taskqueue_free(qp->rxc_tq);
+
+ qp->cb_data = NULL;
+ qp->rx_handler = NULL;
+ qp->tx_handler = NULL;
+ qp->event_handler = NULL;
+
+ while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q)))
+ free(entry, M_NTB_T);
+
+ while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q)))
+ free(entry, M_NTB_T);
+
+ while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
+ free(entry, M_NTB_T);
+
+ nt->qp_bitmap &= ~(1 << qp->qp_num);
+}
+
+/**
+ * ntb_transport_create_queue - Create a new NTB transport layer queue
+ * @rx_handler: receive callback function
+ * @tx_handler: transmit callback function
+ * @event_handler: event callback function
+ *
+ * Create a new NTB transport layer queue and provide the queue with a callback
+ * routine for both transmit and receive. The receive callback routine will be
+ * used to pass up data when the transport has received it on the queue. The
+ * transmit callback routine will be called when the transport has completed the
+ * transmission of the data on the queue and the data is ready to be freed.
+ *
+ * RETURNS: pointer to newly created ntb_queue, NULL on error.
+ */
+struct ntb_transport_qp *
+ntb_transport_create_queue(device_t dev, int q,
+ const struct ntb_queue_handlers *handlers, void *data)
+{
+ struct ntb_transport_child *nc = device_get_ivars(dev);
+ struct ntb_transport_ctx *nt = device_get_softc(device_get_parent(dev));
+ struct ntb_queue_entry *entry;
+ struct ntb_transport_qp *qp;
+ int i;
+
+ if (q < 0 || q >= nc->qpcnt)
+ return (NULL);
+
+ qp = &nt->qp_vec[nc->qpoff + q];
+ nt->qp_bitmap |= (1 << qp->qp_num);
+ qp->cb_data = data;
+ qp->rx_handler = handlers->rx_handler;
+ qp->tx_handler = handlers->tx_handler;
+ qp->event_handler = handlers->event_handler;
+
+ for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+ entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO);
+ entry->cb_data = data;
+ entry->buf = NULL;
+ entry->len = transport_mtu;
+ entry->qp = qp;
+ ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q);
+ }
+
+ for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+ entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO);
+ entry->qp = qp;
+ ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+ }
+
+ ntb_db_clear(dev, 1ull << qp->qp_num);
+ return (qp);
+}
+
+/**
+ * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
+ * @qp: NTB transport layer queue to be enabled
+ *
+ * Notify NTB transport layer of client readiness to use queue
+ */
+void
+ntb_transport_link_up(struct ntb_transport_qp *qp)
+{
+ struct ntb_transport_ctx *nt = qp->transport;
+
+ qp->client_ready = true;
+
+ ntb_printf(2, "qp %d client ready\n", qp->qp_num);
+
+ if (nt->link_is_up)
+ callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+}
+
+
+
+/* Transport Tx */
+
+/**
+ * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
+ * @qp: NTB transport layer queue the entry is to be enqueued on
+ * @cb: per buffer pointer for callback function to use
+ * @data: pointer to data buffer that will be sent
+ * @len: length of the data buffer
+ *
+ * Enqueue a new transmit buffer onto the transport queue from which a NTB
+ * payload will be transmitted. This assumes that a lock is being held to
+ * serialize access to the qp.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int
+ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+ unsigned int len)
+{
+ struct ntb_queue_entry *entry;
+ int rc;
+
+ if (!qp->link_is_up || len == 0) {
+ CTR0(KTR_NTB, "TX: link not up");
+ return (EINVAL);
+ }
+
+ entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+ if (entry == NULL) {
+ CTR0(KTR_NTB, "TX: could not get entry from tx_free_q");
+ qp->tx_err_no_buf++;
+ return (EBUSY);
+ }
+ CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry);
+
+ entry->cb_data = cb;
+ entry->buf = data;
+ entry->len = len;
+ entry->flags = 0;
+
+ mtx_lock(&qp->tx_lock);
+ rc = ntb_process_tx(qp, entry);
+ mtx_unlock(&qp->tx_lock);
+ if (rc != 0) {
+ ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+ CTR1(KTR_NTB,
+ "TX: process_tx failed. Returning entry %p to tx_free_q",
+ entry);
+ }
+ return (rc);
+}
+
+static void
+ntb_tx_copy_callback(void *data)
+{
+ struct ntb_queue_entry *entry = data;
+ struct ntb_transport_qp *qp = entry->qp;
+ struct ntb_payload_header *hdr = entry->x_hdr;
+
+ iowrite32(entry->flags | NTBT_DESC_DONE_FLAG, &hdr->flags);
+ CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr);
+
+ ntb_peer_db_set(qp->dev, 1ull << qp->qp_num);
+
+ /*
+ * The entry length can only be zero if the packet is intended to be a
+ * "link down" or similar. Since no payload is being sent in these
+ * cases, there is nothing to add to the completion queue.
+ */
+ if (entry->len > 0) {
+ qp->tx_bytes += entry->len;
+
+ if (qp->tx_handler)
+ qp->tx_handler(qp, qp->cb_data, entry->buf,
+ entry->len);
+ else
+ m_freem(entry->buf);
+ entry->buf = NULL;
+ }
+
+ CTR3(KTR_NTB,
+ "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning "
+ "to tx_free_q", entry, hdr->ver, hdr->flags);
+ ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+}
+
+static void
+ntb_memcpy_tx(struct ntb_queue_entry *entry, void *offset)
+{
+
+ CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset);
+ if (entry->buf != NULL) {
+ m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset);
+
+ /*
+ * Ensure that the data is fully copied before setting the
+ * flags
+ */
+ wmb();
+ }
+
+ ntb_tx_copy_callback(entry);
+}
+
+static void
+ntb_async_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
+{
+ struct ntb_payload_header *hdr;
+ void *offset;
+
+ offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
+ hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame -
+ sizeof(struct ntb_payload_header));
+ entry->x_hdr = hdr;
+
+ iowrite32(entry->len, &hdr->len);
+ iowrite32(qp->tx_pkts, &hdr->ver);
+
+ ntb_memcpy_tx(entry, offset);
+}
+
+static int
+ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
+{
+
+ CTR3(KTR_NTB,
+ "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u",
+ qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry);
+ if (qp->tx_index == qp->remote_rx_info->entry) {
+ CTR0(KTR_NTB, "TX: ring full");
+ qp->tx_ring_full++;
+ return (EAGAIN);
+ }
+
+ if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
+ if (qp->tx_handler != NULL)
+ qp->tx_handler(qp, qp->cb_data, entry->buf,
+ EIO);
+ else
+ m_freem(entry->buf);
+
+ entry->buf = NULL;
+ ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+ CTR1(KTR_NTB,
+ "TX: frame too big. returning entry %p to tx_free_q",
+ entry);
+ return (0);
+ }
+ CTR2(KTR_NTB, "TX: copying entry %p to index %u", entry, qp->tx_index);
+ ntb_async_tx(qp, entry);
+
+ qp->tx_index++;
+ qp->tx_index %= qp->tx_max_entry;
+
+ qp->tx_pkts++;
+
+ return (0);
+}
+
+/* Transport Rx */
+static void
+ntb_transport_rxc_db(void *arg, int pending __unused)
+{
+ struct ntb_transport_qp *qp = arg;
+ int rc;
+
+ CTR0(KTR_NTB, "RX: transport_rx");
+again:
+ while ((rc = ntb_process_rxc(qp)) == 0)
+ ;
+ CTR1(KTR_NTB, "RX: process_rxc returned %d", rc);
+
+ if ((ntb_db_read(qp->dev) & (1ull << qp->qp_num)) != 0) {
+ /* If db is set, clear it and check queue once more. */
+ ntb_db_clear(qp->dev, 1ull << qp->qp_num);
+ goto again;
+ }
+}
+
+static int
+ntb_process_rxc(struct ntb_transport_qp *qp)
+{
+ struct ntb_payload_header *hdr;
+ struct ntb_queue_entry *entry;
+ caddr_t offset;
+
+ offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
+ hdr = (void *)(offset + qp->rx_max_frame -
+ sizeof(struct ntb_payload_header));
+
+ CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index);
+ if ((hdr->flags & NTBT_DESC_DONE_FLAG) == 0) {
+ CTR0(KTR_NTB, "RX: hdr not done");
+ qp->rx_ring_empty++;
+ return (EAGAIN);
+ }
+
+ if ((hdr->flags & NTBT_LINK_DOWN_FLAG) != 0) {
+ CTR0(KTR_NTB, "RX: link down");
+ ntb_qp_link_down(qp);
+ hdr->flags = 0;
+ return (EAGAIN);
+ }
+
+ if (hdr->ver != (uint32_t)qp->rx_pkts) {
+ CTR2(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). "
+ "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts);
+ qp->rx_err_ver++;
+ return (EIO);
+ }
+
+ entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
+ if (entry == NULL) {
+ qp->rx_err_no_buf++;
+ CTR0(KTR_NTB, "RX: No entries in rx_pend_q");
+ return (EAGAIN);
+ }
+ callout_stop(&qp->rx_full);
+ CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry);
+
+ entry->x_hdr = hdr;
+ entry->index = qp->rx_index;
+
+ if (hdr->len > entry->len) {
+ CTR2(KTR_NTB, "RX: len too long. Wanted %ju got %ju",
+ (uintmax_t)hdr->len, (uintmax_t)entry->len);
+ qp->rx_err_oflow++;
+
+ entry->len = -EIO;
+ entry->flags |= NTBT_DESC_DONE_FLAG;
+
+ ntb_complete_rxc(qp);
+ } else {
+ qp->rx_bytes += hdr->len;
+ qp->rx_pkts++;
+
+ CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts);
+
+ entry->len = hdr->len;
+
+ ntb_memcpy_rx(qp, entry, offset);
+ }
+
+ qp->rx_index++;
+ qp->rx_index %= qp->rx_max_entry;
+ return (0);
+}
+
+static void
+ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
+ void *offset)
+{
+ struct ifnet *ifp = entry->cb_data;
+ unsigned int len = entry->len;
+
+ CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset);
+
+ entry->buf = (void *)m_devget(offset, len, 0, ifp, NULL);
+ if (entry->buf == NULL)
+ entry->len = -ENOMEM;
+
+ /* Ensure that the data is globally visible before clearing the flag */
+ wmb();
+
+ CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, entry->buf);
+ ntb_rx_copy_callback(qp, entry);
+}
+
+static inline void
+ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data)
+{
+ struct ntb_queue_entry *entry;
+
+ entry = data;
+ entry->flags |= NTBT_DESC_DONE_FLAG;
+ ntb_complete_rxc(qp);
+}
+
+static void
+ntb_complete_rxc(struct ntb_transport_qp *qp)
+{
+ struct ntb_queue_entry *entry;
+ struct mbuf *m;
+ unsigned len;
+
+ CTR0(KTR_NTB, "RX: rx_completion_task");
+
+ mtx_lock_spin(&qp->ntb_rx_q_lock);
+
+ while (!STAILQ_EMPTY(&qp->rx_post_q)) {
+ entry = STAILQ_FIRST(&qp->rx_post_q);
+ if ((entry->flags & NTBT_DESC_DONE_FLAG) == 0)
+ break;
+
+ entry->x_hdr->flags = 0;
+ iowrite32(entry->index, &qp->rx_info->entry);
+
+ STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry);
+
+ len = entry->len;
+ m = entry->buf;
+
+ /*
+ * Re-initialize queue_entry for reuse; rx_handler takes
+ * ownership of the mbuf.
+ */
+ entry->buf = NULL;
+ entry->len = transport_mtu;
+ entry->cb_data = qp->cb_data;
+
+ STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry);
+
+ mtx_unlock_spin(&qp->ntb_rx_q_lock);
+
+ CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m);
+ if (qp->rx_handler != NULL && qp->client_ready)
+ qp->rx_handler(qp, qp->cb_data, m, len);
+ else
+ m_freem(m);
+
+ mtx_lock_spin(&qp->ntb_rx_q_lock);
+ }
+
+ mtx_unlock_spin(&qp->ntb_rx_q_lock);
+}
+
+static void
+ntb_transport_doorbell_callback(void *data, uint32_t vector)
+{
+ struct ntb_transport_ctx *nt = data;
+ struct ntb_transport_qp *qp;
+ uint64_t vec_mask;
+ unsigned qp_num;
+
+ vec_mask = ntb_db_vector_mask(nt->dev, vector);
+ vec_mask &= nt->qp_bitmap;
+ if ((vec_mask & (vec_mask - 1)) != 0)
+ vec_mask &= ntb_db_read(nt->dev);
+ while (vec_mask != 0) {
+ qp_num = ffsll(vec_mask) - 1;
+
+ qp = &nt->qp_vec[qp_num];
+ if (qp->link_is_up)
+ taskqueue_enqueue(qp->rxc_tq, &qp->rxc_db_work);
+
+ vec_mask &= ~(1ull << qp_num);
+ }
+}
+
+/* Link Event handler */
+static void
+ntb_transport_event_callback(void *data)
+{
+ struct ntb_transport_ctx *nt = data;
+
+ if (ntb_link_is_up(nt->dev, NULL, NULL)) {
+ ntb_printf(1, "HW link up\n");
+ callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
+ } else {
+ ntb_printf(1, "HW link down\n");
+ taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup);
+ }
+}
+
+/* Link bring up */
+static void
+ntb_transport_link_work(void *arg)
+{
+ struct ntb_transport_ctx *nt = arg;
+ device_t dev = nt->dev;
+ struct ntb_transport_qp *qp;
+ uint64_t val64, size;
+ uint32_t val;
+ unsigned i;
+ int rc;
+
+ /* send the local info, in the opposite order of the way we read it */
+ for (i = 0; i < nt->mw_count; i++) {
+ size = nt->mw_vec[i].phys_size;
+
+ if (max_mw_size != 0 && size > max_mw_size)
+ size = max_mw_size;
+
+ ntb_peer_spad_write(dev, NTBT_MW0_SZ_HIGH + (i * 2),
+ size >> 32);
+ ntb_peer_spad_write(dev, NTBT_MW0_SZ_LOW + (i * 2), size);
+ }
+ ntb_peer_spad_write(dev, NTBT_NUM_MWS, nt->mw_count);
+ ntb_peer_spad_write(dev, NTBT_NUM_QPS, nt->qp_count);
+ ntb_peer_spad_write(dev, NTBT_QP_LINKS, 0);
+ ntb_peer_spad_write(dev, NTBT_VERSION, NTB_TRANSPORT_VERSION);
+
+ /* Query the remote side for its info */
+ val = 0;
+ ntb_spad_read(dev, NTBT_VERSION, &val);
+ if (val != NTB_TRANSPORT_VERSION)
+ goto out;
+
+ ntb_spad_read(dev, NTBT_NUM_QPS, &val);
+ if (val != nt->qp_count)
+ goto out;
+
+ ntb_spad_read(dev, NTBT_NUM_MWS, &val);
+ if (val != nt->mw_count)
+ goto out;
+
+ for (i = 0; i < nt->mw_count; i++) {
+ ntb_spad_read(dev, NTBT_MW0_SZ_HIGH + (i * 2), &val);
+ val64 = (uint64_t)val << 32;
+
+ ntb_spad_read(dev, NTBT_MW0_SZ_LOW + (i * 2), &val);
+ val64 |= val;
+
+ rc = ntb_set_mw(nt, i, val64);
+ if (rc != 0)
+ goto free_mws;
+ }
+
+ nt->link_is_up = true;
+ ntb_printf(1, "transport link up\n");
+
+ for (i = 0; i < nt->qp_count; i++) {
+ qp = &nt->qp_vec[i];
+
+ ntb_transport_setup_qp_mw(nt, i);
+
+ if (qp->client_ready)
+ callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+ }
+
+ return;
+
+free_mws:
+ for (i = 0; i < nt->mw_count; i++)
+ ntb_free_mw(nt, i);
+out:
+ if (ntb_link_is_up(dev, NULL, NULL))
+ callout_reset(&nt->link_work,
+ NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt);
+}
+
+static int
+ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size)
+{
+ struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+ size_t xlat_size, buff_size;
+ int rc;
+
+ if (size == 0)
+ return (EINVAL);
+
+ xlat_size = roundup(size, mw->xlat_align_size);
+ buff_size = xlat_size;
+
+ /* No need to re-setup */
+ if (mw->xlat_size == xlat_size)
+ return (0);
+
+ if (mw->buff_size != 0)
+ ntb_free_mw(nt, num_mw);
+
+ /* Alloc memory for receiving data. Must be aligned */
+ mw->xlat_size = xlat_size;
+ mw->buff_size = buff_size;
+
+ mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_T, M_ZERO, 0,
+ mw->addr_limit, mw->xlat_align, 0);
+ if (mw->virt_addr == NULL) {
+ ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n",
+ mw->buff_size, mw->xlat_size);
+ mw->xlat_size = 0;
+ mw->buff_size = 0;
+ return (ENOMEM);
+ }
+ /* TODO: replace with bus_space_* functions */
+ mw->dma_addr = vtophys(mw->virt_addr);
+
+ /*
+ * Ensure that the allocation from contigmalloc is aligned as
+ * requested. XXX: This may not be needed -- brought in for parity
+ * with the Linux driver.
+ */
+ if (mw->dma_addr % mw->xlat_align != 0) {
+ ntb_printf(0,
+ "DMA memory 0x%jx not aligned to BAR size 0x%zx\n",
+ (uintmax_t)mw->dma_addr, size);
+ ntb_free_mw(nt, num_mw);
+ return (ENOMEM);
+ }
+
+ /* Notify HW the memory location of the receive buffer */
+ rc = ntb_mw_set_trans(nt->dev, num_mw, mw->dma_addr, mw->xlat_size);
+ if (rc) {
+ ntb_printf(0, "Unable to set mw%d translation\n", num_mw);
+ ntb_free_mw(nt, num_mw);
+ return (rc);
+ }
+
+ return (0);
+}
+
+/*
+ * Tear down memory window 'num_mw': clear the hardware translation and
+ * release the contiguous receive buffer.  Safe to call when the window
+ * was never set up (virt_addr == NULL).
+ */
+static void
+ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
+{
+	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+
+	if (mw->virt_addr == NULL)
+		return;
+
+	/* Unmap before freeing so the peer cannot write a stale buffer. */
+	ntb_mw_clear_trans(nt->dev, num_mw);
+	contigfree(mw->virt_addr, mw->xlat_size, M_NTB_T);
+	mw->xlat_size = 0;
+	mw->buff_size = 0;
+	mw->virt_addr = NULL;
+}
+
+/*
+ * Carve queue pair 'qp_num' its slice of the memory window it maps to and
+ * initialize the receive-side ring bookkeeping.  Several qps may share one
+ * MW; the window is divided evenly, with the first (qp_count % mw_count)
+ * windows carrying one extra qp.  Returns ENOMEM if the MW buffer has not
+ * been allocated yet, else 0.
+ */
+static int
+ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+	struct ntb_transport_mw *mw;
+	void *offset;
+	ntb_q_idx_t i;
+	size_t rx_size;
+	unsigned num_qps_mw, mw_num, mw_count;
+
+	mw_count = nt->mw_count;
+	mw_num = QP_TO_MW(nt, qp_num);
+	mw = &nt->mw_vec[mw_num];
+
+	if (mw->virt_addr == NULL)
+		return (ENOMEM);
+
+	if (mw_num < nt->qp_count % mw_count)
+		num_qps_mw = nt->qp_count / mw_count + 1;
+	else
+		num_qps_mw = nt->qp_count / mw_count;
+
+	rx_size = mw->xlat_size / num_qps_mw;
+	qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count);
+	rx_size -= sizeof(struct ntb_rx_info);
+
+	/* The shared rx_info sits at the very end of this qp's slice. */
+	qp->remote_rx_info = (void*)(qp->rx_buff + rx_size);
+
+	/* Due to house-keeping, there must be at least 2 buffs */
+	qp->rx_max_frame = qmin(transport_mtu, rx_size / 2);
+	qp->rx_max_entry = rx_size / qp->rx_max_frame;
+	qp->rx_index = 0;
+
+	qp->remote_rx_info->entry = qp->rx_max_entry - 1;
+
+	/* Set up the hdr offsets with 0s */
+	for (i = 0; i < qp->rx_max_entry; i++) {
+		/* Header lives at the tail of each rx frame. */
+		offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) -
+		    sizeof(struct ntb_payload_header));
+		memset(offset, 0, sizeof(struct ntb_payload_header));
+	}
+
+	qp->rx_pkts = 0;
+	qp->tx_pkts = 0;
+	qp->tx_index = 0;
+
+	return (0);
+}
+
+/*
+ * Callout handler driving the per-qp link handshake: advertise our ready
+ * queues to the peer via scratchpad, then check whether the peer has
+ * advertised this qp.  Re-arms itself while the transport link is up and
+ * the peer side has not yet come up.
+ */
+static void
+ntb_qp_link_work(void *arg)
+{
+	struct ntb_transport_qp *qp = arg;
+	device_t dev = qp->dev;
+	struct ntb_transport_ctx *nt = qp->transport;
+	int i;
+	uint32_t val;
+
+	/* Report queues that are up on our side */
+	for (i = 0, val = 0; i < nt->qp_count; i++) {
+		if (nt->qp_vec[i].client_ready)
+			val |= (1 << i);
+	}
+	ntb_peer_spad_write(dev, NTBT_QP_LINKS, val);
+
+	/* See if the remote side is up */
+	/* NOTE(review): local spad read — presumably the peer's
+	 * ntb_peer_spad_write() lands here; confirm against ntb(4). */
+	ntb_spad_read(dev, NTBT_QP_LINKS, &val);
+	if ((val & (1ull << qp->qp_num)) != 0) {
+		ntb_printf(2, "qp %d link up\n", qp->qp_num);
+		qp->link_is_up = true;
+
+		if (qp->event_handler != NULL)
+			qp->event_handler(qp->cb_data, NTB_LINK_UP);
+
+		/* Start taking doorbell interrupts for this qp. */
+		ntb_db_clear_mask(dev, 1ull << qp->qp_num);
+	} else if (nt->link_is_up)
+		callout_reset(&qp->link_work,
+		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
+}
+
+/* Link down event */
+/*
+ * Bring down every active queue pair after a transport link-down event,
+ * stop pending link callouts, and wipe the scratchpad registers.
+ */
+static void
+ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
+{
+	struct ntb_transport_qp *qp;
+	int i;
+
+	/* Pass along the info to any clients */
+	for (i = 0; i < nt->qp_count; i++) {
+		/* Only qps marked in-use in the bitmap are torn down. */
+		if ((nt->qp_bitmap & (1 << i)) != 0) {
+			qp = &nt->qp_vec[i];
+			ntb_qp_link_cleanup(qp);
+			callout_drain(&qp->link_work);
+		}
+	}
+
+	if (!nt->link_is_up)
+		callout_drain(&nt->link_work);
+
+	/*
+	 * The scratchpad registers keep the values if the remote side
+	 * goes down, blast them now to give them a sane value the next
+	 * time they are accessed
+	 */
+	ntb_spad_clear(nt->dev);
+}
+
+/* Taskqueue wrapper so link cleanup can run outside interrupt context. */
+static void
+ntb_transport_link_cleanup_work(void *arg, int pending __unused)
+{
+
+	ntb_transport_link_cleanup(arg);
+}
+
+/* Take a single queue pair's link down (thin alias for cleanup). */
+static void
+ntb_qp_link_down(struct ntb_transport_qp *qp)
+{
+
+	ntb_qp_link_cleanup(qp);
+}
+
+/*
+ * Mark the qp link down, mask its doorbell, and zero all ring indices,
+ * byte/packet counters, and error statistics so a future link-up starts
+ * from a clean slate.
+ */
+static void
+ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+{
+
+	qp->link_is_up = false;
+	/* Stop doorbell interrupts for this qp while it is down. */
+	ntb_db_set_mask(qp->dev, 1ull << qp->qp_num);
+
+	qp->tx_index = qp->rx_index = 0;
+	qp->tx_bytes = qp->rx_bytes = 0;
+	qp->tx_pkts = qp->rx_pkts = 0;
+
+	qp->rx_ring_empty = 0;
+	qp->tx_ring_full = 0;
+
+	qp->rx_err_no_buf = qp->tx_err_no_buf = 0;
+	qp->rx_err_oflow = qp->rx_err_ver = 0;
+}
+
+/*
+ * Stop the qp's link-handshake callout, reset its state, and notify the
+ * client of the link-down event if a handler is registered.
+ */
+static void
+ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
+{
+
+	callout_drain(&qp->link_work);
+	ntb_qp_link_down_reset(qp);
+
+	if (qp->event_handler != NULL)
+		qp->event_handler(qp->cb_data, NTB_LINK_DOWN);
+}
+
+/* Link commanded down */
+/**
+ * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
+ * @qp: NTB transport layer queue to be disabled
+ *
+ * Notify NTB transport layer of client's desire to no longer receive data on
+ * transport queue specified. It is the client's responsibility to ensure all
+ * entries on queue are purged or otherwise handled appropriately.
+ */
+void
+ntb_transport_link_down(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport_ctx *nt = qp->transport;
+	int i;
+	uint32_t val;
+
+	/* Withdraw this qp from the ready set advertised to the peer. */
+	qp->client_ready = false;
+	for (i = 0, val = 0; i < nt->qp_count; i++) {
+		if (nt->qp_vec[i].client_ready)
+			val |= (1 << i);
+	}
+	ntb_peer_spad_write(qp->dev, NTBT_QP_LINKS, val);
+
+	if (qp->link_is_up)
+		/* Tell the peer explicitly; also resets local qp state. */
+		ntb_send_link_down(qp);
+	else
+		callout_drain(&qp->link_work);
+}
+
+/**
+ * ntb_transport_link_query - Query transport link state
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query connectivity to the remote system of the NTB transport queue
+ *
+ * RETURNS: true for link up or false for link down
+ */
+bool
+ntb_transport_link_query(struct ntb_transport_qp *qp)
+{
+
+	/* Lockless snapshot of the qp link flag. */
+	return (qp->link_is_up);
+}
+
+/*
+ * Send an explicit link-down message to the peer over the qp itself,
+ * waiting (bounded) for a free tx entry, then reset local qp state.
+ * Best-effort: gives up silently if no entry frees up in time.
+ */
+static void
+ntb_send_link_down(struct ntb_transport_qp *qp)
+{
+	struct ntb_queue_entry *entry;
+	int i, rc;
+
+	if (!qp->link_is_up)
+		return;
+
+	/* NOTE(review): 'entry' is only assigned inside this loop; if
+	 * NTB_LINK_DOWN_TIMEOUT were ever 0 the read below would be of an
+	 * uninitialized variable — confirm the macro is nonzero. */
+	for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
+		entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+		if (entry != NULL)
+			break;
+		pause("NTB Wait for link down", hz / 10);
+	}
+
+	if (entry == NULL)
+		return;
+
+	/* Empty payload; the flag alone carries the message. */
+	entry->cb_data = NULL;
+	entry->buf = NULL;
+	entry->len = 0;
+	entry->flags = NTBT_LINK_DOWN_FLAG;
+
+	mtx_lock(&qp->tx_lock);
+	rc = ntb_process_tx(qp, entry);
+	mtx_unlock(&qp->tx_lock);
+	if (rc != 0)
+		printf("ntb: Failed to send link down\n");
+
+	ntb_qp_link_down_reset(qp);
+}
+
+
+/* List Management */
+
+/* Append 'entry' to 'list' under the given spin mutex. */
+static void
+ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
+    struct ntb_queue_list *list)
+{
+
+	mtx_lock_spin(lock);
+	STAILQ_INSERT_TAIL(list, entry, entry);
+	mtx_unlock_spin(lock);
+}
+
+/*
+ * Pop and return the head of 'list' under the given spin mutex, or NULL
+ * if the list is empty.
+ */
+static struct ntb_queue_entry *
+ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list)
+{
+	struct ntb_queue_entry *entry;
+
+	mtx_lock_spin(lock);
+	if (STAILQ_EMPTY(list)) {
+		entry = NULL;
+		goto out;
+	}
+	entry = STAILQ_FIRST(list);
+	STAILQ_REMOVE_HEAD(list, entry);
+out:
+	mtx_unlock_spin(lock);
+
+	return (entry);
+}
+
+/*
+ * Atomically move the head of 'from' to the tail of 'to' under the given
+ * spin mutex; returns the moved entry, or NULL if 'from' was empty.
+ */
+static struct ntb_queue_entry *
+ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from,
+    struct ntb_queue_list *to)
+{
+	struct ntb_queue_entry *entry;
+
+	mtx_lock_spin(lock);
+	if (STAILQ_EMPTY(from)) {
+		entry = NULL;
+		goto out;
+	}
+	entry = STAILQ_FIRST(from);
+	STAILQ_REMOVE_HEAD(from, entry);
+	STAILQ_INSERT_TAIL(to, entry, entry);
+
+out:
+	mtx_unlock_spin(lock);
+	return (entry);
+}
+
+/**
+ * ntb_transport_qp_num - Query the qp number
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query qp number of the NTB transport queue
+ *
+ * RETURNS: a zero based number specifying the qp number
+ */
+unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp)
+{
+
+	return (qp->qp_num);
+}
+
+/**
+ * ntb_transport_max_size - Query the max payload size of a qp
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query the maximum payload size permissible on the given qp
+ *
+ * RETURNS: the max payload size of a qp
+ */
+unsigned int
+ntb_transport_max_size(struct ntb_transport_qp *qp)
+{
+
+	/* Each tx frame reserves room for its trailing payload header. */
+	return (qp->tx_max_frame - sizeof(struct ntb_payload_header));
+}
+
+/*
+ * Return the number of free tx ring entries, accounting for ring
+ * wrap-around between our tx index and the peer-reported consumer index.
+ */
+unsigned int
+ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
+{
+	unsigned int head = qp->tx_index;
+	unsigned int tail = qp->remote_rx_info->entry;
+
+	return (tail >= head ? tail - head : qp->tx_max_entry + tail - head);
+}
+
+/* newbus glue: ntb_transport attaches as a child of the ntb_hw driver. */
+static device_method_t ntb_transport_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,     ntb_transport_probe),
+	DEVMETHOD(device_attach,    ntb_transport_attach),
+	DEVMETHOD(device_detach,    ntb_transport_detach),
+	DEVMETHOD_END
+};
+
+devclass_t ntb_transport_devclass;
+static DEFINE_CLASS_0(ntb_transport, ntb_transport_driver,
+    ntb_transport_methods, sizeof(struct ntb_transport_ctx));
+DRIVER_MODULE(ntb_transport, ntb_hw, ntb_transport_driver,
+    ntb_transport_devclass, NULL, NULL);
+MODULE_DEPEND(ntb_transport, ntb, 1, 1, 1);
+MODULE_VERSION(ntb_transport, 1);
diff --git a/sys/dev/ntb/ntb_transport.h b/sys/dev/ntb/ntb_transport.h
new file mode 100644
index 0000000..63cdbce
--- /dev/null
+++ b/sys/dev/ntb/ntb_transport.h
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* Opaque per-queue-pair handle; defined in ntb_transport.c. */
+struct ntb_transport_qp;
+
+extern devclass_t ntb_transport_devclass;
+
+/* Link state reported to client event handlers. */
+enum ntb_link_event {
+	NTB_LINK_DOWN = 0,
+	NTB_LINK_UP,
+};
+
+/* Client callbacks invoked on rx completion, tx completion, and link
+ * state changes; qp_data is the client cookie passed at queue creation. */
+struct ntb_queue_handlers {
+	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+	    void *data, int len);
+	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+	    void *data, int len);
+	void (*event_handler)(void *data, enum ntb_link_event status);
+};
+
+/* Public transport API; see ntb_transport.c for per-function contracts. */
+int ntb_transport_queue_count(device_t dev);
+struct ntb_transport_qp *
+ntb_transport_create_queue(device_t dev, int q,
+    const struct ntb_queue_handlers *handlers, void *data);
+void ntb_transport_free_queue(struct ntb_transport_qp *qp);
+unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp);
+int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+    unsigned int len);
+int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+    unsigned int len);
+void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len);
+void ntb_transport_link_up(struct ntb_transport_qp *qp);
+void ntb_transport_link_down(struct ntb_transport_qp *qp);
+bool ntb_transport_link_query(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp);
diff --git a/sys/dev/pci/pci_pci.c b/sys/dev/pci/pci_pci.c
index 6e8d076..dedc55a 100644
--- a/sys/dev/pci/pci_pci.c
+++ b/sys/dev/pci/pci_pci.c
@@ -918,6 +918,7 @@ static void
pcib_probe_hotplug(struct pcib_softc *sc)
{
device_t dev;
+ uint16_t link_sta, slot_sta;
if (!pci_enable_pcie_hp)
return;
@@ -932,15 +933,29 @@ pcib_probe_hotplug(struct pcib_softc *sc)
sc->pcie_link_cap = pcie_read_config(dev, PCIER_LINK_CAP, 4);
sc->pcie_slot_cap = pcie_read_config(dev, PCIER_SLOT_CAP, 4);
+ if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC) == 0)
+ return;
+
/*
- * XXX: Handling of slots with a power controller needs to be
- * reexamined. Ignore hotplug on such slots for now.
+ * Some devices report that they have an MRL when they actually
+ * do not. Since they always report that the MRL is open, child
+ * devices would be ignored. Try to detect these devices and
+ * ignore their claim of HotPlug support.
+ *
+ * If there is an open MRL but the Data Link Layer is active,
+ * the MRL is not real.
*/
- if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP)
- return;
-
- if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC)
- sc->flags |= PCIB_HOTPLUG;
+ if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) != 0 &&
+ (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) != 0) {
+ link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2);
+ slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
+ if ((slot_sta & PCIEM_SLOT_STA_MRLSS) != 0 &&
+ (link_sta & PCIEM_LINK_STA_DL_ACTIVE) != 0) {
+ return;
+ }
+ }
+
+ sc->flags |= PCIB_HOTPLUG;
}
/*
@@ -966,6 +981,8 @@ pcib_pcie_hotplug_command(struct pcib_softc *sc, uint16_t val, uint16_t mask)
new = (ctl & ~mask) | val;
if (new == ctl)
return;
+ if (bootverbose)
+ device_printf(dev, "HotPlug command: %04x -> %04x\n", ctl, new);
pcie_write_config(dev, PCIER_SLOT_CTL, new, 2);
if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS) &&
(ctl & new) & PCIEM_SLOT_CTL_CCIE) {
@@ -1028,9 +1045,6 @@ pcib_hotplug_inserted(struct pcib_softc *sc)
static int
pcib_hotplug_present(struct pcib_softc *sc)
{
- device_t dev;
-
- dev = sc->dev;
/* Card must be inserted. */
if (!pcib_hotplug_inserted(sc))
@@ -1059,7 +1073,7 @@ pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask,
{
bool card_inserted, ei_engaged;
- /* Clear DETACHING if Present Detect has cleared. */
+ /* Clear DETACHING if Presence Detect has cleared. */
if ((sc->pcie_slot_sta & (PCIEM_SLOT_STA_PDC | PCIEM_SLOT_STA_PDS)) ==
PCIEM_SLOT_STA_PDC)
sc->flags &= ~PCIB_DETACHING;
@@ -1101,14 +1115,15 @@ pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask,
/*
* Start a timer to see if the Data Link Layer times out.
- * Note that we only start the timer if Presence Detect
+ * Note that we only start the timer if Presence Detect or MRL Sensor
* changed on this interrupt. Stop any scheduled timer if
* the Data Link Layer is active.
*/
if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) {
if (card_inserted &&
!(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) &&
- sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC) {
+ sc->pcie_slot_sta &
+ (PCIEM_SLOT_STA_MRLSC | PCIEM_SLOT_STA_PDC)) {
if (cold)
device_printf(sc->dev,
"Data Link Layer inactive\n");
@@ -1144,6 +1159,10 @@ pcib_pcie_intr(void *arg)
/* Clear the events just reported. */
pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2);
+ if (bootverbose)
+ device_printf(dev, "HotPlug interrupt: %#x\n",
+ sc->pcie_slot_sta);
+
if (sc->pcie_slot_sta & PCIEM_SLOT_STA_ABP) {
if (sc->flags & PCIB_DETACH_PENDING) {
device_printf(dev,
@@ -1165,7 +1184,7 @@ pcib_pcie_intr(void *arg)
sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS ? "open" :
"closed");
if (bootverbose && sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC)
- device_printf(dev, "Present Detect Changed to %s\n",
+ device_printf(dev, "Presence Detect Changed to %s\n",
sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS ? "card present" :
"empty");
if (sc->pcie_slot_sta & PCIEM_SLOT_STA_CC)
@@ -1234,7 +1253,7 @@ pcib_pcie_cc_timeout(void *arg)
sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
if (!(sta & PCIEM_SLOT_STA_CC)) {
device_printf(dev,
- "Hotplug Command Timed Out - forcing detach\n");
+ "HotPlug Command Timed Out - forcing detach\n");
sc->flags &= ~(PCIB_HOTPLUG_CMD_PENDING | PCIB_DETACH_PENDING);
sc->flags |= PCIB_DETACHING;
pcib_pcie_hotplug_update(sc, 0, 0, true);
OpenPOWER on IntegriCloud