66 files changed, 5261 insertions, 3257 deletions
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 6062bfd..f87d3b5 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1224,7 +1224,7 @@ pmap_init(void)
 	 * include at least one feature that is only supported by older Intel
 	 * or newer AMD processors.
 	 */
-	if (vm_guest == VM_GUEST_VM && (cpu_feature & CPUID_SS) == 0 &&
+	if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 &&
 	    (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI |
 	    CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP |
 	    AMDID2_FMA4)) == 0)
diff --git a/sys/arm/allwinner/a10_ehci.c b/sys/arm/allwinner/a10_ehci.c
index 95e5990..3ad3d98 100644
--- a/sys/arm/allwinner/a10_ehci.c
+++ b/sys/arm/allwinner/a10_ehci.c
@@ -360,7 +360,7 @@ static device_method_t ehci_methods[] = {
 static driver_t ehci_driver = {
 	.name = "ehci",
 	.methods = ehci_methods,
-	.size = sizeof(ehci_softc_t),
+	.size = sizeof(struct aw_ehci_softc),
 };
 
 static devclass_t ehci_devclass;
diff --git a/sys/boot/efi/loader/Makefile b/sys/boot/efi/loader/Makefile
index 6b1b4ac..2f54032 100644
--- a/sys/boot/efi/loader/Makefile
+++ b/sys/boot/efi/loader/Makefile
@@ -63,6 +63,18 @@ CFLAGS+=	-DNO_PCI -DEFI
 LIBSTAND=	${.OBJDIR}/../../../../lib/libstand/libstand.a
 .endif
 
+.if !defined(BOOT_HIDE_SERIAL_NUMBERS)
+# Export serial numbers, UUID, and asset tag from loader.
+CFLAGS+= -DSMBIOS_SERIAL_NUMBERS
+.if defined(BOOT_LITTLE_ENDIAN_UUID)
+# Use little-endian UUID format as defined in SMBIOS 2.6.
+CFLAGS+= -DSMBIOS_LITTLE_ENDIAN_UUID
+.elif defined(BOOT_NETWORK_ENDIAN_UUID)
+# Use network-endian UUID format for backward compatibility.
+CFLAGS+= -DSMBIOS_NETWORK_ENDIAN_UUID
+.endif
+.endif
+
 .if ${MK_FORTH} != "no"
 BOOT_FORTH=	yes
 CFLAGS+=	-DBOOT_FORTH
diff --git a/sys/conf/files b/sys/conf/files
index 58bc490..448060c 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3345,6 +3345,7 @@ kern/subr_disk.c		standard
 kern/subr_eventhandler.c	standard
 kern/subr_fattime.c		standard
 kern/subr_firmware.c		optional firmware
+kern/subr_gtaskqueue.c		standard
 kern/subr_hash.c		standard
 kern/subr_hints.c		standard
 kern/subr_kdb.c			standard
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index defd5ac..186ea7d 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -287,7 +287,10 @@ dev/hyperv/vmbus/amd64/hyperv_machdep.c			optional	hyperv
 dev/hyperv/vmbus/amd64/vmbus_vector.S			optional	hyperv
 dev/nfe/if_nfe.c		optional	nfe pci
 dev/ntb/if_ntb/if_ntb.c		optional	if_ntb
-dev/ntb/ntb_hw/ntb_hw.c		optional	if_ntb | ntb_hw
+dev/ntb/ntb_transport.c		optional	if_ntb
+dev/ntb/ntb.c			optional	if_ntb | ntb_hw
+dev/ntb/ntb_if.m		optional	if_ntb | ntb_hw
+dev/ntb/ntb_hw/ntb_hw.c		optional	ntb_hw
 dev/nvd/nvd.c			optional	nvd nvme
 dev/nvme/nvme.c			optional	nvme
 dev/nvme/nvme_ctrlr.c		optional	nvme
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index bf6f1f0..85df7d0 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -277,7 +277,10 @@ dev/mse/mse.c			optional mse
 dev/mse/mse_isa.c		optional mse isa
 dev/nfe/if_nfe.c		optional nfe pci
 dev/ntb/if_ntb/if_ntb.c		optional if_ntb
-dev/ntb/ntb_hw/ntb_hw.c		optional if_ntb | ntb_hw
+dev/ntb/ntb_transport.c		optional if_ntb
+dev/ntb/ntb.c			optional if_ntb | ntb_hw
+dev/ntb/ntb_if.m		optional if_ntb | ntb_hw
+dev/ntb/ntb_hw/ntb_hw.c		optional ntb_hw
 dev/nvd/nvd.c			optional nvd nvme
 dev/nvme/nvme.c			optional nvme
 dev/nvme/nvme_ctrlr.c		optional nvme
diff --git a/sys/conf/kern.post.mk b/sys/conf/kern.post.mk
index 4cb60c0..f44645c 100644
--- a/sys/conf/kern.post.mk
+++ b/sys/conf/kern.post.mk
@@ -65,6 +65,10 @@ OSRELDATE!=	awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \
 # Keep the related ports builds in the obj directory so that they are only rebuilt once per kernel build
 WRKDIRPREFIX?=	${MAKEOBJDIRPREFIX}${SRC_BASE}/sys/${KERNCONF}
 PORTSMODULESENV=\
+	env \
+	-u CC \
+	-u CXX \
+	-u CPP \
 	PATH=${PATH}:${LOCALBASE}/bin:${LOCALBASE}/sbin \
 	SRC_BASE=${SRC_BASE} \
 	OSVERSION=${OSRELDATE} \
@@ -192,12 +196,13 @@ ${SYSTEM_OBJS} genassym.o vers.o: opt_global.h
 .if !empty(.MAKE.MODE:Unormal:Mmeta) && empty(.MAKE.MODE:Unormal:Mnofilemon)
 _meta_filemon=	1
 .endif
-# Skip reading .depend when not needed to speed up tree-walks
-# and simple lookups.
+# Skip reading .depend when not needed to speed up tree-walks and simple
+# lookups.  For install, only do this if no other targets are specified.
 # Also skip generating or including .depend.* files if in meta+filemon mode
 # since it will track dependencies itself.  OBJS_DEPEND_GUESS is still used.
 .if !empty(.MAKEFLAGS:M-V${_V_READ_DEPEND}) || make(obj) || make(clean*) || \
-    make(install*) || make(kernel-obj) || make(kernel-clean*) || \
+    ${.TARGETS:M*install*} == ${.TARGETS} || \
+    make(kernel-obj) || make(kernel-clean*) || \
     make(kernel-install*) || defined(_meta_filemon)
 _SKIP_READ_DEPEND=	1
 .MAKE.DEPENDFILE=	/dev/null
diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c
index ec42b3c..ee0e957 100644
--- a/sys/dev/ahci/ahci.c
+++ b/sys/dev/ahci/ahci.c
@@ -416,7 +416,8 @@ ahci_setup_interrupt(device_t dev)
 		else if (ctlr->numirqs == 1 || i >= ctlr->channels ||
 		    (ctlr->ccc && i == ctlr->cccv))
 			ctlr->irqs[i].mode = AHCI_IRQ_MODE_ALL;
-		else if (i == ctlr->numirqs - 1)
+		else if (ctlr->channels > ctlr->numirqs &&
+		    i == ctlr->numirqs - 1)
 			ctlr->irqs[i].mode = AHCI_IRQ_MODE_AFTER;
 		else
 			ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE;
@@ -465,6 +466,7 @@ ahci_intr(void *data)
 	} else {	/* AHCI_IRQ_MODE_AFTER */
 		unit = irq->r_irq_rid - 1;
 		is = ATA_INL(ctlr->r_mem, AHCI_IS);
+		is &= (0xffffffff << unit);
 	}
 	/* CCC interrupt is edge triggered. */
 	if (ctlr->ccc)
diff --git a/sys/dev/ahci/ahci_pci.c b/sys/dev/ahci/ahci_pci.c
index 6b6a1a6..4a44484 100644
--- a/sys/dev/ahci/ahci_pci.c
+++ b/sys/dev/ahci/ahci_pci.c
@@ -187,7 +187,7 @@ static const struct {
 	{0xa10f8086, 0x00, "Intel Sunrise Point (RAID)",	0},
 	{0x23238086, 0x00, "Intel DH89xxCC",	0},
 	{0x2360197b, 0x00, "JMicron JMB360",	0},
-	{0x2361197b, 0x00, "JMicron JMB361",	AHCI_Q_NOFORCE},
+	{0x2361197b, 0x00, "JMicron JMB361",	AHCI_Q_NOFORCE | AHCI_Q_1CH},
 	{0x2362197b, 0x00, "JMicron JMB362",	0},
 	{0x2363197b, 0x00, "JMicron JMB363",	AHCI_Q_NOFORCE},
 	{0x2365197b, 0x00, "JMicron JMB365",	AHCI_Q_NOFORCE},
diff --git a/sys/dev/e1000/e1000_api.c b/sys/dev/e1000/e1000_api.c
index 28379cc..52e2609 100644
--- a/sys/dev/e1000/e1000_api.c
+++ b/sys/dev/e1000/e1000_api.c
@@ -304,6 +304,10 @@ s32 e1000_set_mac_type(struct e1000_hw *hw)
 	case E1000_DEV_ID_PCH_SPT_I219_LM2:
 	case E1000_DEV_ID_PCH_SPT_I219_V2:
 	case E1000_DEV_ID_PCH_LBG_I219_LM3:
+	case E1000_DEV_ID_PCH_SPT_I219_LM4:
+	case E1000_DEV_ID_PCH_SPT_I219_V4:
+	case E1000_DEV_ID_PCH_SPT_I219_LM5:
+	case E1000_DEV_ID_PCH_SPT_I219_V5:
 		mac->type = e1000_pch_spt;
 		break;
 	case E1000_DEV_ID_82575EB_COPPER:
diff --git a/sys/dev/e1000/e1000_hw.h b/sys/dev/e1000/e1000_hw.h
index 1792e14..e1464a7 100644
--- a/sys/dev/e1000/e1000_hw.h
+++ b/sys/dev/e1000/e1000_hw.h
@@ -142,6 +142,10 @@ struct e1000_hw;
 #define E1000_DEV_ID_PCH_SPT_I219_LM2		0x15B7 /* Sunrise Point-H PCH */
 #define E1000_DEV_ID_PCH_SPT_I219_V2		0x15B8 /* Sunrise Point-H PCH */
 #define E1000_DEV_ID_PCH_LBG_I219_LM3		0x15B9 /* LEWISBURG PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM4		0x15D7
+#define E1000_DEV_ID_PCH_SPT_I219_V4		0x15D8
+#define E1000_DEV_ID_PCH_SPT_I219_LM5		0x15E3
+#define E1000_DEV_ID_PCH_SPT_I219_V5		0x15D6
 #define E1000_DEV_ID_82576			0x10C9
 #define E1000_DEV_ID_82576_FIBER		0x10E6
 #define E1000_DEV_ID_82576_SERDES		0x10E7
@@ -957,9 +961,13 @@ struct e1000_dev_spec_ich8lan {
 	E1000_MUTEX nvm_mutex;
 	E1000_MUTEX swflag_mutex;
 	bool nvm_k1_enabled;
+	bool disable_k1_off;
 	bool eee_disable;
 	u16 eee_lp_ability;
 	enum e1000_ulp_state ulp_state;
+	bool ulp_capability_disabled;
+	bool during_suspend_flow;
+	bool during_dpg_exit;
 };
 
 struct e1000_dev_spec_82575 {
diff --git a/sys/dev/e1000/e1000_ich8lan.c b/sys/dev/e1000/e1000_ich8lan.c
index ae97a8c..4c50ce2 100644
--- a/sys/dev/e1000/e1000_ich8lan.c
+++ b/sys/dev/e1000/e1000_ich8lan.c
@@ -288,7 +288,7 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw)
 	mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE;
 	E1000_WRITE_REG(hw, E1000_CTRL, mac_reg);
 	E1000_WRITE_FLUSH(hw);
-	usec_delay(10);
+	msec_delay(1);
 	mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE;
 	E1000_WRITE_REG(hw, E1000_CTRL, mac_reg);
 	E1000_WRITE_FLUSH(hw);
@@ -1625,7 +1625,17 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 			hw->phy.ops.write_reg_locked(hw,
 						     I217_PLL_CLOCK_GATE_REG,
 						     phy_reg);
-		}
+
+			if (speed == SPEED_1000) {
+				hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL,
+							    &phy_reg);
+
+				phy_reg |= HV_PM_CTRL_K1_CLK_REQ;
+
+				hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL,
+							     phy_reg);
+				}
+		 }
 		hw->phy.ops.release(hw);
 
 		if (ret_val)
@@ -1718,7 +1728,8 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 		u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG);
 		u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6);
 
-		if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE)
+		if ((pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) &&
+			(hw->dev_spec.ich8lan.disable_k1_off == FALSE))
 			fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE;
 		else
 			fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE;
diff --git a/sys/dev/e1000/e1000_ich8lan.h b/sys/dev/e1000/e1000_ich8lan.h
index edc1dd1..6d81291 100644
--- a/sys/dev/e1000/e1000_ich8lan.h
+++ b/sys/dev/e1000/e1000_ich8lan.h
@@ -239,7 +239,7 @@
 
 /* PHY Power Management Control */
 #define HV_PM_CTRL		PHY_REG(770, 17)
-#define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA	0x100
+#define HV_PM_CTRL_K1_CLK_REQ		0x200
 #define HV_PM_CTRL_K1_ENABLE		0x4000
 
 #define I217_PLL_CLOCK_GATE_REG	PHY_REG(772, 28)
diff --git a/sys/dev/e1000/e1000_phy.c b/sys/dev/e1000/e1000_phy.c
index 847d315..9684b43 100644
--- a/sys/dev/e1000/e1000_phy.c
+++ b/sys/dev/e1000/e1000_phy.c
@@ -4146,12 +4146,13 @@ s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data)
 	*data = E1000_READ_REG(hw, E1000_MPHY_DATA);
 
 	/* Disable access to mPHY if it was originally disabled */
-	if (locked) {
+	if (locked)
 		ready = e1000_is_mphy_ready(hw);
-		if (!ready)
-			return -E1000_ERR_PHY;
-	}
-	E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS);
+	if (!ready)
+		return -E1000_ERR_PHY;
+	E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+			E1000_MPHY_DIS_ACCESS);
+
 	return E1000_SUCCESS;
 }
 
@@ -4210,12 +4211,13 @@ s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
 	E1000_WRITE_REG(hw, E1000_MPHY_DATA, data);
 
 	/* Disable access to mPHY if it was originally disabled */
-	if (locked) {
+	if (locked)
 		ready = e1000_is_mphy_ready(hw);
-		if (!ready)
-			return -E1000_ERR_PHY;
-	}
-	E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS);
+	if (!ready)
+		return -E1000_ERR_PHY;
+	E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+			E1000_MPHY_DIS_ACCESS);
+
 	return E1000_SUCCESS;
 }
 
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index b8e9750..7e2690e 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -193,6 +193,12 @@ static em_vendor_info_t em_vendor_info_array[] =
 	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
+	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
+						PCI_ANY_ID, PCI_ANY_ID, 0},
+	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
+	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
+						PCI_ANY_ID, PCI_ANY_ID, 0},
+	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
 	/* required last entry */
 	{ 0, 0, 0, 0, 0}
 };
diff --git a/sys/dev/filemon/filemon.c b/sys/dev/filemon/filemon.c
index 919af9d..26e1bc3 100644
--- a/sys/dev/filemon/filemon.c
+++ b/sys/dev/filemon/filemon.c
@@ -137,6 +137,8 @@ filemon_proc_get(struct proc *p)
 {
 	struct filemon *filemon;
 
+	if (p->p_filemon == NULL)
+		return (NULL);
 	PROC_LOCK(p);
 	filemon = filemon_acquire(p->p_filemon);
 	PROC_UNLOCK(p);
diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
index 8f3efef..b79e10c 100644
--- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
@@ -810,6 +810,7 @@ hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
 	 * because the fields will be used later in storvsc_io_done().
 	 */
 	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
+	request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
 	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
 
 	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
@@ -1945,28 +1946,6 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
 	return(0);
 }
 
-/*
- * SCSI Inquiry checks qualifier and type.
- * If qualifier is 011b, means the device server is not capable
- * of supporting a peripheral device on this logical unit, and
- * the type should be set to 1Fh.
- * 
- * Return 1 if it is valid, 0 otherwise.
- */
-static inline int
-is_inquiry_valid(const struct scsi_inquiry_data *inq_data)
-{
-	uint8_t type;
-	if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED) {
-		return (0);
-	}
-	type = SID_TYPE(inq_data);
-	if (type == T_NODEVICE) {
-		return (0);
-	}
-	return (1);
-}
-
 /**
  * @brief completion function before returning to CAM
  *
@@ -1985,7 +1964,6 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
 	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
 	bus_dma_segment_t *ori_sglist = NULL;
 	int ori_sg_count = 0;
-
 	/* destroy bounce buffer if it is used */
 	if (reqp->bounce_sgl_count) {
 		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
@@ -2040,88 +2018,71 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
 	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
 	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
 		const struct scsi_generic *cmd;
-		/*
-		 * Check whether the data for INQUIRY cmd is valid or
-		 * not.  Windows 10 and Windows 2016 send all zero
-		 * inquiry data to VM even for unpopulated slots.
-		 */
+
+		if (vm_srb->srb_status != SRB_STATUS_SUCCESS) {
+			if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) {
+				xpt_print(ccb->ccb_h.path, "invalid LUN %d\n",
+				    vm_srb->lun);
+			} else {
+				xpt_print(ccb->ccb_h.path, "Unknown SRB flag: %d\n",
+				    vm_srb->srb_status);
+			}
+			/*
+			 * If there is an error (for example, an invalid LUN),
+			 * the host informs the VM through the SRB status.
+			 */
+			ccb->ccb_h.status |= CAM_SEL_TIMEOUT;
+		} else {
+			ccb->ccb_h.status |= CAM_REQ_CMP;
+		}
+
 		cmd = (const struct scsi_generic *)
 		    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
 		     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
 		if (cmd->opcode == INQUIRY) {
-		    /*
-		     * The host of Windows 10 or 2016 server will response
-		     * the inquiry request with invalid data for unexisted device:
-			[0x7f 0x0 0x5 0x2 0x1f ... ]
-		     * But on windows 2012 R2, the response is:
-			[0x7f 0x0 0x0 0x0 0x0 ]
-		     * That is why here wants to validate the inquiry response.
-		     * The validation will skip the INQUIRY whose response is short,
-		     * which is less than SHORT_INQUIRY_LENGTH (36).
-		     *
-		     * For more information about INQUIRY, please refer to:
-		     *  ftp://ftp.avc-pioneer.com/Mtfuji_7/Proposal/Jun09/INQUIRY.pdf
-		     */
-		    struct scsi_inquiry_data *inq_data =
-			(struct scsi_inquiry_data *)csio->data_ptr;
-		    uint8_t* resp_buf = (uint8_t*)csio->data_ptr;
-		    /* Get the buffer length reported by host */
-		    int resp_xfer_len = vm_srb->transfer_len;
-		    /* Get the available buffer length */
-		    int resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
-		    int data_len = (resp_buf_len < resp_xfer_len) ? resp_buf_len : resp_xfer_len;
-		    if (data_len < SHORT_INQUIRY_LENGTH) {
-			ccb->ccb_h.status |= CAM_REQ_CMP;
-			if (bootverbose && data_len >= 5) {
-				mtx_lock(&sc->hs_lock);
-				xpt_print(ccb->ccb_h.path,
-				    "storvsc skips the validation for short inquiry (%d)"
-				    " [%x %x %x %x %x]\n",
-				    data_len,resp_buf[0],resp_buf[1],resp_buf[2],
-				    resp_buf[3],resp_buf[4]);
-				mtx_unlock(&sc->hs_lock);
-			}
-		    } else if (is_inquiry_valid(inq_data) == 0) {
-			ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
+			struct scsi_inquiry_data *inq_data =
+			    (struct scsi_inquiry_data *)csio->data_ptr;
+			uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
+			int resp_xfer_len, resp_buf_len, data_len;
+
+			/* Get the buffer length reported by host */
+			resp_xfer_len = vm_srb->transfer_len;
+			/* Get the available buffer length */
+			resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
+			data_len = (resp_buf_len < resp_xfer_len) ?
+			    resp_buf_len : resp_xfer_len;
+
 			if (bootverbose && data_len >= 5) {
-				mtx_lock(&sc->hs_lock);
-				xpt_print(ccb->ccb_h.path,
-				    "storvsc uninstalled invalid device"
-				    " [%x %x %x %x %x]\n",
-				resp_buf[0],resp_buf[1],resp_buf[2],resp_buf[3],resp_buf[4]);
-				mtx_unlock(&sc->hs_lock);
+				xpt_print(ccb->ccb_h.path, "storvsc inquiry "
+				    "(%d) [%x %x %x %x %x ... ]\n", data_len,
+				    resp_buf[0], resp_buf[1], resp_buf[2],
+				    resp_buf[3], resp_buf[4]);
 			}
-		    } else {
-			char vendor[16];
-			cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor),
-				sizeof(vendor));
-			/**
-			 * XXX: upgrade SPC2 to SPC3 if host is WIN8 or WIN2012 R2
-			 * in order to support UNMAP feature
-			 */
-			if (!strncmp(vendor,"Msft",4) &&
-			     SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
-			     (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
-				vmstor_proto_version== VMSTOR_PROTOCOL_VERSION_WIN8)) {
-				inq_data->version = SCSI_REV_SPC3;
-				if (bootverbose) {
-					mtx_lock(&sc->hs_lock);
-					xpt_print(ccb->ccb_h.path,
-						"storvsc upgrades SPC2 to SPC3\n");
-					mtx_unlock(&sc->hs_lock);
+			if (vm_srb->srb_status == SRB_STATUS_SUCCESS &&
+			    data_len > SHORT_INQUIRY_LENGTH) {
+				char vendor[16];
+
+				cam_strvis(vendor, inq_data->vendor,
+				    sizeof(inq_data->vendor), sizeof(vendor));
+
+				/*
+				 * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
+				 * WIN2012 R2 in order to support UNMAP feature.
+				 */
+				if (!strncmp(vendor, "Msft", 4) &&
+				    SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
+				    (vmstor_proto_version ==
+				     VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
+				     vmstor_proto_version ==
+				     VMSTOR_PROTOCOL_VERSION_WIN8)) {
+					inq_data->version = SCSI_REV_SPC3;
+					if (bootverbose) {
+						xpt_print(ccb->ccb_h.path,
+						    "storvsc upgrades "
+						    "SPC2 to SPC3\n");
+					}
 				}
 			}
-			ccb->ccb_h.status |= CAM_REQ_CMP;
-			if (bootverbose) {
-				mtx_lock(&sc->hs_lock);
-				xpt_print(ccb->ccb_h.path,
-				    "storvsc has passed inquiry response (%d) validation\n",
-				    data_len);
-				mtx_unlock(&sc->hs_lock);
-			}
-		    }
-		} else {
-			ccb->ccb_h.status |= CAM_REQ_CMP;
 		}
 	} else {
 		mtx_lock(&sc->hs_lock);
diff --git a/sys/dev/hyperv/storvsc/hv_vstorage.h b/sys/dev/hyperv/storvsc/hv_vstorage.h
index f2b9480..9205e35 100644
--- a/sys/dev/hyperv/storvsc/hv_vstorage.h
+++ b/sys/dev/hyperv/storvsc/hv_vstorage.h
@@ -249,9 +249,9 @@ struct vstor_packet {
 /**
  * SRB Status Masks (can be combined with above status codes)
  */
-#define SRB_STATUS_QUEUE_FROZEN		0x40
-#define SRB_STATUS_AUTOSENSE_VALID	0x80
-
+#define SRB_STATUS_QUEUE_FROZEN         0x40
+#define SRB_STATUS_AUTOSENSE_VALID      0x80
+#define SRB_STATUS_INVALID_LUN          0X20
 
 /**
  *  Packet flags
diff --git a/sys/dev/ntb/if_ntb/if_ntb.c b/sys/dev/ntb/if_ntb/if_ntb.c
index 7b659f7..c67ae0d 100644
--- a/sys/dev/ntb/if_ntb/if_ntb.c
+++ b/sys/dev/ntb/if_ntb/if_ntb.c
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
  * Copyright (C) 2013 Intel Corporation
  * Copyright (C) 2015 EMC Corporation
  * All rights reserved.
@@ -25,21 +26,27 @@
  * SUCH DAMAGE.
  */
 
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using PCI-e links, providing remote memory access.
+ *
+ * This module contains a driver for a simulated Ethernet device, using
+ * an underlying NTB Transport device.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
-#include <sys/bitset.h>
+#include <sys/buf_ring.h>
 #include <sys/bus.h>
-#include <sys/ktr.h>
 #include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
 #include <sys/module.h>
-#include <sys/mutex.h>
-#include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
@@ -48,421 +55,164 @@ __FBSDID("$FreeBSD$");
 #include <net/if.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
+#include <net/if_media.h>
 #include <net/if_var.h>
 #include <net/bpf.h>
 #include <net/ethernet.h>
 
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
 #include <machine/bus.h>
-#include <machine/cpufunc.h>
-
-#include <netinet/in.h>
-#include <netinet/ip.h>
-
-#include "../ntb_hw/ntb_hw.h"
-
-/*
- * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
- * allows you to connect two systems using a PCI-e link.
- *
- * This module contains a protocol for sending and receiving messages, and
- * exposes that protocol through a simulated ethernet device called ntb.
- *
- * NOTE: Much of the code in this module is shared with Linux. Any patches may
- * be picked up and redistributed in Linux with a dual GPL/BSD license.
- */
 
-#define QP_SETSIZE	64
-BITSET_DEFINE(_qpset, QP_SETSIZE);
-#define test_bit(pos, addr)	BIT_ISSET(QP_SETSIZE, (pos), (addr))
-#define set_bit(pos, addr)	BIT_SET(QP_SETSIZE, (pos), (addr))
-#define clear_bit(pos, addr)	BIT_CLR(QP_SETSIZE, (pos), (addr))
-#define ffs_bit(addr)		BIT_FFS(QP_SETSIZE, (addr))
+#include "../ntb_transport.h"
 
 #define KTR_NTB KTR_SPARE3
+#define NTB_MEDIATYPE		 (IFM_ETHER | IFM_AUTO | IFM_FDX)
 
-#define NTB_TRANSPORT_VERSION	4
-#define NTB_RX_MAX_PKTS		64
-#define	NTB_RXQ_SIZE		300
-
-enum ntb_link_event {
-	NTB_LINK_DOWN = 0,
-	NTB_LINK_UP,
-};
+#define	NTB_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
+#define	NTB_CSUM_FEATURES6	(CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
+#define	NTB_CSUM_SET		(CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
+				    CSUM_PSEUDO_HDR | \
+				    CSUM_IP_CHECKED | CSUM_IP_VALID | \
+				    CSUM_SCTP_VALID)
 
 static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb");
 
-static unsigned g_if_ntb_debug_level;
-SYSCTL_UINT(_hw_if_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
-    &g_if_ntb_debug_level, 0, "if_ntb log level -- higher is more verbose");
-#define ntb_printf(lvl, ...) do {			\
-	if ((lvl) <= g_if_ntb_debug_level) {		\
-		if_printf(nt->ifp, __VA_ARGS__);	\
-	}						\
-} while (0)
-
-static unsigned transport_mtu = IP_MAXPACKET + ETHER_HDR_LEN + ETHER_CRC_LEN;
-
-static uint64_t max_mw_size;
-SYSCTL_UQUAD(_hw_if_ntb, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0,
-    "If enabled (non-zero), limit the size of large memory windows. "
-    "Both sides of the NTB MUST set the same value here.");
-
-static unsigned max_num_clients;
-SYSCTL_UINT(_hw_if_ntb, OID_AUTO, max_num_clients, CTLFLAG_RDTUN,
-    &max_num_clients, 0, "Maximum number of NTB transport clients.  "
-    "0 (default) - use all available NTB memory windows; "
-    "positive integer N - Limit to N memory windows.");
-
-static unsigned enable_xeon_watchdog;
-SYSCTL_UINT(_hw_if_ntb, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN,
-    &enable_xeon_watchdog, 0, "If non-zero, write a register every second to "
-    "keep a watchdog from tearing down the NTB link");
-
-STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);
-
-typedef uint32_t ntb_q_idx_t;
-
-struct ntb_queue_entry {
-	/* ntb_queue list reference */
-	STAILQ_ENTRY(ntb_queue_entry) entry;
-
-	/* info on data to be transferred */
-	void		*cb_data;
-	void		*buf;
-	uint32_t	len;
-	uint32_t	flags;
-
-	struct ntb_transport_qp		*qp;
-	struct ntb_payload_header	*x_hdr;
-	ntb_q_idx_t	index;
-};
-
-struct ntb_rx_info {
-	ntb_q_idx_t	entry;
-};
-
-struct ntb_transport_qp {
-	struct ntb_transport_ctx	*transport;
-	struct ntb_softc	*ntb;
-
-	void			*cb_data;
-
-	bool			client_ready;
-	volatile bool		link_is_up;
-	uint8_t			qp_num;	/* Only 64 QPs are allowed.  0-63 */
-
-	struct ntb_rx_info	*rx_info;
-	struct ntb_rx_info	*remote_rx_info;
-
-	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
-	    void *data, int len);
-	struct ntb_queue_list	tx_free_q;
-	struct mtx		ntb_tx_free_q_lock;
-	caddr_t			tx_mw;
-	bus_addr_t		tx_mw_phys;
-	ntb_q_idx_t		tx_index;
-	ntb_q_idx_t		tx_max_entry;
-	uint64_t		tx_max_frame;
-
-	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
-	    void *data, int len);
-	struct ntb_queue_list	rx_post_q;
-	struct ntb_queue_list	rx_pend_q;
-	/* ntb_rx_q_lock: synchronize access to rx_XXXX_q */
-	struct mtx		ntb_rx_q_lock;
-	struct task		rx_completion_task;
-	struct task		rxc_db_work;
-	caddr_t			rx_buff;
-	ntb_q_idx_t		rx_index;
-	ntb_q_idx_t		rx_max_entry;
-	uint64_t		rx_max_frame;
-
-	void (*event_handler)(void *data, enum ntb_link_event status);
-	struct callout		link_work;
-	struct callout		queue_full;
-	struct callout		rx_full;
-
-	uint64_t		last_rx_no_buf;
+static unsigned g_if_ntb_num_queues = UINT_MAX;
+SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
+    &g_if_ntb_num_queues, 0, "Number of queues per interface");
 
-	/* Stats */
-	uint64_t		rx_bytes;
-	uint64_t		rx_pkts;
-	uint64_t		rx_ring_empty;
-	uint64_t		rx_err_no_buf;
-	uint64_t		rx_err_oflow;
-	uint64_t		rx_err_ver;
-	uint64_t		tx_bytes;
-	uint64_t		tx_pkts;
-	uint64_t		tx_ring_full;
-	uint64_t		tx_err_no_buf;
-};
-
-struct ntb_queue_handlers {
-	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
-	    void *data, int len);
-	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
-	    void *data, int len);
-	void (*event_handler)(void *data, enum ntb_link_event status);
-};
-
-struct ntb_transport_mw {
-	vm_paddr_t	phys_addr;
-	size_t		phys_size;
-	size_t		xlat_align;
-	size_t		xlat_align_size;
-	bus_addr_t	addr_limit;
-	/* Tx buff is off vbase / phys_addr */
-	caddr_t		vbase;
-	size_t		xlat_size;
-	size_t		buff_size;
-	/* Rx buff is off virt_addr / dma_addr */
-	caddr_t		virt_addr;
-	bus_addr_t	dma_addr;
-};
-
-struct ntb_transport_ctx {
-	struct ntb_softc	*ntb;
-	struct ifnet		*ifp;
-	struct ntb_transport_mw	mw_vec[NTB_MAX_NUM_MW];
-	struct ntb_transport_qp	*qp_vec;
-	struct _qpset		qp_bitmap;
-	struct _qpset		qp_bitmap_free;
-	unsigned		mw_count;
-	unsigned		qp_count;
-	volatile bool		link_is_up;
-	struct callout		link_work;
-	struct callout		link_watchdog;
-	struct task		link_cleanup;
-	uint64_t		bufsize;
-	u_char			eaddr[ETHER_ADDR_LEN];
-	struct mtx		tx_lock;
-	struct mtx		rx_lock;
-
-	/* The hardcoded single queuepair in ntb_setup_interface() */
+struct ntb_net_queue {
+	struct ntb_net_ctx	*sc;
+	if_t			 ifp;
 	struct ntb_transport_qp *qp;
+	struct buf_ring		*br;
+	struct task		 tx_task;
+	struct taskqueue	*tx_tq;
+	struct mtx		 tx_lock;
+	struct callout		 queue_full;
 };
 
-static struct ntb_transport_ctx net_softc;
-
-enum {
-	IF_NTB_DESC_DONE_FLAG = 1 << 0,
-	IF_NTB_LINK_DOWN_FLAG = 1 << 1,
-};
-
-struct ntb_payload_header {
-	ntb_q_idx_t ver;
-	uint32_t len;
-	uint32_t flags;
-};
-
-enum {
-	/*
-	 * The order of this enum is part of the if_ntb remote protocol.  Do
-	 * not reorder without bumping protocol version (and it's probably best
-	 * to keep the protocol in lock-step with the Linux NTB driver.
-	 */
-	IF_NTB_VERSION = 0,
-	IF_NTB_QP_LINKS,
-	IF_NTB_NUM_QPS,
-	IF_NTB_NUM_MWS,
-	/*
-	 * N.B.: transport_link_work assumes MW1 enums = MW0 + 2.
-	 */
-	IF_NTB_MW0_SZ_HIGH,
-	IF_NTB_MW0_SZ_LOW,
-	IF_NTB_MW1_SZ_HIGH,
-	IF_NTB_MW1_SZ_LOW,
-	IF_NTB_MAX_SPAD,
-
-	/*
-	 * Some NTB-using hardware have a watchdog to work around NTB hangs; if
-	 * a register or doorbell isn't written every few seconds, the link is
-	 * torn down.  Write an otherwise unused register every few seconds to
-	 * work around this watchdog.
-	 */
-	IF_NTB_WATCHDOG_SPAD = 15
+struct ntb_net_ctx {
+	if_t			 ifp;
+	struct ifmedia		 media;
+	u_char			 eaddr[ETHER_ADDR_LEN];
+	int			 num_queues;
+	struct ntb_net_queue	*queues;
+	int			 mtu;
 };
-CTASSERT(IF_NTB_WATCHDOG_SPAD < XEON_SPAD_COUNT &&
-    IF_NTB_WATCHDOG_SPAD < ATOM_SPAD_COUNT);
-
-#define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
-#define NTB_QP_DEF_NUM_ENTRIES	100
-#define NTB_LINK_DOWN_TIMEOUT	10
 
-static int ntb_handle_module_events(struct module *m, int what, void *arg);
-static int ntb_setup_interface(void);
-static int ntb_teardown_interface(void);
+static int ntb_net_probe(device_t dev);
+static int ntb_net_attach(device_t dev);
+static int ntb_net_detach(device_t dev);
 static void ntb_net_init(void *arg);
-static int ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
-static void ntb_start(struct ifnet *ifp);
+static int ntb_ifmedia_upd(struct ifnet *);
+static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
+static int ntb_ioctl(if_t ifp, u_long command, caddr_t data);
+static int ntb_transmit(if_t ifp, struct mbuf *m);
 static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
     void *data, int len);
 static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
     void *data, int len);
 static void ntb_net_event_handler(void *data, enum ntb_link_event status);
-static int ntb_transport_probe(struct ntb_softc *ntb);
-static void ntb_transport_free(struct ntb_transport_ctx *);
-static void ntb_transport_init_queue(struct ntb_transport_ctx *nt,
-    unsigned int qp_num);
-static void ntb_transport_free_queue(struct ntb_transport_qp *qp);
-static struct ntb_transport_qp *ntb_transport_create_queue(void *data,
-    struct ntb_softc *pdev, const struct ntb_queue_handlers *handlers);
-static void ntb_transport_link_up(struct ntb_transport_qp *qp);
-static int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb,
-    void *data, unsigned int len);
-static int ntb_process_tx(struct ntb_transport_qp *qp,
-    struct ntb_queue_entry *entry);
-static void ntb_memcpy_tx(struct ntb_transport_qp *qp,
-    struct ntb_queue_entry *entry, void *offset);
+static void ntb_handle_tx(void *arg, int pending);
 static void ntb_qp_full(void *arg);
-static void ntb_transport_rxc_db(void *arg, int pending);
-static int ntb_process_rxc(struct ntb_transport_qp *qp);
-static void ntb_memcpy_rx(struct ntb_transport_qp *qp,
-    struct ntb_queue_entry *entry, void *offset);
-static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp,
-    void *data);
-static void ntb_complete_rxc(void *arg, int pending);
-static void ntb_transport_doorbell_callback(void *data, uint32_t vector);
-static void ntb_transport_event_callback(void *data);
-static void ntb_transport_link_work(void *arg);
-static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size);
-static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw);
-static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
-    unsigned int qp_num);
-static void ntb_qp_link_work(void *arg);
-static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt);
-static void ntb_transport_link_cleanup_work(void *, int);
-static void ntb_qp_link_down(struct ntb_transport_qp *qp);
-static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp);
-static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp);
-static void ntb_transport_link_down(struct ntb_transport_qp *qp);
-static void ntb_send_link_down(struct ntb_transport_qp *qp);
-static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
-    struct ntb_queue_list *list);
-static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock,
-    struct ntb_queue_list *list);
-static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock,
-    struct ntb_queue_list *from, struct ntb_queue_list *to);
+static void ntb_qflush(if_t ifp);
 static void create_random_local_eui48(u_char *eaddr);
-static unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp);
-static void xeon_link_watchdog_hb(void *);
-
-static const struct ntb_ctx_ops ntb_transport_ops = {
-	.link_event = ntb_transport_event_callback,
-	.db_event = ntb_transport_doorbell_callback,
-};
 
-MALLOC_DEFINE(M_NTB_IF, "if_ntb", "ntb network driver");
-
-static inline void
-iowrite32(uint32_t val, void *addr)
-{
-
-	bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr,
-	    val);
-}
-
-/* Module setup and teardown */
 static int
-ntb_handle_module_events(struct module *m, int what, void *arg)
+ntb_net_probe(device_t dev)
 {
-	int err = 0;
 
-	switch (what) {
-	case MOD_LOAD:
-		err = ntb_setup_interface();
-		break;
-	case MOD_UNLOAD:
-		err = ntb_teardown_interface();
-		break;
-	default:
-		err = EOPNOTSUPP;
-		break;
-	}
-	return (err);
+	device_set_desc(dev, "NTB Network Interface");
+	return (0);
 }
 
-static moduledata_t if_ntb_mod = {
-	"if_ntb",
-	ntb_handle_module_events,
-	NULL
-};
-
-DECLARE_MODULE(if_ntb, if_ntb_mod, SI_SUB_KLD, SI_ORDER_ANY);
-MODULE_DEPEND(if_ntb, ntb_hw, 1, 1, 1);
-
 static int
-ntb_setup_interface(void)
+ntb_net_attach(device_t dev)
 {
-	struct ifnet *ifp;
+	struct ntb_net_ctx *sc = device_get_softc(dev);
+	struct ntb_net_queue *q;
+	if_t ifp;
 	struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
 	    ntb_net_tx_handler, ntb_net_event_handler };
-	int rc;
-
-	net_softc.ntb = devclass_get_softc(devclass_find("ntb_hw"), 0);
-	if (net_softc.ntb == NULL) {
-		printf("ntb: Cannot find devclass\n");
-		return (ENXIO);
-	}
+	int i;
 
-	ifp = net_softc.ifp = if_alloc(IFT_ETHER);
+	ifp = sc->ifp = if_gethandle(IFT_ETHER);
 	if (ifp == NULL) {
-		ntb_transport_free(&net_softc);
 		printf("ntb: Cannot allocate ifnet structure\n");
 		return (ENOMEM);
 	}
-	if_initname(ifp, "ntb", 0);
-
-	rc = ntb_transport_probe(net_softc.ntb);
-	if (rc != 0) {
-		printf("ntb: Cannot init transport: %d\n", rc);
-		if_free(net_softc.ifp);
-		return (rc);
-	}
-
-	net_softc.qp = ntb_transport_create_queue(ifp, net_softc.ntb,
-	    &handlers);
-	ifp->if_init = ntb_net_init;
-	ifp->if_softc = &net_softc;
-	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
-	ifp->if_ioctl = ntb_ioctl;
-	ifp->if_start = ntb_start;
-	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
-	ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN;
-	IFQ_SET_READY(&ifp->if_snd);
-	create_random_local_eui48(net_softc.eaddr);
-	ether_ifattach(ifp, net_softc.eaddr);
-	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_JUMBO_MTU;
-	ifp->if_capenable = ifp->if_capabilities;
-	ifp->if_mtu = ntb_transport_max_size(net_softc.qp) - ETHER_HDR_LEN -
-	    ETHER_CRC_LEN;
-
-	ntb_transport_link_up(net_softc.qp);
-	net_softc.bufsize = ntb_transport_max_size(net_softc.qp) +
-	    sizeof(struct ether_header);
+	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+	if_setdev(ifp, dev);
+
+	sc->num_queues = min(g_if_ntb_num_queues,
+	    ntb_transport_queue_count(dev));
+	sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
+	    M_DEVBUF, M_WAITOK | M_ZERO);
+	sc->mtu = INT_MAX;
+	for (i = 0; i < sc->num_queues; i++) {
+		q = &sc->queues[i];
+		q->sc = sc;
+		q->ifp = ifp;
+		q->qp = ntb_transport_create_queue(dev, i, &handlers, q);
+		if (q->qp == NULL)
+			break;
+		sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
+		mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
+		q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
+		TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
+		q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
+		    taskqueue_thread_enqueue, &q->tx_tq);
+		taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
+		    device_get_nameunit(dev), i);
+		callout_init(&q->queue_full, 1);
+	}
+	sc->num_queues = i;
+	device_printf(dev, "%d queue(s)\n", sc->num_queues);
+
+	if_setinitfn(ifp, ntb_net_init);
+	if_setsoftc(ifp, sc);
+	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+	if_setioctlfn(ifp, ntb_ioctl);
+	if_settransmitfn(ifp, ntb_transmit);
+	if_setqflushfn(ifp, ntb_qflush);
+	create_random_local_eui48(sc->eaddr);
+	ether_ifattach(ifp, sc->eaddr);
+	if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
+	    IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
+	if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
+	if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN);
+
+	ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
+	    ntb_ifmedia_sts);
+	ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
+	ifmedia_set(&sc->media, NTB_MEDIATYPE);
+
+	for (i = 0; i < sc->num_queues; i++)
+		ntb_transport_link_up(sc->queues[i].qp);
 	return (0);
 }
 
 static int
-ntb_teardown_interface(void)
+ntb_net_detach(device_t dev)
 {
+	struct ntb_net_ctx *sc = device_get_softc(dev);
+	struct ntb_net_queue *q;
+	int i;
 
-	if (net_softc.qp != NULL) {
-		ntb_transport_link_down(net_softc.qp);
-
-		ntb_transport_free_queue(net_softc.qp);
-		ntb_transport_free(&net_softc);
-	}
-
-	if (net_softc.ifp != NULL) {
-		ether_ifdetach(net_softc.ifp);
-		if_free(net_softc.ifp);
-		net_softc.ifp = NULL;
-	}
-
+	for (i = 0; i < sc->num_queues; i++)
+		ntb_transport_link_down(sc->queues[i].qp);
+	ether_ifdetach(sc->ifp);
+	if_free(sc->ifp);
+	ifmedia_removeall(&sc->media);
+	for (i = 0; i < sc->num_queues; i++) {
+		q = &sc->queues[i];
+		ntb_transport_free_queue(q->qp);
+		buf_ring_free(q->br, M_DEVBUF);
+		callout_drain(&q->queue_full);
+		taskqueue_drain_all(q->tx_tq);
+		mtx_destroy(&q->tx_lock);
+	}
+	free(sc->queues, M_DEVBUF);
 	return (0);
 }
 
@@ -471,1213 +221,268 @@ ntb_teardown_interface(void)
 static void
 ntb_net_init(void *arg)
 {
-	struct ntb_transport_ctx *ntb_softc = arg;
-	struct ifnet *ifp = ntb_softc->ifp;
+	struct ntb_net_ctx *sc = arg;
+	if_t ifp = sc->ifp;
 
-	ifp->if_drv_flags |= IFF_DRV_RUNNING;
-	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-	ifp->if_flags |= IFF_UP;
-	if_link_state_change(ifp, LINK_STATE_UP);
+	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
+	if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
+	    LINK_STATE_UP : LINK_STATE_DOWN);
 }
 
 static int
-ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+ntb_ioctl(if_t ifp, u_long command, caddr_t data)
 {
-	struct ntb_transport_ctx *nt = ifp->if_softc;
+	struct ntb_net_ctx *sc = if_getsoftc(ifp);
 	struct ifreq *ifr = (struct ifreq *)data;
 	int error = 0;
 
 	switch (command) {
 	case SIOCSIFMTU:
 	    {
-		if (ifr->ifr_mtu > ntb_transport_max_size(nt->qp) -
-		    ETHER_HDR_LEN - ETHER_CRC_LEN) {
+		if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
 			error = EINVAL;
 			break;
 		}
 
-		ifp->if_mtu = ifr->ifr_mtu;
+		if_setmtu(ifp, ifr->ifr_mtu);
 		break;
 	    }
-	default:
-		error = ether_ioctl(ifp, command, data);
-		break;
-	}
 
-	return (error);
-}
-
-
-static void
-ntb_start(struct ifnet *ifp)
-{
-	struct mbuf *m_head;
-	struct ntb_transport_ctx *nt = ifp->if_softc;
-	int rc;
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
+		break;
 
-	mtx_lock(&nt->tx_lock);
-	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
-	CTR0(KTR_NTB, "TX: ntb_start");
-	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
-		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
-		CTR1(KTR_NTB, "TX: start mbuf %p", m_head);
-		rc = ntb_transport_tx_enqueue(nt->qp, m_head, m_head,
-			     m_length(m_head, NULL));
-		if (rc != 0) {
-			CTR1(KTR_NTB,
-			    "TX: could not tx mbuf %p. Returning to snd q",
-			    m_head);
-			if (rc == EAGAIN) {
-				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
-				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
-				callout_reset(&nt->qp->queue_full, hz / 1000,
-				    ntb_qp_full, ifp);
-			}
-			break;
+	case SIOCSIFCAP:
+		if (ifr->ifr_reqcap & IFCAP_RXCSUM)
+			if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
+		else
+			if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
+		if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
+			if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
+			if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0);
+		} else {
+			if_setcapenablebit(ifp, 0, IFCAP_TXCSUM);
+			if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES);
+		}
+		if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
+			if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
+		else
+			if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
+		if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
+			if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
+			if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0);
+		} else {
+			if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6);
+			if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6);
 		}
-
-	}
-	mtx_unlock(&nt->tx_lock);
-}
-
-/* Network Device Callbacks */
-static void
-ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
-    int len)
-{
-
-	m_freem(data);
-	CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
-}
-
-static void
-ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
-    int len)
-{
-	struct mbuf *m = data;
-	struct ifnet *ifp = qp_data;
-
-	CTR0(KTR_NTB, "RX: rx handler");
-	(*ifp->if_input)(ifp, m);
-}
-
-static void
-ntb_net_event_handler(void *data, enum ntb_link_event status)
-{
-	struct ifnet *ifp;
-
-	ifp = data;
-	(void)ifp;
-
-	/* XXX The Linux driver munges with the carrier status here. */
-
-	switch (status) {
-	case NTB_LINK_DOWN:
-		break;
-	case NTB_LINK_UP:
 		break;
-	default:
-		panic("Bogus ntb_link_event %u\n", status);
-	}
-}
-
-/* Transport Init and teardown */
-
-static void
-xeon_link_watchdog_hb(void *arg)
-{
-	struct ntb_transport_ctx *nt;
-
-	nt = arg;
-	ntb_spad_write(nt->ntb, IF_NTB_WATCHDOG_SPAD, 0);
-	callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt);
-}
-
-static int
-ntb_transport_probe(struct ntb_softc *ntb)
-{
-	struct ntb_transport_ctx *nt = &net_softc;
-	struct ntb_transport_mw *mw;
-	uint64_t qp_bitmap;
-	int rc;
-	unsigned i;
-
-	nt->mw_count = ntb_mw_count(ntb);
-	for (i = 0; i < nt->mw_count; i++) {
-		mw = &nt->mw_vec[i];
-
-		rc = ntb_mw_get_range(ntb, i, &mw->phys_addr, &mw->vbase,
-		    &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size,
-		    &mw->addr_limit);
-		if (rc != 0)
-			goto err;
-
-		mw->buff_size = 0;
-		mw->xlat_size = 0;
-		mw->virt_addr = NULL;
-		mw->dma_addr = 0;
-
-		rc = ntb_mw_set_wc(nt->ntb, i, VM_MEMATTR_WRITE_COMBINING);
-		if (rc)
-			ntb_printf(0, "Unable to set mw%d caching\n", i);
-	}
-
-	qp_bitmap = ntb_db_valid_mask(ntb);
-	nt->qp_count = flsll(qp_bitmap);
-	KASSERT(nt->qp_count != 0, ("bogus db bitmap"));
-	nt->qp_count -= 1;
-
-	if (max_num_clients != 0 && max_num_clients < nt->qp_count)
-		nt->qp_count = max_num_clients;
-	else if (nt->mw_count < nt->qp_count)
-		nt->qp_count = nt->mw_count;
-	KASSERT(nt->qp_count <= QP_SETSIZE, ("invalid qp_count"));
-
-	mtx_init(&nt->tx_lock, "ntb transport tx", NULL, MTX_DEF);
-	mtx_init(&nt->rx_lock, "ntb transport rx", NULL, MTX_DEF);
-
-	nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_IF,
-	    M_WAITOK | M_ZERO);
-
-	for (i = 0; i < nt->qp_count; i++) {
-		set_bit(i, &nt->qp_bitmap);
-		set_bit(i, &nt->qp_bitmap_free);
-		ntb_transport_init_queue(nt, i);
-	}
-
-	callout_init(&nt->link_work, 0);
-	callout_init(&nt->link_watchdog, 0);
-	TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt);
-
-	rc = ntb_set_ctx(ntb, nt, &ntb_transport_ops);
-	if (rc != 0)
-		goto err;
-
-	nt->link_is_up = false;
-	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
-	ntb_link_event(ntb);
-
-	callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
-	if (enable_xeon_watchdog != 0)
-		callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt);
-	return (0);
-
-err:
-	free(nt->qp_vec, M_NTB_IF);
-	nt->qp_vec = NULL;
-	return (rc);
-}
-
-static void
-ntb_transport_free(struct ntb_transport_ctx *nt)
-{
-	struct ntb_softc *ntb = nt->ntb;
-	struct _qpset qp_bitmap_alloc;
-	uint8_t i;
-
-	ntb_transport_link_cleanup(nt);
-	taskqueue_drain(taskqueue_swi, &nt->link_cleanup);
-	callout_drain(&nt->link_work);
-	callout_drain(&nt->link_watchdog);
-
-	BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
-	BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);
-
-	/* Verify that all the QPs are freed */
-	for (i = 0; i < nt->qp_count; i++)
-		if (test_bit(i, &qp_bitmap_alloc))
-			ntb_transport_free_queue(&nt->qp_vec[i]);
-
-	ntb_link_disable(ntb);
-	ntb_clear_ctx(ntb);
-
-	for (i = 0; i < nt->mw_count; i++)
-		ntb_free_mw(nt, i);
-
-	free(nt->qp_vec, M_NTB_IF);
-}
-
-static void
-ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num)
-{
-	struct ntb_transport_mw *mw;
-	struct ntb_transport_qp *qp;
-	vm_paddr_t mw_base;
-	uint64_t mw_size, qp_offset;
-	size_t tx_size;
-	unsigned num_qps_mw, mw_num, mw_count;
-
-	mw_count = nt->mw_count;
-	mw_num = QP_TO_MW(nt, qp_num);
-	mw = &nt->mw_vec[mw_num];
-
-	qp = &nt->qp_vec[qp_num];
-	qp->qp_num = qp_num;
-	qp->transport = nt;
-	qp->ntb = nt->ntb;
-	qp->client_ready = false;
-	qp->event_handler = NULL;
-	ntb_qp_link_down_reset(qp);
-
-	if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count)
-		num_qps_mw = nt->qp_count / mw_count + 1;
-	else
-		num_qps_mw = nt->qp_count / mw_count;
-
-	mw_base = mw->phys_addr;
-	mw_size = mw->phys_size;
-
-	tx_size = mw_size / num_qps_mw;
-	qp_offset = tx_size * (qp_num / mw_count);
-
-	qp->tx_mw = mw->vbase + qp_offset;
-	KASSERT(qp->tx_mw != NULL, ("uh oh?"));
-
-	/* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */
-	qp->tx_mw_phys = mw_base + qp_offset;
-	KASSERT(qp->tx_mw_phys != 0, ("uh oh?"));
-
-	tx_size -= sizeof(struct ntb_rx_info);
-	qp->rx_info = (void *)(qp->tx_mw + tx_size);
-
-	/* Due to house-keeping, there must be at least 2 buffs */
-	qp->tx_max_frame = qmin(tx_size / 2,
-	    transport_mtu + sizeof(struct ntb_payload_header));
-	qp->tx_max_entry = tx_size / qp->tx_max_frame;
-
-	callout_init(&qp->link_work, 0);
-	callout_init(&qp->queue_full, 1);
-	callout_init(&qp->rx_full, 1);
-
-	mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN);
-	mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN);
-	TASK_INIT(&qp->rx_completion_task, 0, ntb_complete_rxc, qp);
-	TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp);
-
-	STAILQ_INIT(&qp->rx_post_q);
-	STAILQ_INIT(&qp->rx_pend_q);
-	STAILQ_INIT(&qp->tx_free_q);
 
-	callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
-}
-
-static void
-ntb_transport_free_queue(struct ntb_transport_qp *qp)
-{
-	struct ntb_queue_entry *entry;
-
-	if (qp == NULL)
-		return;
-
-	callout_drain(&qp->link_work);
-
-	ntb_db_set_mask(qp->ntb, 1ull << qp->qp_num);
-	taskqueue_drain(taskqueue_swi, &qp->rxc_db_work);
-	taskqueue_drain(taskqueue_swi, &qp->rx_completion_task);
-
-	qp->cb_data = NULL;
-	qp->rx_handler = NULL;
-	qp->tx_handler = NULL;
-	qp->event_handler = NULL;
-
-	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q)))
-		free(entry, M_NTB_IF);
-
-	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q)))
-		free(entry, M_NTB_IF);
-
-	while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
-		free(entry, M_NTB_IF);
-
-	set_bit(qp->qp_num, &qp->transport->qp_bitmap_free);
-}
-
-/**
- * ntb_transport_create_queue - Create a new NTB transport layer queue
- * @rx_handler: receive callback function
- * @tx_handler: transmit callback function
- * @event_handler: event callback function
- *
- * Create a new NTB transport layer queue and provide the queue with a callback
- * routine for both transmit and receive.  The receive callback routine will be
- * used to pass up data when the transport has received it on the queue.   The
- * transmit callback routine will be called when the transport has completed the
- * transmission of the data on the queue and the data is ready to be freed.
- *
- * RETURNS: pointer to newly created ntb_queue, NULL on error.
- */
-static struct ntb_transport_qp *
-ntb_transport_create_queue(void *data, struct ntb_softc *ntb,
-    const struct ntb_queue_handlers *handlers)
-{
-	struct ntb_queue_entry *entry;
-	struct ntb_transport_qp *qp;
-	struct ntb_transport_ctx *nt;
-	unsigned int free_queue;
-	int i;
-
-	nt = ntb_get_ctx(ntb, NULL);
-	KASSERT(nt != NULL, ("bogus"));
-
-	free_queue = ffs_bit(&nt->qp_bitmap);
-	if (free_queue == 0)
-		return (NULL);
-
-	/* decrement free_queue to make it zero based */
-	free_queue--;
-
-	qp = &nt->qp_vec[free_queue];
-	clear_bit(qp->qp_num, &nt->qp_bitmap_free);
-	qp->cb_data = data;
-	qp->rx_handler = handlers->rx_handler;
-	qp->tx_handler = handlers->tx_handler;
-	qp->event_handler = handlers->event_handler;
-
-	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
-		entry = malloc(sizeof(*entry), M_NTB_IF, M_WAITOK | M_ZERO);
-		entry->cb_data = nt->ifp;
-		entry->buf = NULL;
-		entry->len = transport_mtu;
-		ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q);
-	}
-
-	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
-		entry = malloc(sizeof(*entry), M_NTB_IF, M_WAITOK | M_ZERO);
-		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+	default:
+		error = ether_ioctl(ifp, command, data);
+		break;
 	}
 
-	ntb_db_clear(ntb, 1ull << qp->qp_num);
-	ntb_db_clear_mask(ntb, 1ull << qp->qp_num);
-	return (qp);
-}
-
-/**
- * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
- * @qp: NTB transport layer queue to be enabled
- *
- * Notify NTB transport layer of client readiness to use queue
- */
-static void
-ntb_transport_link_up(struct ntb_transport_qp *qp)
-{
-	struct ntb_transport_ctx *nt;
-
-	if (qp == NULL)
-		return;
-
-	qp->client_ready = true;
-
-	nt = qp->transport;
-	ntb_printf(2, "qp client ready\n");
-
-	if (qp->transport->link_is_up)
-		callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+	return (error);
 }
 
-
-
-/* Transport Tx */
-
-/**
- * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
- * @qp: NTB transport layer queue the entry is to be enqueued on
- * @cb: per buffer pointer for callback function to use
- * @data: pointer to data buffer that will be sent
- * @len: length of the data buffer
- *
- * Enqueue a new transmit buffer onto the transport queue from which a NTB
- * payload will be transmitted.  This assumes that a lock is being held to
- * serialize access to the qp.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
 static int
-ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
-    unsigned int len)
+ntb_ifmedia_upd(struct ifnet *ifp)
 {
-	struct ntb_queue_entry *entry;
-	int rc;
+	struct ntb_net_ctx *sc = if_getsoftc(ifp);
+	struct ifmedia *ifm = &sc->media;
 
-	if (qp == NULL || !qp->link_is_up || len == 0) {
-		CTR0(KTR_NTB, "TX: link not up");
+	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
 		return (EINVAL);
-	}
-
-	entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
-	if (entry == NULL) {
-		CTR0(KTR_NTB, "TX: could not get entry from tx_free_q");
-		qp->tx_err_no_buf++;
-		return (EBUSY);
-	}
-	CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry);
-
-	entry->cb_data = cb;
-	entry->buf = data;
-	entry->len = len;
-	entry->flags = 0;
-
-	rc = ntb_process_tx(qp, entry);
-	if (rc != 0) {
-		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
-		CTR1(KTR_NTB,
-		    "TX: process_tx failed. Returning entry %p to tx_free_q",
-		    entry);
-	}
-	return (rc);
-}
-
-static int
-ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
-{
-	void *offset;
-
-	offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
-	CTR3(KTR_NTB,
-	    "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u",
-	    qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry);
-	if (qp->tx_index == qp->remote_rx_info->entry) {
-		CTR0(KTR_NTB, "TX: ring full");
-		qp->tx_ring_full++;
-		return (EAGAIN);
-	}
-
-	if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
-		if (qp->tx_handler != NULL)
-			qp->tx_handler(qp, qp->cb_data, entry->buf,
-			    EIO);
-		else
-			m_freem(entry->buf);
-
-		entry->buf = NULL;
-		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
-		CTR1(KTR_NTB,
-		    "TX: frame too big. returning entry %p to tx_free_q",
-		    entry);
-		return (0);
-	}
-	CTR2(KTR_NTB, "TX: copying entry %p to offset %p", entry, offset);
-	ntb_memcpy_tx(qp, entry, offset);
-
-	qp->tx_index++;
-	qp->tx_index %= qp->tx_max_entry;
-
-	qp->tx_pkts++;
 
 	return (0);
 }
 
 static void
-ntb_memcpy_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
-    void *offset)
-{
-	struct ntb_payload_header *hdr;
-
-	/* This piece is from Linux' ntb_async_tx() */
-	hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame -
-	    sizeof(struct ntb_payload_header));
-	entry->x_hdr = hdr;
-	iowrite32(entry->len, &hdr->len);
-	iowrite32(qp->tx_pkts, &hdr->ver);
-
-	/* This piece is ntb_memcpy_tx() */
-	CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset);
-	if (entry->buf != NULL) {
-		m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset);
-
-		/*
-		 * Ensure that the data is fully copied before setting the
-		 * flags
-		 */
-		wmb();
-	}
-
-	/* The rest is ntb_tx_copy_callback() */
-	iowrite32(entry->flags | IF_NTB_DESC_DONE_FLAG, &hdr->flags);
-	CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr);
-
-	ntb_peer_db_set(qp->ntb, 1ull << qp->qp_num);
-
-	/*
-	 * The entry length can only be zero if the packet is intended to be a
-	 * "link down" or similar.  Since no payload is being sent in these
-	 * cases, there is nothing to add to the completion queue.
-	 */
-	if (entry->len > 0) {
-		qp->tx_bytes += entry->len;
-
-		if (qp->tx_handler)
-			qp->tx_handler(qp, qp->cb_data, entry->buf,
-			    entry->len);
-		else
-			m_freem(entry->buf);
-		entry->buf = NULL;
-	}
-
-	CTR3(KTR_NTB,
-	    "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning "
-	    "to tx_free_q", entry, hdr->ver, hdr->flags);
-	ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
-}
-
-static void
-ntb_qp_full(void *arg)
+ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
+	struct ntb_net_ctx *sc = if_getsoftc(ifp);
 
-	CTR0(KTR_NTB, "TX: qp_full callout");
-	ntb_start(arg);
+	ifmr->ifm_status = IFM_AVALID;
+	ifmr->ifm_active = NTB_MEDIATYPE;
+	if (ntb_transport_link_query(sc->queues[0].qp))
+		ifmr->ifm_status |= IFM_ACTIVE;
 }
 
-/* Transport Rx */
 static void
-ntb_transport_rxc_db(void *arg, int pending __unused)
+ntb_transmit_locked(struct ntb_net_queue *q)
 {
-	struct ntb_transport_qp *qp = arg;
-	ntb_q_idx_t i;
-	int rc;
-
-	/*
-	 * Limit the number of packets processed in a single interrupt to
-	 * provide fairness to others
-	 */
-	CTR0(KTR_NTB, "RX: transport_rx");
-	mtx_lock(&qp->transport->rx_lock);
-	for (i = 0; i < qp->rx_max_entry; i++) {
-		rc = ntb_process_rxc(qp);
+	if_t ifp = q->ifp;
+	struct mbuf *m;
+	int rc, len;
+	short mflags;
+
+	CTR0(KTR_NTB, "TX: ntb_transmit_locked");
+	while ((m = drbr_peek(ifp, q->br)) != NULL) {
+		CTR1(KTR_NTB, "TX: start mbuf %p", m);
+		if_etherbpfmtap(ifp, m);
+		len = m->m_pkthdr.len;
+		mflags = m->m_flags;
+		rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
 		if (rc != 0) {
-			CTR0(KTR_NTB, "RX: process_rxc failed");
+			CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
+			if (rc == EAGAIN) {
+				drbr_putback(ifp, q->br, m);
+				callout_reset_sbt(&q->queue_full,
+				    SBT_1MS / 4, SBT_1MS / 4,
+				    ntb_qp_full, q, 0);
+			} else {
+				m_freem(m);
+				drbr_advance(ifp, q->br);
+				if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+			}
 			break;
 		}
-	}
-	mtx_unlock(&qp->transport->rx_lock);
-
-	if (i == qp->rx_max_entry)
-		taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
-	else if ((ntb_db_read(qp->ntb) & (1ull << qp->qp_num)) != 0) {
-		/* If db is set, clear it and read it back to commit clear. */
-		ntb_db_clear(qp->ntb, 1ull << qp->qp_num);
-		(void)ntb_db_read(qp->ntb);
-
-		/*
-		 * An interrupt may have arrived between finishing
-		 * ntb_process_rxc and clearing the doorbell bit: there might
-		 * be some more work to do.
-		 */
-		taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
+		drbr_advance(ifp, q->br);
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+		if (mflags & M_MCAST)
+			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
 	}
 }
 
 static int
-ntb_process_rxc(struct ntb_transport_qp *qp)
+ntb_transmit(if_t ifp, struct mbuf *m)
 {
-	struct ntb_payload_header *hdr;
-	struct ntb_queue_entry *entry;
-	caddr_t offset;
-
-	offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
-	hdr = (void *)(offset + qp->rx_max_frame -
-	    sizeof(struct ntb_payload_header));
-
-	CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index);
-	if ((hdr->flags & IF_NTB_DESC_DONE_FLAG) == 0) {
-		CTR0(KTR_NTB, "RX: hdr not done");
-		qp->rx_ring_empty++;
-		return (EAGAIN);
-	}
-
-	if ((hdr->flags & IF_NTB_LINK_DOWN_FLAG) != 0) {
-		CTR0(KTR_NTB, "RX: link down");
-		ntb_qp_link_down(qp);
-		hdr->flags = 0;
-		return (EAGAIN);
-	}
-
-	if (hdr->ver != (uint32_t)qp->rx_pkts) {
-		CTR2(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). "
-		    "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts);
-		qp->rx_err_ver++;
-		return (EIO);
-	}
-
-	entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
-	if (entry == NULL) {
-		qp->rx_err_no_buf++;
-		CTR0(KTR_NTB, "RX: No entries in rx_pend_q");
-		return (EAGAIN);
-	}
-	callout_stop(&qp->rx_full);
-	CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry);
-
-	entry->x_hdr = hdr;
-	entry->index = qp->rx_index;
-
-	if (hdr->len > entry->len) {
-		CTR2(KTR_NTB, "RX: len too long. Wanted %ju got %ju",
-		    (uintmax_t)hdr->len, (uintmax_t)entry->len);
-		qp->rx_err_oflow++;
-
-		entry->len = -EIO;
-		entry->flags |= IF_NTB_DESC_DONE_FLAG;
+	struct ntb_net_ctx *sc = if_getsoftc(ifp);
+	struct ntb_net_queue *q;
+	int error, i;
 
-		taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task);
-	} else {
-		qp->rx_bytes += hdr->len;
-		qp->rx_pkts++;
-
-		CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts);
-
-		entry->len = hdr->len;
-
-		ntb_memcpy_rx(qp, entry, offset);
-	}
-
-	qp->rx_index++;
-	qp->rx_index %= qp->rx_max_entry;
+	CTR0(KTR_NTB, "TX: ntb_transmit");
+	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+		i = m->m_pkthdr.flowid % sc->num_queues;
+	else
+		i = curcpu % sc->num_queues;
+	q = &sc->queues[i];
+
+	error = drbr_enqueue(ifp, q->br, m);
+	if (error)
+		return (error);
+
+	if (mtx_trylock(&q->tx_lock)) {
+		ntb_transmit_locked(q);
+		mtx_unlock(&q->tx_lock);
+	} else
+		taskqueue_enqueue(q->tx_tq, &q->tx_task);
 	return (0);
 }
 
 static void
-ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
-    void *offset)
+ntb_handle_tx(void *arg, int pending)
 {
-	struct ifnet *ifp = entry->cb_data;
-	unsigned int len = entry->len;
-	struct mbuf *m;
-
-	CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset);
-	m = m_devget(offset, len, 0, ifp, NULL);
-	m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID;
+	struct ntb_net_queue *q = arg;
 
-	entry->buf = (void *)m;
-
-	/* Ensure that the data is globally visible before clearing the flag */
-	wmb();
-
-	CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, m);
-	ntb_rx_copy_callback(qp, entry);
+	mtx_lock(&q->tx_lock);
+	ntb_transmit_locked(q);
+	mtx_unlock(&q->tx_lock);
 }
 
-static inline void
-ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data)
+static void
+ntb_qp_full(void *arg)
 {
-	struct ntb_queue_entry *entry;
+	struct ntb_net_queue *q = arg;
 
-	entry = data;
-	entry->flags |= IF_NTB_DESC_DONE_FLAG;
-	taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task);
+	CTR0(KTR_NTB, "TX: qp_full callout");
+	if (ntb_transport_tx_free_entry(q->qp) > 0)
+		taskqueue_enqueue(q->tx_tq, &q->tx_task);
+	else
+		callout_schedule_sbt(&q->queue_full,
+		    SBT_1MS / 4, SBT_1MS / 4, 0);
 }
 
 static void
-ntb_complete_rxc(void *arg, int pending)
+ntb_qflush(if_t ifp)
 {
-	struct ntb_transport_qp *qp = arg;
-	struct ntb_queue_entry *entry;
+	struct ntb_net_ctx *sc = if_getsoftc(ifp);
+	struct ntb_net_queue *q;
 	struct mbuf *m;
-	unsigned len;
-
-	CTR0(KTR_NTB, "RX: rx_completion_task");
-
-	mtx_lock_spin(&qp->ntb_rx_q_lock);
-
-	while (!STAILQ_EMPTY(&qp->rx_post_q)) {
-		entry = STAILQ_FIRST(&qp->rx_post_q);
-		if ((entry->flags & IF_NTB_DESC_DONE_FLAG) == 0)
-			break;
-
-		entry->x_hdr->flags = 0;
-		iowrite32(entry->index, &qp->rx_info->entry);
-
-		STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry);
-
-		len = entry->len;
-		m = entry->buf;
-
-		/*
-		 * Re-initialize queue_entry for reuse; rx_handler takes
-		 * ownership of the mbuf.
-		 */
-		entry->buf = NULL;
-		entry->len = transport_mtu;
-		entry->cb_data = qp->transport->ifp;
-
-		STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry);
-
-		mtx_unlock_spin(&qp->ntb_rx_q_lock);
+	int i;
 
-		CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m);
-		if (qp->rx_handler != NULL && qp->client_ready)
-			qp->rx_handler(qp, qp->cb_data, m, len);
-		else
+	for (i = 0; i < sc->num_queues; i++) {
+		q = &sc->queues[i];
+		mtx_lock(&q->tx_lock);
+		while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
 			m_freem(m);
-
-		mtx_lock_spin(&qp->ntb_rx_q_lock);
-	}
-
-	mtx_unlock_spin(&qp->ntb_rx_q_lock);
-}
-
-static void
-ntb_transport_doorbell_callback(void *data, uint32_t vector)
-{
-	struct ntb_transport_ctx *nt = data;
-	struct ntb_transport_qp *qp;
-	struct _qpset db_bits;
-	uint64_t vec_mask;
-	unsigned qp_num;
-
-	BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &db_bits);
-	BIT_NAND(QP_SETSIZE, &db_bits, &nt->qp_bitmap_free);
-
-	vec_mask = ntb_db_vector_mask(nt->ntb, vector);
-	while (vec_mask != 0) {
-		qp_num = ffsll(vec_mask) - 1;
-
-		if (test_bit(qp_num, &db_bits)) {
-			qp = &nt->qp_vec[qp_num];
-			taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
-		}
-
-		vec_mask &= ~(1ull << qp_num);
-	}
-}
-
-/* Link Event handler */
-static void
-ntb_transport_event_callback(void *data)
-{
-	struct ntb_transport_ctx *nt = data;
-
-	if (ntb_link_is_up(nt->ntb, NULL, NULL)) {
-		ntb_printf(1, "HW link up\n");
-		callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
-	} else {
-		ntb_printf(1, "HW link down\n");
-		taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup);
+		mtx_unlock(&q->tx_lock);
 	}
+	if_qflush(ifp);
 }
 
-/* Link bring up */
+/* Network Device Callbacks */
 static void
-ntb_transport_link_work(void *arg)
-{
-	struct ntb_transport_ctx *nt = arg;
-	struct ntb_softc *ntb = nt->ntb;
-	struct ntb_transport_qp *qp;
-	uint64_t val64, size;
-	uint32_t val;
-	unsigned i;
-	int rc;
-
-	/* send the local info, in the opposite order of the way we read it */
-	for (i = 0; i < nt->mw_count; i++) {
-		size = nt->mw_vec[i].phys_size;
-
-		if (max_mw_size != 0 && size > max_mw_size)
-			size = max_mw_size;
-
-		ntb_peer_spad_write(ntb, IF_NTB_MW0_SZ_HIGH + (i * 2),
-		    size >> 32);
-		ntb_peer_spad_write(ntb, IF_NTB_MW0_SZ_LOW + (i * 2), size);
-	}
-
-	ntb_peer_spad_write(ntb, IF_NTB_NUM_MWS, nt->mw_count);
-
-	ntb_peer_spad_write(ntb, IF_NTB_NUM_QPS, nt->qp_count);
-
-	ntb_peer_spad_write(ntb, IF_NTB_VERSION, NTB_TRANSPORT_VERSION);
-
-	/* Query the remote side for its info */
-	val = 0;
-	ntb_spad_read(ntb, IF_NTB_VERSION, &val);
-	if (val != NTB_TRANSPORT_VERSION)
-		goto out;
-
-	ntb_spad_read(ntb, IF_NTB_NUM_QPS, &val);
-	if (val != nt->qp_count)
-		goto out;
-
-	ntb_spad_read(ntb, IF_NTB_NUM_MWS, &val);
-	if (val != nt->mw_count)
-		goto out;
-
-	for (i = 0; i < nt->mw_count; i++) {
-		ntb_spad_read(ntb, IF_NTB_MW0_SZ_HIGH + (i * 2), &val);
-		val64 = (uint64_t)val << 32;
-
-		ntb_spad_read(ntb, IF_NTB_MW0_SZ_LOW + (i * 2), &val);
-		val64 |= val;
-
-		rc = ntb_set_mw(nt, i, val64);
-		if (rc != 0)
-			goto free_mws;
-	}
-
-	nt->link_is_up = true;
-	ntb_printf(1, "transport link up\n");
-
-	for (i = 0; i < nt->qp_count; i++) {
-		qp = &nt->qp_vec[i];
-
-		ntb_transport_setup_qp_mw(nt, i);
-
-		if (qp->client_ready)
-			callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
-	}
-
-	return;
-
-free_mws:
-	for (i = 0; i < nt->mw_count; i++)
-		ntb_free_mw(nt, i);
-out:
-	if (ntb_link_is_up(ntb, NULL, NULL))
-		callout_reset(&nt->link_work,
-		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt);
-}
-
-static int
-ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size)
+ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
+    int len)
 {
-	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
-	size_t xlat_size, buff_size;
-	int rc;
-
-	if (size == 0)
-		return (EINVAL);
-
-	xlat_size = roundup(size, mw->xlat_align_size);
-	buff_size = xlat_size;
-
-	/* No need to re-setup */
-	if (mw->xlat_size == xlat_size)
-		return (0);
-
-	if (mw->buff_size != 0)
-		ntb_free_mw(nt, num_mw);
-
-	/* Alloc memory for receiving data.  Must be aligned */
-	mw->xlat_size = xlat_size;
-	mw->buff_size = buff_size;
-
-	mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_IF, M_ZERO, 0,
-	    mw->addr_limit, mw->xlat_align, 0);
-	if (mw->virt_addr == NULL) {
-		ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n",
-		    mw->buff_size, mw->xlat_size);
-		mw->xlat_size = 0;
-		mw->buff_size = 0;
-		return (ENOMEM);
-	}
-	/* TODO: replace with bus_space_* functions */
-	mw->dma_addr = vtophys(mw->virt_addr);
-
-	/*
-	 * Ensure that the allocation from contigmalloc is aligned as
-	 * requested.  XXX: This may not be needed -- brought in for parity
-	 * with the Linux driver.
-	 */
-	if (mw->dma_addr % mw->xlat_align != 0) {
-		ntb_printf(0,
-		    "DMA memory 0x%jx not aligned to BAR size 0x%zx\n",
-		    (uintmax_t)mw->dma_addr, size);
-		ntb_free_mw(nt, num_mw);
-		return (ENOMEM);
-	}
-
-	/* Notify HW the memory location of the receive buffer */
-	rc = ntb_mw_set_trans(nt->ntb, num_mw, mw->dma_addr, mw->xlat_size);
-	if (rc) {
-		ntb_printf(0, "Unable to set mw%d translation\n", num_mw);
-		ntb_free_mw(nt, num_mw);
-		return (rc);
-	}
 
-	return (0);
+	m_freem(data);
+	CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
 }
 
 static void
-ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
+ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
+    int len)
 {
-	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+	struct ntb_net_queue *q = qp_data;
+	struct ntb_net_ctx *sc = q->sc;
+	struct mbuf *m = data;
+	if_t ifp = q->ifp;
+	uint16_t proto;
 
-	if (mw->virt_addr == NULL)
+	CTR1(KTR_NTB, "RX: rx handler (%d)", len);
+	if (len < 0) {
+		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		return;
-
-	ntb_mw_clear_trans(nt->ntb, num_mw);
-	contigfree(mw->virt_addr, mw->xlat_size, M_NTB_IF);
-	mw->xlat_size = 0;
-	mw->buff_size = 0;
-	mw->virt_addr = NULL;
-}
-
-static int
-ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num)
-{
-	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
-	struct ntb_transport_mw *mw;
-	void *offset;
-	ntb_q_idx_t i;
-	size_t rx_size;
-	unsigned num_qps_mw, mw_num, mw_count;
-
-	mw_count = nt->mw_count;
-	mw_num = QP_TO_MW(nt, qp_num);
-	mw = &nt->mw_vec[mw_num];
-
-	if (mw->virt_addr == NULL)
-		return (ENOMEM);
-
-	if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count)
-		num_qps_mw = nt->qp_count / mw_count + 1;
-	else
-		num_qps_mw = nt->qp_count / mw_count;
-
-	rx_size = mw->xlat_size / num_qps_mw;
-	qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count);
-	rx_size -= sizeof(struct ntb_rx_info);
-
-	qp->remote_rx_info = (void*)(qp->rx_buff + rx_size);
-
-	/* Due to house-keeping, there must be at least 2 buffs */
-	qp->rx_max_frame = qmin(rx_size / 2,
-	    transport_mtu + sizeof(struct ntb_payload_header));
-	qp->rx_max_entry = rx_size / qp->rx_max_frame;
-	qp->rx_index = 0;
-
-	qp->remote_rx_info->entry = qp->rx_max_entry - 1;
-
-	/* Set up the hdr offsets with 0s */
-	for (i = 0; i < qp->rx_max_entry; i++) {
-		offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) -
-		    sizeof(struct ntb_payload_header));
-		memset(offset, 0, sizeof(struct ntb_payload_header));
 	}
 
-	qp->rx_pkts = 0;
-	qp->tx_pkts = 0;
-	qp->tx_index = 0;
-
-	return (0);
-}
-
-static void
-ntb_qp_link_work(void *arg)
-{
-	struct ntb_transport_qp *qp = arg;
-	struct ntb_softc *ntb = qp->ntb;
-	struct ntb_transport_ctx *nt = qp->transport;
-	uint32_t val, dummy;
-
-	ntb_spad_read(ntb, IF_NTB_QP_LINKS, &val);
-
-	ntb_peer_spad_write(ntb, IF_NTB_QP_LINKS, val | (1ull << qp->qp_num));
-
-	/* query remote spad for qp ready bits */
-	ntb_peer_spad_read(ntb, IF_NTB_QP_LINKS, &dummy);
-
-	/* See if the remote side is up */
-	if ((val & (1ull << qp->qp_num)) != 0) {
-		ntb_printf(2, "qp link up\n");
-		qp->link_is_up = true;
-
-		if (qp->event_handler != NULL)
-			qp->event_handler(qp->cb_data, NTB_LINK_UP);
-
-		taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
-	} else if (nt->link_is_up)
-		callout_reset(&qp->link_work,
-		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
-}
-
-/* Link down event*/
-static void
-ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
-{
-	struct ntb_transport_qp *qp;
-	struct _qpset qp_bitmap_alloc;
-	unsigned i;
-
-	BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
-	BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);
-
-	/* Pass along the info to any clients */
-	for (i = 0; i < nt->qp_count; i++)
-		if (test_bit(i, &qp_bitmap_alloc)) {
-			qp = &nt->qp_vec[i];
-			ntb_qp_link_cleanup(qp);
-			callout_drain(&qp->link_work);
-		}
-
-	if (!nt->link_is_up)
-		callout_drain(&nt->link_work);
-
-	/*
-	 * The scratchpad registers keep the values if the remote side
-	 * goes down, blast them now to give them a sane value the next
-	 * time they are accessed
-	 */
-	for (i = 0; i < IF_NTB_MAX_SPAD; i++)
-		ntb_spad_write(nt->ntb, i, 0);
-}
-
-static void
-ntb_transport_link_cleanup_work(void *arg, int pending __unused)
-{
-
-	ntb_transport_link_cleanup(arg);
-}
-
-static void
-ntb_qp_link_down(struct ntb_transport_qp *qp)
-{
-
-	ntb_qp_link_cleanup(qp);
-}
-
-static void
-ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
-{
-
-	qp->link_is_up = false;
-
-	qp->tx_index = qp->rx_index = 0;
-	qp->tx_bytes = qp->rx_bytes = 0;
-	qp->tx_pkts = qp->rx_pkts = 0;
-
-	qp->rx_ring_empty = 0;
-	qp->tx_ring_full = 0;
-
-	qp->rx_err_no_buf = qp->tx_err_no_buf = 0;
-	qp->rx_err_oflow = qp->rx_err_ver = 0;
-}
-
-static void
-ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
-{
-	struct ntb_transport_ctx *nt = qp->transport;
-
-	callout_drain(&qp->link_work);
-	ntb_qp_link_down_reset(qp);
-
-	if (qp->event_handler != NULL)
-		qp->event_handler(qp->cb_data, NTB_LINK_DOWN);
-
-	if (nt->link_is_up)
-		callout_reset(&qp->link_work,
-		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
-}
-
-/* Link commanded down */
-/**
- * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
- * @qp: NTB transport layer queue to be disabled
- *
- * Notify NTB transport layer of client's desire to no longer receive data on
- * transport queue specified.  It is the client's responsibility to ensure all
- * entries on queue are purged or otherwise handled appropriately.
- */
-static void
-ntb_transport_link_down(struct ntb_transport_qp *qp)
-{
-	uint32_t val;
-
-	if (qp == NULL)
-		return;
-
-	qp->client_ready = false;
-
-	ntb_spad_read(qp->ntb, IF_NTB_QP_LINKS, &val);
-
-	ntb_peer_spad_write(qp->ntb, IF_NTB_QP_LINKS,
-	   val & ~(1 << qp->qp_num));
-
-	if (qp->link_is_up)
-		ntb_send_link_down(qp);
-	else
-		callout_drain(&qp->link_work);
-}
-
-static void
-ntb_send_link_down(struct ntb_transport_qp *qp)
-{
-	struct ntb_queue_entry *entry;
-	int i, rc;
-
-	if (!qp->link_is_up)
-		return;
-
-	for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
-		entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
-		if (entry != NULL)
+	m->m_pkthdr.rcvif = ifp;
+	if (sc->num_queues > 1) {
+		m->m_pkthdr.flowid = q - sc->queues;
+		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
+	}
+	if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
+		m_copydata(m, 12, 2, (void *)&proto);
+		switch (ntohs(proto)) {
+		case ETHERTYPE_IP:
+			if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
+				m->m_pkthdr.csum_data = 0xffff;
+				m->m_pkthdr.csum_flags = NTB_CSUM_SET;
+			}
+			break;
+		case ETHERTYPE_IPV6:
+			if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) {
+				m->m_pkthdr.csum_data = 0xffff;
+				m->m_pkthdr.csum_flags = NTB_CSUM_SET;
+			}
 			break;
-		pause("NTB Wait for link down", hz / 10);
+		}
 	}
-
-	if (entry == NULL)
-		return;
-
-	entry->cb_data = NULL;
-	entry->buf = NULL;
-	entry->len = 0;
-	entry->flags = IF_NTB_LINK_DOWN_FLAG;
-
-	mtx_lock(&qp->transport->tx_lock);
-	rc = ntb_process_tx(qp, entry);
-	if (rc != 0)
-		printf("ntb: Failed to send link down\n");
-	mtx_unlock(&qp->transport->tx_lock);
-
-	ntb_qp_link_down_reset(qp);
+	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+	if_input(ifp, m);
 }
 
-
-/* List Management */
-
 static void
-ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
-    struct ntb_queue_list *list)
-{
-
-	mtx_lock_spin(lock);
-	STAILQ_INSERT_TAIL(list, entry, entry);
-	mtx_unlock_spin(lock);
-}
-
-static struct ntb_queue_entry *
-ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list)
-{
-	struct ntb_queue_entry *entry;
-
-	mtx_lock_spin(lock);
-	if (STAILQ_EMPTY(list)) {
-		entry = NULL;
-		goto out;
-	}
-	entry = STAILQ_FIRST(list);
-	STAILQ_REMOVE_HEAD(list, entry);
-out:
-	mtx_unlock_spin(lock);
-
-	return (entry);
-}
-
-static struct ntb_queue_entry *
-ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from,
-    struct ntb_queue_list *to)
+ntb_net_event_handler(void *data, enum ntb_link_event status)
 {
-	struct ntb_queue_entry *entry;
+	struct ntb_net_queue *q = data;
+	int new_state;
 
-	mtx_lock_spin(lock);
-	if (STAILQ_EMPTY(from)) {
-		entry = NULL;
-		goto out;
+	switch (status) {
+	case NTB_LINK_DOWN:
+		new_state = LINK_STATE_DOWN;
+		break;
+	case NTB_LINK_UP:
+		new_state = LINK_STATE_UP;
+		break;
+	default:
+		new_state = LINK_STATE_UNKNOWN;
+		break;
 	}
-	entry = STAILQ_FIRST(from);
-	STAILQ_REMOVE_HEAD(from, entry);
-	STAILQ_INSERT_TAIL(to, entry, entry);
-
-out:
-	mtx_unlock_spin(lock);
-	return (entry);
+	if_link_state_change(q->ifp, new_state);
 }
 
 /* Helper functions */
@@ -1688,27 +493,24 @@ static void
 create_random_local_eui48(u_char *eaddr)
 {
 	static uint8_t counter = 0;
-	uint32_t seed = ticks;
 
 	eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
-	memcpy(&eaddr[1], &seed, sizeof(uint32_t));
+	arc4rand(&eaddr[1], 4, 0);
 	eaddr[5] = counter++;
 }
 
-/**
- * ntb_transport_max_size - Query the max payload size of a qp
- * @qp: NTB transport layer queue to be queried
- *
- * Query the maximum payload size permissible on the given qp
- *
- * RETURNS: the max payload size of a qp
- */
-static unsigned int
-ntb_transport_max_size(struct ntb_transport_qp *qp)
-{
-
-	if (qp == NULL)
-		return (0);
+static device_method_t ntb_net_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,     ntb_net_probe),
+	DEVMETHOD(device_attach,    ntb_net_attach),
+	DEVMETHOD(device_detach,    ntb_net_detach),
+	DEVMETHOD_END
+};
 
-	return (qp->tx_max_frame - sizeof(struct ntb_payload_header));
-}
+devclass_t ntb_net_devclass;
+static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
+    sizeof(struct ntb_net_ctx));
+DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass,
+    NULL, NULL);
+MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
+MODULE_VERSION(if_ntb, 1);
diff --git a/sys/dev/ntb/ntb.c b/sys/dev/ntb/ntb.c
new file mode 100644
index 0000000..44c0c61
--- /dev/null
+++ b/sys/dev/ntb/ntb.c
@@ -0,0 +1,462 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+
+#include "ntb.h"
+
+devclass_t ntb_hw_devclass;
+SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
+
+struct ntb_child {
+	device_t	dev;		/* child device for this slice */
+	int		enabled;	/* child requested the link up */
+	int		mwoff;		/* first parent memory window */
+	int		mwcnt;		/* memory windows assigned */
+	int		spadoff;	/* first parent scratchpad */
+	int		spadcnt;	/* scratchpads assigned */
+	int		dboff;		/* first parent doorbell bit */
+	uint64_t	dbmask;		/* child-relative doorbell mask */
+	void		*ctx;		/* consumer context */
+	const struct ntb_ctx_ops *ctx_ops;	/* consumer callbacks */
+	struct rmlock	ctx_lock;	/* protects ctx and ctx_ops */
+	struct ntb_child *next;		/* next child of same parent */
+};
+
+/* Split the NTB resources among children per hint.<name>.<unit>.config. */
+int
+ntb_register_device(device_t dev)
+{
+	struct ntb_child **cpp = device_get_softc(dev);
+	struct ntb_child *nc;
+	int i, mw, mwu, mwt, spad, spadu, spadt, db, dbu, dbt;
+	char buf[32], cfg[128] = "";
+	char *n, *np, *c, *p, *name;
+
+	mwu = spadu = dbu = 0;		/* resources handed out so far */
+	mwt = NTB_MW_COUNT(dev);
+	spadt = NTB_SPAD_COUNT(dev);
+	dbt = flsll(NTB_DB_VALID_MASK(dev));
+
+	device_printf(dev, "%d memory windows, %d scratchpads, "
+	    "%d doorbells\n", mwt, spadt, dbt);
+
+	snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev),
+	    device_get_unit(dev));
+	TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg));
+	n = cfg;
+	i = 0;
+	while ((c = strsep(&n, ",")) != NULL) {	/* name[:mw[:spad[:db]]] */
+		np = c;
+		name = strsep(&np, ":");
+		if (name != NULL && name[0] == 0)
+			name = NULL;
+		p = strsep(&np, ":");
+		mw = (p && p[0] != 0) ? strtol(p, NULL, 10) : mwt - mwu;
+		p = strsep(&np, ":");
+		spad = (p && p[0] != 0) ? strtol(p, NULL, 10) : spadt - spadu;
+		db = (np && np[0] != 0) ? strtol(np, NULL, 10) : dbt - dbu;
+
+		if (mw > mwt - mwu || spad > spadt - spadu || db > dbt - dbu) {
+			device_printf(dev, "Not enough resources for config\n");
+			break;
+		}
+
+		nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO);
+		nc->mwoff = mwu;
+		nc->mwcnt = mw;
+		nc->spadoff = spadu;
+		nc->spadcnt = spad;
+		nc->dboff = dbu;
+		nc->dbmask = (db == 0) ? 0 : (0xffffffffffffffff >> (64 - db));
+		rm_init(&nc->ctx_lock, "ntb ctx");
+		nc->dev = device_add_child(dev, name, -1);
+		if (nc->dev == NULL) {	/* nc is not on the list yet */
+			rm_destroy(&nc->ctx_lock);
+			free(nc, M_DEVBUF);
+			ntb_unregister_device(dev);
+			return (ENOMEM);
+		}
+		device_set_ivars(nc->dev, nc);
+		*cpp = nc;
+		cpp = &nc->next;
+
+		if (bootverbose) {
+			device_printf(dev, "%d \"%s\":", i, name);
+			if (mw > 0) {
+				printf(" memory windows %d", mwu);
+				if (mw > 1)
+					printf("-%d", mwu + mw - 1);
+			}
+			if (spad > 0) {
+				printf(" scratchpads %d", spadu);
+				if (spad > 1)
+					printf("-%d", spadu + spad - 1);
+			}
+			if (db > 0) {
+				printf(" doorbells %d", dbu);
+				if (db > 1)
+					printf("-%d", dbu + db - 1);
+			}
+			printf("\n");
+		}
+
+		mwu += mw;
+		spadu += spad;
+		dbu += db;
+		i++;
+	}
+
+	bus_generic_attach(dev);
+	return (0);
+}
+
+int
+ntb_unregister_device(device_t dev)
+{
+	struct ntb_child **cpp = device_get_softc(dev);
+	struct ntb_child *nc;
+	int error = 0;
+
+	while ((nc = *cpp) != NULL) {
+		error = device_delete_child(dev, nc->dev);
+		if (error)
+			break;	/* keep nc listed so a later call can retry */
+		*cpp = nc->next;
+		rm_destroy(&nc->ctx_lock);
+		free(nc, M_DEVBUF);
+	}
+	return (error);
+}
+
+void
+ntb_link_event(device_t dev)
+{
+	struct ntb_child **cpp = device_get_softc(dev);
+	struct ntb_child *nc;
+	struct rm_priotracker ctx_tracker;
+
+	for (nc = *cpp; nc != NULL; nc = nc->next) {
+		rm_rlock(&nc->ctx_lock, &ctx_tracker);
+		if (nc->ctx_ops != NULL && nc->ctx_ops->link_event != NULL)
+			nc->ctx_ops->link_event(nc->ctx);
+		rm_runlock(&nc->ctx_lock, &ctx_tracker);
+	}
+}
+
+void
+ntb_db_event(device_t dev, uint32_t vec)
+{
+	struct ntb_child **cpp = device_get_softc(dev);
+	struct ntb_child *nc;
+	struct rm_priotracker ctx_tracker;
+
+	for (nc = *cpp; nc != NULL; nc = nc->next) {
+		rm_rlock(&nc->ctx_lock, &ctx_tracker);
+		if (nc->ctx_ops != NULL && nc->ctx_ops->db_event != NULL)
+			nc->ctx_ops->db_event(nc->ctx, vec);
+		rm_runlock(&nc->ctx_lock, &ctx_tracker);
+	}
+}
+
+bool
+ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width)
+{
+
+	return (NTB_LINK_IS_UP(device_get_parent(ntb), speed, width));
+}
+
+int
+ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+	struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev));
+	struct ntb_child *nc1;
+
+	for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) {
+		if (nc1->enabled) {
+			nc->enabled = 1;
+			return (0);
+		}
+	}
+	nc->enabled = 1;
+	return (NTB_LINK_ENABLE(device_get_parent(ntb), speed, width));
+}
+
+int
+ntb_link_disable(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+	struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev));
+	struct ntb_child *nc1;
+
+	if (!nc->enabled)
+		return (0);
+	nc->enabled = 0;
+	for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) {
+		if (nc1->enabled)
+			return (0);
+	}
+	return (NTB_LINK_DISABLE(device_get_parent(ntb)));
+}
+
+bool
+ntb_link_enabled(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (nc->enabled && NTB_LINK_ENABLED(device_get_parent(ntb)));
+}
+
+int
+ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	if (ctx == NULL || ctx_ops == NULL)
+		return (EINVAL);
+
+	rm_wlock(&nc->ctx_lock);
+	if (nc->ctx_ops != NULL) {
+		rm_wunlock(&nc->ctx_lock);
+		return (EINVAL);
+	}
+	nc->ctx = ctx;
+	nc->ctx_ops = ctx_ops;
+	rm_wunlock(&nc->ctx_lock);
+
+	return (0);
+}
+
+void *
+ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	KASSERT(nc->ctx != NULL && nc->ctx_ops != NULL, ("bogus"));
+	if (ctx_ops != NULL)
+		*ctx_ops = nc->ctx_ops;
+	return (nc->ctx);
+}
+
+void
+ntb_clear_ctx(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	rm_wlock(&nc->ctx_lock);
+	nc->ctx = NULL;
+	nc->ctx_ops = NULL;
+	rm_wunlock(&nc->ctx_lock);
+}
+
+uint8_t
+ntb_mw_count(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (nc->mwcnt);
+}
+
+int
+ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base,
+    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
+    bus_addr_t *plimit)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_MW_GET_RANGE(device_get_parent(ntb), mw_idx + nc->mwoff,
+	    base, vbase, size, align, align_size, plimit));
+}
+
+int
+ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr, size_t size)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_MW_SET_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff,
+	    addr, size));
+}
+
+int
+ntb_mw_clear_trans(device_t ntb, unsigned mw_idx)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_MW_CLEAR_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff));
+}
+
+int
+ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_MW_GET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode));
+}
+
+int
+ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_MW_SET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode));
+}
+
+uint8_t
+ntb_spad_count(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (nc->spadcnt);
+}
+
+void
+ntb_spad_clear(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+	unsigned i;
+
+	for (i = 0; i < nc->spadcnt; i++)
+		NTB_SPAD_WRITE(device_get_parent(ntb), i + nc->spadoff, 0);
+}
+
+int
+ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff, val));
+}
+
+int
+ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff, val));
+}
+
+int
+ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_PEER_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff,
+	    val));
+}
+
+int
+ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (NTB_PEER_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff,
+	    val));
+}
+
+uint64_t
+ntb_db_valid_mask(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return (nc->dbmask);
+}
+
+int
+ntb_db_vector_count(device_t ntb)
+{
+
+	return (NTB_DB_VECTOR_COUNT(device_get_parent(ntb)));
+}
+
+uint64_t
+ntb_db_vector_mask(device_t ntb, uint32_t vector)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return ((NTB_DB_VECTOR_MASK(device_get_parent(ntb), vector)
+	    >> nc->dboff) & nc->dbmask);
+}
+
+int
+ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size)
+{
+
+	return (NTB_PEER_DB_ADDR(device_get_parent(ntb), db_addr, db_size));
+}
+
+void
+ntb_db_clear(device_t ntb, uint64_t bits)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+	/* Rebase the child-relative bits onto the parent's doorbells. */
+	NTB_DB_CLEAR(device_get_parent(ntb), bits << nc->dboff);
+}
+
+void
+ntb_db_clear_mask(device_t ntb, uint64_t bits)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+	/* Rebase the child-relative bits onto the parent's doorbells. */
+	NTB_DB_CLEAR_MASK(device_get_parent(ntb), bits << nc->dboff);
+}
+
+uint64_t
+ntb_db_read(device_t ntb)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+
+	return ((NTB_DB_READ(device_get_parent(ntb)) >> nc->dboff)
+	    & nc->dbmask);
+}
+
+void
+ntb_db_set_mask(device_t ntb, uint64_t bits)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+	/* Rebase the child-relative bits onto the parent's doorbells. */
+	NTB_DB_SET_MASK(device_get_parent(ntb), bits << nc->dboff);
+}
+
+void
+ntb_peer_db_set(device_t ntb, uint64_t bits)
+{
+	struct ntb_child *nc = device_get_ivars(ntb);
+	/* Rebase the child-relative bits onto the parent's doorbells. */
+	NTB_PEER_DB_SET(device_get_parent(ntb), bits << nc->dboff);
+}
+
+MODULE_VERSION(ntb, 1);
diff --git a/sys/dev/ntb/ntb.h b/sys/dev/ntb/ntb.h
new file mode 100644
index 0000000..8593c65
--- /dev/null
+++ b/sys/dev/ntb/ntb.h
@@ -0,0 +1,409 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NTB_H_
+#define _NTB_H_
+
+#include "ntb_if.h"
+
+extern devclass_t ntb_hw_devclass;
+SYSCTL_DECL(_hw_ntb);
+
+int ntb_register_device(device_t ntb);
+int ntb_unregister_device(device_t ntb);
+
+/*
+ * ntb_link_event() - notify driver context of a change in link status
+ * @ntb:        NTB device context
+ *
+ * Notify the driver context that the link status may have changed.  The driver
+ * should call ntb_link_is_up() to get the current status.
+ */
+void ntb_link_event(device_t ntb);
+
+/*
+ * ntb_db_event() - notify driver context of a doorbell event
+ * @ntb:        NTB device context
+ * @vector:     Interrupt vector number
+ *
+ * Notify the driver context of a doorbell event.  If hardware supports
+ * multiple interrupt vectors for doorbells, the vector number indicates which
+ * vector received the interrupt.  The vector number is relative to the first
+ * vector used for doorbells, starting at zero, and must be less than
+ * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
+ * doorbell bits need service, and ntb_db_vector_mask() to determine which of
+ * those bits are associated with the vector number.
+ */
+void ntb_db_event(device_t ntb, uint32_t vec);
+
+/*
+ * ntb_link_is_up() - get the current ntb link state
+ * @ntb:        NTB device context
+ * @speed:      OUT - The link speed expressed as PCIe generation number
+ * @width:      OUT - The link width expressed as the number of PCIe lanes
+ *
+ * RETURNS: true or false based on the hardware link state
+ */
+bool ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width);
+
+/*
+ * ntb_link_enable() - enable the link on the secondary side of the ntb
+ * @ntb:        NTB device context
+ * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
+ * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
+ *
+ * Enable the link on the secondary side of the ntb.  This can only be done
+ * from the primary side of the ntb in primary or b2b topology.  The ntb device
+ * should train the link to its maximum speed and width, or the requested speed
+ * and width, whichever is smaller, if supported.
+ *
+ * Return: Zero on success, otherwise an error number.
+ *
+ * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
+ *      and width input will be ignored.
+ */
+int ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width);
+
+/*
+ * ntb_link_disable() - disable the link on the secondary side of the ntb
+ * @ntb:        NTB device context
+ *
+ * Disable the link on the secondary side of the ntb.  This can only be done
+ * from the primary side of the ntb in primary or b2b topology.  The ntb device
+ * should disable the link.  Returning from this call must indicate that a
+ * barrier has passed, so that no more writes may pass in either direction
+ * across the link, except if this call returns an error number.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_link_disable(device_t ntb);
+
+/*
+ * get enable status of the link on the secondary side of the ntb
+ */
+bool ntb_link_enabled(device_t ntb);
+
+/*
+ * ntb_set_ctx() - associate a driver context with an ntb device
+ * @ntb:        NTB device context
+ * @ctx:        Driver context
+ * @ctx_ops:    Driver context operations
+ *
+ * Associate a driver context and operations with a ntb device.  The context is
+ * provided by the client driver, and the driver may associate a different
+ * context with each ntb device.
+ *
+ * Return: Zero if the context is associated, otherwise an error number.
+ */
+int ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops);
+
+/*
+ * ntb_get_ctx() - get a driver context associated with an ntb device
+ * @ntb:        NTB device context
+ * @ctx_ops:    Driver context operations
+ *
+ * Get a driver context and operations associated with a ntb device.
+ */
+void * ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops);
+
+/*
+ * ntb_clear_ctx() - disassociate any driver context from an ntb device
+ * @ntb:        NTB device context
+ *
+ * Clear any association that may exist between a driver context and the ntb
+ * device.
+ */
+void ntb_clear_ctx(device_t ntb);
+
+/*
+ * ntb_mw_count() - Get the number of memory windows available for KPI
+ * consumers.
+ *
+ * (Excludes any MW wholly reserved for register access.)
+ */
+uint8_t ntb_mw_count(device_t ntb);
+
+/*
+ * ntb_mw_get_range() - get the range of a memory window
+ * @ntb:        NTB device context
+ * @idx:        Memory window number
+ * @base:       OUT - the base address for mapping the memory window
+ * @size:       OUT - the size for mapping the memory window
+ * @align:      OUT - the base alignment for translating the memory window
+ * @align_size: OUT - the size alignment for translating the memory window
+ *
+ * Get the range of a memory window.  NULL may be given for any output
+ * parameter if the value is not needed.  The base and size may be used for
+ * mapping the memory window, to access the peer memory.  The alignment and
+ * size may be used for translating the memory window, for the peer to access
+ * memory on the local system.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base,
+    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
+    bus_addr_t *plimit);
+
+/*
+ * ntb_mw_set_trans() - set the translation of a memory window
+ * @ntb:        NTB device context
+ * @idx:        Memory window number
+ * @addr:       The dma address local memory to expose to the peer
+ * @size:       The size of the local memory to expose to the peer
+ *
+ * Set the translation of a memory window.  The peer may access local memory
+ * through the window starting at the address, up to the size.  The address
+ * must be aligned to the alignment specified by ntb_mw_get_range().  The size
+ * must be aligned to the size alignment specified by ntb_mw_get_range().  The
+ * address must be below the plimit specified by ntb_mw_get_range() (i.e. for
+ * 32-bit BARs).
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr,
+    size_t size);
+
+/*
+ * ntb_mw_clear_trans() - clear the translation of a memory window
+ * @ntb:	NTB device context
+ * @idx:	Memory window number
+ *
+ * Clear the translation of a memory window.  The peer may no longer access
+ * local memory through the window.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_clear_trans(device_t ntb, unsigned mw_idx);
+
+/*
+ * ntb_mw_get_wc - Get the write-combine status of a memory window
+ *
+ * Returns:  Zero on success, setting *mode; otherwise an error number (e.g. if
+ * idx is an invalid memory window).
+ *
+ * Mode is a VM_MEMATTR_* type.
+ */
+int ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode);
+
+/*
+ * ntb_mw_set_wc - Set the write-combine status of a memory window
+ *
+ * If 'mode' matches the current status, this does nothing and succeeds.  Mode
+ * is a VM_MEMATTR_* type.
+ *
+ * Returns:  Zero on success, setting the caching attribute on the virtual
+ * mapping of the BAR; otherwise an error number (e.g. if mw_idx is an invalid
+ * memory window, or if changing the caching attribute fails).
+ */
+int ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode);
+
+/*
+ * ntb_spad_count() - get the total scratch regs usable
+ * @ntb: pointer to ntb_softc instance
+ *
+ * This function returns the maximum number of 32bit scratchpad registers
+ * usable by the upper layer.
+ *
+ * RETURNS: total number of scratch pad registers available
+ */
+uint8_t ntb_spad_count(device_t ntb);
+
+/*
+ * ntb_spad_clear() - zero local scratch registers
+ * @ntb: pointer to ntb_softc instance
+ *
+ * This function overwrites all local scratchpad registers with zeroes.
+ */
+void ntb_spad_clear(device_t ntb);
+
+/*
+ * ntb_spad_write() - write to the secondary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register. The register resides on the secondary (external) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val);
+
+/*
+ * ntb_spad_read() - read from the primary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to scratchpad register, 0 based
+ * @val: pointer to 32bit integer for storing the register value
+ *
+ * This function allows reading of the 32bit scratchpad register on
+ * the primary (internal) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val);
+
+/*
+ * ntb_peer_spad_write() - write to the secondary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register. The register resides on the secondary (external) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val);
+
+/*
+ * ntb_peer_spad_read() - read from the primary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to scratchpad register, 0 based
+ * @val: pointer to 32bit integer for storing the register value
+ *
+ * This function allows reading of the 32bit scratchpad register on
+ * the primary (internal) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val);
+
+/*
+ * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
+ * @ntb:	NTB device context
+ *
+ * Hardware may support different number or arrangement of doorbell bits.
+ *
+ * Return: A mask of doorbell bits supported by the ntb.
+ */
+uint64_t ntb_db_valid_mask(device_t ntb);
+
+/*
+ * ntb_db_vector_count() - get the number of doorbell interrupt vectors
+ * @ntb:	NTB device context.
+ *
+ * Hardware may support different number of interrupt vectors.
+ *
+ * Return: The number of doorbell interrupt vectors.
+ */
+int ntb_db_vector_count(device_t ntb);
+
+/*
+ * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
+ * @ntb:	NTB device context
+ * @vector:	Doorbell vector number
+ *
+ * Each interrupt vector may have a different number or arrangement of bits.
+ *
+ * Return: A mask of doorbell bits serviced by a vector.
+ */
+uint64_t ntb_db_vector_mask(device_t ntb, uint32_t vector);
+
+/*
+ * ntb_peer_db_addr() - address and size of the peer doorbell register
+ * @ntb:	NTB device context.
+ * @db_addr:	OUT - The address of the peer doorbell register.
+ * @db_size:	OUT - The number of bytes to write the peer doorbell register.
+ *
+ * Return the address of the peer doorbell register.  This may be used, for
+ * example, by drivers that offload memory copy operations to a dma engine.
+ * The drivers may wish to ring the peer doorbell at the completion of memory
+ * copy operations.  For efficiency, and to simplify ordering of operations
+ * between the dma memory copies and the ringing doorbell, the driver may
+ * append one additional dma memory copy with the doorbell register as the
+ * destination, after the memory copy operations.
+ *
+ * Return: Zero on success, otherwise an error number.
+ *
+ * Note that writing the peer doorbell via a memory window will *not* generate
+ * an interrupt on the remote host; that must be done separately.
+ */
+int ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size);
+
+/*
+ * ntb_db_clear() - clear bits in the local doorbell register
+ * @ntb:	NTB device context.
+ * @bits:	Doorbell bits to clear.
+ *
+ * Clear bits in the local doorbell register, arming the bits for the next
+ * doorbell.
+ *
+ * This function does not return a value.
+ */
+void ntb_db_clear(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_db_clear_mask() - clear bits in the local doorbell mask
+ * @ntb:	NTB device context.
+ * @bits:	Doorbell bits to clear.
+ *
+ * Clear bits in the local doorbell mask register, allowing doorbell interrupts
+ * to be generated for those doorbell bits.  If a doorbell bit is already
+ * set at the time the mask is cleared, and the corresponding mask bit is
+ * changed from set to clear, then the ntb driver must ensure that
+ * ntb_db_event() is called.  If the hardware does not generate the interrupt
+ * on clearing the mask bit, then the driver must call ntb_db_event() anyway.
+ *
+ * This function does not return a value.
+ */
+void ntb_db_clear_mask(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_db_read() - read the local doorbell register
+ * @ntb:	NTB device context.
+ *
+ * Read the local doorbell register, and return the bits that are set.
+ *
+ * Return: The bits currently set in the local doorbell register.
+ */
+uint64_t ntb_db_read(device_t ntb);
+
+/*
+ * ntb_db_set_mask() - set bits in the local doorbell mask
+ * @ntb:	NTB device context.
+ * @bits:	Doorbell mask bits to set.
+ *
+ * Set bits in the local doorbell mask register, preventing doorbell interrupts
+ * from being generated for those doorbell bits.  Bits that were already set
+ * must remain set.
+ *
+ * This function does not return a value.
+ */
+void ntb_db_set_mask(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_peer_db_set() - Set the doorbell on the secondary/external side
+ * @ntb: pointer to ntb_softc instance
+ * @bits: doorbell bits to ring
+ *
+ * This function allows triggering of a doorbell on the secondary/external
+ * side that will initiate an interrupt on the remote host
+ */
+void ntb_peer_db_set(device_t ntb, uint64_t bits);
+
+#endif /* _NTB_H_ */
diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.c b/sys/dev/ntb/ntb_hw/ntb_hw.c
index c1381f0..dac3699 100644
--- a/sys/dev/ntb/ntb_hw/ntb_hw.c
+++ b/sys/dev/ntb/ntb_hw/ntb_hw.c
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
  * Copyright (C) 2013 Intel Corporation
  * Copyright (C) 2015 EMC Corporation
  * All rights reserved.
@@ -25,6 +26,16 @@
  * SUCH DAMAGE.
  */
 
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using PCI-e links, providing remote memory access.
+ *
+ * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
@@ -33,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
+#include <sys/interrupt.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
@@ -50,19 +62,7 @@ __FBSDID("$FreeBSD$");
 #include <dev/pci/pcivar.h>
 
 #include "ntb_regs.h"
-#include "ntb_hw.h"
-
-/*
- * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
- * allows you to connect two systems using a PCI-e link.
- *
- * This module contains the hardware abstraction layer for the NTB. It allows
- * you to send and receive interrupts, map the memory windows and send and
- * receive messages in the scratch-pad registers.
- *
- * NOTE: Much of the code in this module is shared with Linux. Any patches may
- * be picked up and redistributed in Linux with a dual GPL/BSD license.
- */
+#include "../ntb.h"
 
 #define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)
 
@@ -70,8 +70,6 @@ __FBSDID("$FreeBSD$");
 #define ATOM_LINK_RECOVERY_TIME	500 /* ms */
 #define BAR_HIGH_MASK		(~((1ull << 12) - 1))
 
-#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
-
 #define	NTB_MSIX_VER_GUARD	0xaabbccdd
 #define	NTB_MSIX_RECEIVED	0xe0f0e0f0
 
@@ -122,8 +120,8 @@ enum {
 };
 
 /* Device features and workarounds */
-#define HAS_FEATURE(feature)	\
-	((ntb->features & (feature)) != 0)
+#define HAS_FEATURE(ntb, feature)	\
+	(((ntb)->features & (feature)) != 0)
 
 struct ntb_hw_info {
 	uint32_t		device_id;
@@ -202,6 +200,9 @@ struct ntb_msix_data {
 };
 
 struct ntb_softc {
+	/* ntb.c context. Do not move! Must go first! */
+	void			*ntb_store;
+
 	device_t		device;
 	enum ntb_device_type	type;
 	uint32_t		features;
@@ -220,13 +221,7 @@ struct ntb_softc {
 	struct callout		heartbeat_timer;
 	struct callout		lr_timer;
 
-	void			*ntb_ctx;
-	const struct ntb_ctx_ops *ctx_ops;
 	struct ntb_vec		*msix_vec;
-#define CTX_LOCK(sc)		mtx_lock(&(sc)->ctx_lock)
-#define CTX_UNLOCK(sc)		mtx_unlock(&(sc)->ctx_lock)
-#define CTX_ASSERT(sc,f)	mtx_assert(&(sc)->ctx_lock, (f))
-	struct mtx		ctx_lock;
 
 	uint32_t		ppd;
 	enum ntb_conn_type	conn_type;
@@ -258,6 +253,7 @@ struct ntb_softc {
 	uint64_t			db_valid_mask;
 	uint64_t			db_link_mask;
 	uint64_t			db_mask;
+	uint64_t			fake_db_bell;	/* NTB_SB01BASE_LOCKUP*/
 
 	int				last_ts;	/* ticks @ last irq */
 
@@ -287,61 +283,74 @@ bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
 }
 #endif
 
-#define ntb_bar_read(SIZE, bar, offset) \
+#define intel_ntb_bar_read(SIZE, bar, offset) \
 	    bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
 	    ntb->bar_info[(bar)].pci_bus_handle, (offset))
-#define ntb_bar_write(SIZE, bar, offset, val) \
+#define intel_ntb_bar_write(SIZE, bar, offset, val) \
 	    bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
 	    ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
-#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
-#define ntb_reg_write(SIZE, offset, val) \
-	    ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
-#define ntb_mw_read(SIZE, offset) \
-	    ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
-#define ntb_mw_write(SIZE, offset, val) \
-	    ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
+#define intel_ntb_reg_read(SIZE, offset) \
+	    intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
+#define intel_ntb_reg_write(SIZE, offset, val) \
+	    intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
+#define intel_ntb_mw_read(SIZE, offset) \
+	    intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
+		offset)
+#define intel_ntb_mw_write(SIZE, offset, val) \
+	    intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
 		offset, val)
 
-static int ntb_probe(device_t device);
-static int ntb_attach(device_t device);
-static int ntb_detach(device_t device);
-static unsigned ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
-static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
+static int intel_ntb_probe(device_t device);
+static int intel_ntb_attach(device_t device);
+static int intel_ntb_detach(device_t device);
+static uint64_t intel_ntb_db_valid_mask(device_t dev);
+static void intel_ntb_spad_clear(device_t dev);
+static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
+static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
+    enum ntb_width *width);
+static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
+    enum ntb_width width);
+static int intel_ntb_link_disable(device_t dev);
+static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
+static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);
+
+static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
+static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
 static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
 static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
     uint32_t *base, uint32_t *xlat, uint32_t *lmt);
-static int ntb_map_pci_bars(struct ntb_softc *ntb);
-static int ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
+static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
+static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
     vm_memattr_t);
 static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
     const char *);
 static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
 static int map_memory_window_bar(struct ntb_softc *ntb,
     struct ntb_pci_bar_info *bar);
-static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
-static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
-static int ntb_init_isr(struct ntb_softc *ntb);
-static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
-static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
-static void ntb_teardown_interrupts(struct ntb_softc *ntb);
-static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
-static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
+static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
+static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
+static int intel_ntb_init_isr(struct ntb_softc *ntb);
+static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
+static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
+static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
+static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
+static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
 static void ndev_vec_isr(void *arg);
 static void ndev_irq_isr(void *arg);
 static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
 static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
 static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
-static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
-static void ntb_free_msix_vec(struct ntb_softc *ntb);
-static void ntb_get_msix_info(struct ntb_softc *ntb);
-static void ntb_exchange_msix(void *);
-static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
-static void ntb_detect_max_mw(struct ntb_softc *ntb);
-static int ntb_detect_xeon(struct ntb_softc *ntb);
-static int ntb_detect_atom(struct ntb_softc *ntb);
-static int ntb_xeon_init_dev(struct ntb_softc *ntb);
-static int ntb_atom_init_dev(struct ntb_softc *ntb);
-static void ntb_teardown_xeon(struct ntb_softc *ntb);
+static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
+static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
+static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
+static void intel_ntb_exchange_msix(void *);
+static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
+static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
+static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
+static int intel_ntb_detect_atom(struct ntb_softc *ntb);
+static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
+static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
+static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
 static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
 static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
     enum ntb_bar regbar);
@@ -351,18 +360,16 @@ static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
     enum ntb_bar idx);
 static int xeon_setup_b2b_mw(struct ntb_softc *,
     const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
-static int xeon_setup_msix_bar(struct ntb_softc *);
 static inline bool link_is_up(struct ntb_softc *ntb);
 static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
 static inline bool atom_link_is_err(struct ntb_softc *ntb);
-static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
-static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
+static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
+static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
 static void atom_link_hb(void *arg);
-static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
 static void recover_atom_link(void *arg);
-static bool ntb_poll_link(struct ntb_softc *ntb);
+static bool intel_ntb_poll_link(struct ntb_softc *ntb);
 static void save_bar_parameters(struct ntb_pci_bar_info *bar);
-static void ntb_sysctl_init(struct ntb_softc *);
+static void intel_ntb_sysctl_init(struct ntb_softc *);
 static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
@@ -372,7 +379,7 @@ static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
 static unsigned g_ntb_hw_debug_level;
 SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
     &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
-#define ntb_printf(lvl, ...) do {				\
+#define intel_ntb_printf(lvl, ...) do {				\
 	if ((lvl) <= g_ntb_hw_debug_level) {			\
 		device_printf(ntb->device, __VA_ARGS__);	\
 	}							\
@@ -395,7 +402,7 @@ SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
     "UC-: " __XSTRING(_NTB_PAT_UCM));
 
 static inline vm_memattr_t
-ntb_pat_flags(void)
+intel_ntb_pat_flags(void)
 {
 
 	switch (g_ntb_mw_pat) {
@@ -421,7 +428,7 @@ ntb_pat_flags(void)
  * anywhere better yet.
  */
 static inline const char *
-ntb_vm_memattr_to_str(vm_memattr_t pat)
+intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
 {
 
 	switch (pat) {
@@ -442,7 +449,7 @@ ntb_vm_memattr_to_str(vm_memattr_t pat)
 	}
 }
 
-static int g_ntb_msix_idx = 0;
+static int g_ntb_msix_idx = 1;
 SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
     0, "Use this memory window to access the peer MSIX message complex on "
     "certain Xeon-based NTB systems, as a workaround for a hardware errata.  "
@@ -457,6 +464,18 @@ SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
     "available memory window.  Both sides of the NTB MUST set the same "
     "value here!  (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
 
+/* Hardware owns the low 16 bits of features. */
+#define NTB_BAR_SIZE_4K		(1 << 0)
+#define NTB_SDOORBELL_LOCKUP	(1 << 1)
+#define NTB_SB01BASE_LOCKUP	(1 << 2)
+#define NTB_B2BDOORBELL_BIT14	(1 << 3)
+/* Software/configuration owns the top 16 bits. */
+#define NTB_SPLIT_BAR		(1ull << 16)
+
+#define NTB_FEATURES_STR \
+    "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
+    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
+
 static struct ntb_hw_info pci_ids[] = {
 	/* XXX: PS/SS IDs left out until they are supported. */
 	{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
@@ -597,35 +616,15 @@ SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
  */
 MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
 
-static device_method_t ntb_pci_methods[] = {
-	/* Device interface */
-	DEVMETHOD(device_probe,     ntb_probe),
-	DEVMETHOD(device_attach,    ntb_attach),
-	DEVMETHOD(device_detach,    ntb_detach),
-	DEVMETHOD_END
-};
-
-static driver_t ntb_pci_driver = {
-	"ntb_hw",
-	ntb_pci_methods,
-	sizeof(struct ntb_softc),
-};
-
-static devclass_t ntb_devclass;
-DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
-MODULE_VERSION(ntb_hw, 1);
-
-SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
-
 /*
  * OS <-> Driver linkage functions
  */
 static int
-ntb_probe(device_t device)
+intel_ntb_probe(device_t device)
 {
 	struct ntb_hw_info *p;
 
-	p = ntb_get_device_info(pci_get_devid(device));
+	p = intel_ntb_get_device_info(pci_get_devid(device));
 	if (p == NULL)
 		return (ENXIO);
 
@@ -634,14 +633,14 @@ ntb_probe(device_t device)
 }
 
 static int
-ntb_attach(device_t device)
+intel_ntb_attach(device_t device)
 {
 	struct ntb_softc *ntb;
 	struct ntb_hw_info *p;
 	int error;
 
-	ntb = DEVICE2SOFTC(device);
-	p = ntb_get_device_info(pci_get_devid(device));
+	ntb = device_get_softc(device);
+	p = intel_ntb_get_device_info(pci_get_devid(device));
 
 	ntb->device = device;
 	ntb->type = p->type;
@@ -654,47 +653,52 @@ ntb_attach(device_t device)
 	callout_init(&ntb->lr_timer, 1);
 	callout_init(&ntb->peer_msix_work, 1);
 	mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
-	mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_DEF);
 
 	if (ntb->type == NTB_ATOM)
-		error = ntb_detect_atom(ntb);
+		error = intel_ntb_detect_atom(ntb);
 	else
-		error = ntb_detect_xeon(ntb);
+		error = intel_ntb_detect_xeon(ntb);
 	if (error != 0)
 		goto out;
 
-	ntb_detect_max_mw(ntb);
+	intel_ntb_detect_max_mw(ntb);
 
 	pci_enable_busmaster(ntb->device);
 
-	error = ntb_map_pci_bars(ntb);
+	error = intel_ntb_map_pci_bars(ntb);
 	if (error != 0)
 		goto out;
 	if (ntb->type == NTB_ATOM)
-		error = ntb_atom_init_dev(ntb);
+		error = intel_ntb_atom_init_dev(ntb);
 	else
-		error = ntb_xeon_init_dev(ntb);
+		error = intel_ntb_xeon_init_dev(ntb);
 	if (error != 0)
 		goto out;
 
-	ntb_spad_clear(ntb);
+	intel_ntb_spad_clear(device);
+
+	intel_ntb_poll_link(ntb);
 
-	ntb_poll_link(ntb);
+	intel_ntb_sysctl_init(ntb);
 
-	ntb_sysctl_init(ntb);
+	/* Attach children to this controller */
+	error = ntb_register_device(device);
 
 out:
 	if (error != 0)
-		ntb_detach(device);
+		intel_ntb_detach(device);
 	return (error);
 }
 
 static int
-ntb_detach(device_t device)
+intel_ntb_detach(device_t device)
 {
 	struct ntb_softc *ntb;
 
-	ntb = DEVICE2SOFTC(device);
+	ntb = device_get_softc(device);
+
+	/* Detach & delete all children */
+	ntb_unregister_device(device);
 
 	if (ntb->self_reg != NULL) {
 		DB_MASK_LOCK(ntb);
@@ -706,13 +710,12 @@ ntb_detach(device_t device)
 	callout_drain(&ntb->peer_msix_work);
 	pci_disable_busmaster(ntb->device);
 	if (ntb->type == NTB_XEON)
-		ntb_teardown_xeon(ntb);
-	ntb_teardown_interrupts(ntb);
+		intel_ntb_teardown_xeon(ntb);
+	intel_ntb_teardown_interrupts(ntb);
 
 	mtx_destroy(&ntb->db_mask_lock);
-	mtx_destroy(&ntb->ctx_lock);
 
-	ntb_unmap_pci_bar(ntb);
+	intel_ntb_unmap_pci_bar(ntb);
 
 	return (0);
 }
@@ -721,7 +724,7 @@ ntb_detach(device_t device)
  * Driver internal routines
  */
 static inline enum ntb_bar
-ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
+intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
 {
 
 	KASSERT(mw < ntb->mw_count,
@@ -736,7 +739,7 @@ bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
 {
 	/* XXX This assertion could be stronger. */
 	KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
-	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
+	return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
 }
 
 static inline void
@@ -777,7 +780,7 @@ bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
 }
 
 static int
-ntb_map_pci_bars(struct ntb_softc *ntb)
+intel_ntb_map_pci_bars(struct ntb_softc *ntb)
 {
 	int rc;
 
@@ -802,7 +805,7 @@ ntb_map_pci_bars(struct ntb_softc *ntb)
 	ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
 	ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
 
-	if (!HAS_FEATURE(NTB_SPLIT_BAR))
+	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
 		goto out;
 
 	ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
@@ -876,7 +879,7 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
 	 * but the PCI driver does not honor the size in this call, so we have
 	 * to modify it after the fact.
 	 */
-	if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
+	if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
 		if (bar->pci_resource_id == PCIR_BAR(2))
 			bar_size_bits = pci_read_config(ntb->device,
 			    XEON_PBAR23SZ_OFFSET, 1);
@@ -903,7 +906,7 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
 	 * Optionally, mark MW BARs as anything other than UC to improve
 	 * performance.
 	 */
-	mapmode = ntb_pat_flags();
+	mapmode = intel_ntb_pat_flags();
 	if (mapmode == bar->map_mode)
 		return (0);
 
@@ -916,7 +919,7 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
 		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
 		    (char *)bar->vbase + bar->size - 1,
 		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
-		    ntb_vm_memattr_to_str(mapmode));
+		    intel_ntb_vm_memattr_to_str(mapmode));
 	} else
 		device_printf(ntb->device,
 		    "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
@@ -924,13 +927,13 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
 		    PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
 		    (char *)bar->vbase + bar->size - 1,
 		    (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
-		    ntb_vm_memattr_to_str(mapmode), rc);
+		    intel_ntb_vm_memattr_to_str(mapmode), rc);
 		/* Proceed anyway */
 	return (0);
 }
 
 static void
-ntb_unmap_pci_bar(struct ntb_softc *ntb)
+intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
 {
 	struct ntb_pci_bar_info *current_bar;
 	int i;
@@ -945,7 +948,7 @@ ntb_unmap_pci_bar(struct ntb_softc *ntb)
 }
 
 static int
-ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
+intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
 {
 	uint32_t i;
 	int rc;
@@ -998,7 +1001,7 @@ SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
  * round-robin fashion.
  */
 static int
-ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
+intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
 {
 	u_int *vectors;
 	uint32_t i;
@@ -1018,7 +1021,7 @@ ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
 }
 
 static int
-ntb_init_isr(struct ntb_softc *ntb)
+intel_ntb_init_isr(struct ntb_softc *ntb)
 {
 	uint32_t desired_vectors, num_vectors;
 	int rc;
@@ -1044,7 +1047,7 @@ ntb_init_isr(struct ntb_softc *ntb)
 			num_vectors--;
 
 		if (rc == 0 && num_vectors < desired_vectors) {
-			rc = ntb_remap_msix(ntb->device, desired_vectors,
+			rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
 			    num_vectors);
 			if (rc == 0)
 				num_vectors = desired_vectors;
@@ -1057,7 +1060,7 @@ ntb_init_isr(struct ntb_softc *ntb)
 		num_vectors = 1;
 
 	if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
-		if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
 			device_printf(ntb->device,
 			    "Errata workaround does not support MSI or INTX\n");
 			return (EINVAL);
@@ -1065,32 +1068,30 @@ ntb_init_isr(struct ntb_softc *ntb)
 
 		ntb->db_vec_count = 1;
 		ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
-		rc = ntb_setup_legacy_interrupt(ntb);
+		rc = intel_ntb_setup_legacy_interrupt(ntb);
 	} else {
 		if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
-		    HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		    HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
 			device_printf(ntb->device,
 			    "Errata workaround expects %d doorbell bits\n",
 			    XEON_NONLINK_DB_MSIX_BITS);
 			return (EINVAL);
 		}
 
-		ntb_create_msix_vec(ntb, num_vectors);
-		rc = ntb_setup_msix(ntb, num_vectors);
-		if (rc == 0 && HAS_FEATURE(NTB_SB01BASE_LOCKUP))
-			ntb_get_msix_info(ntb);
+		intel_ntb_create_msix_vec(ntb, num_vectors);
+		rc = intel_ntb_setup_msix(ntb, num_vectors);
 	}
 	if (rc != 0) {
 		device_printf(ntb->device,
 		    "Error allocating interrupts: %d\n", rc);
-		ntb_free_msix_vec(ntb);
+		intel_ntb_free_msix_vec(ntb);
 	}
 
 	return (rc);
 }
 
 static int
-ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
+intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
 {
 	int rc;
 
@@ -1117,7 +1118,7 @@ ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
 }
 
 static void
-ntb_teardown_interrupts(struct ntb_softc *ntb)
+intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
 {
 	struct ntb_int_info *current_int;
 	int i;
@@ -1133,7 +1134,7 @@ ntb_teardown_interrupts(struct ntb_softc *ntb)
 			    rman_get_rid(current_int->res), current_int->res);
 	}
 
-	ntb_free_msix_vec(ntb);
+	intel_ntb_free_msix_vec(ntb);
 	pci_release_msi(ntb->device);
 }
 
@@ -1146,11 +1147,11 @@ db_ioread(struct ntb_softc *ntb, uint64_t regoff)
 {
 
 	if (ntb->type == NTB_ATOM)
-		return (ntb_reg_read(8, regoff));
+		return (intel_ntb_reg_read(8, regoff));
 
 	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
 
-	return (ntb_reg_read(2, regoff));
+	return (intel_ntb_reg_read(2, regoff));
 }
 
 static inline void
@@ -1172,89 +1173,78 @@ db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
 {
 
 	if (ntb->type == NTB_ATOM) {
-		ntb_reg_write(8, regoff, val);
+		intel_ntb_reg_write(8, regoff, val);
 		return;
 	}
 
 	KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
-	ntb_reg_write(2, regoff, (uint16_t)val);
+	intel_ntb_reg_write(2, regoff, (uint16_t)val);
 }
 
-void
-ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
+static void
+intel_ntb_db_set_mask(device_t dev, uint64_t bits)
 {
-
-	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
-		return;
+	struct ntb_softc *ntb = device_get_softc(dev);
 
 	DB_MASK_LOCK(ntb);
 	ntb->db_mask |= bits;
-	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
+		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
 	DB_MASK_UNLOCK(ntb);
 }
 
-void
-ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
+static void
+intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
+	uint64_t ibits;
+	int i;
 
 	KASSERT((bits & ~ntb->db_valid_mask) == 0,
 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
 	     (uintmax_t)ntb->db_valid_mask));
 
-	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
-		return;
-
 	DB_MASK_LOCK(ntb);
+	ibits = ntb->fake_db_bell & ntb->db_mask & bits;
 	ntb->db_mask &= ~bits;
-	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		/* Simulate fake interrupts if unmasked DB bits are set. */
+		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+			if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
+				swi_sched(ntb->int_info[i].tag, 0);
+		}
+	} else {
+		db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+	}
 	DB_MASK_UNLOCK(ntb);
 }
 
-uint64_t
-ntb_db_read(struct ntb_softc *ntb)
+static uint64_t
+intel_ntb_db_read(device_t dev)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
-	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
-		uint64_t res;
-		unsigned i;
-
-		res = 0;
-		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
-			if (ntb->msix_vec[i].masked != 0)
-				res |= ntb_db_vector_mask(ntb, i);
-		}
-		return (res);
-	}
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
+		return (ntb->fake_db_bell);
 
 	return (db_ioread(ntb, ntb->self_reg->db_bell));
 }
 
-void
-ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
+static void
+intel_ntb_db_clear(device_t dev, uint64_t bits)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
 	KASSERT((bits & ~ntb->db_valid_mask) == 0,
 	    ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
 	     (uintmax_t)(bits & ~ntb->db_valid_mask),
 	     (uintmax_t)ntb->db_valid_mask));
 
-	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
-		unsigned i;
-
-		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
-			if ((bits & ntb_db_vector_mask(ntb, i)) != 0) {
-				DB_MASK_LOCK(ntb);
-				if (ntb->msix_vec[i].masked != 0) {
-					/* XXX These need a public API. */
-#if 0
-					pci_unmask_msix(ntb->device, i);
-#endif
-					ntb->msix_vec[i].masked = 0;
-				}
-				DB_MASK_UNLOCK(ntb);
-			}
-		}
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		DB_MASK_LOCK(ntb);
+		ntb->fake_db_bell &= ~bits;
+		DB_MASK_UNLOCK(ntb);
 		return;
 	}
 
@@ -1262,43 +1252,59 @@ ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
 }
 
 static inline uint64_t
-ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
+intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
 {
 	uint64_t shift, mask;
 
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		/*
+		 * Remap vectors in a custom way so that at least the first
+		 * three doorbells do not generate stray events.
+		 * This breaks Linux compatibility (if one existed)
+		 * when more than one DB is used (not by if_ntb).
+		 */
+		if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
+			return (1 << db_vector);
+		if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
+			return (0x7ffc);
+	}
+
 	shift = ntb->db_vec_shift;
 	mask = (1ull << shift) - 1;
 	return (mask << (shift * db_vector));
 }
 
 static void
-ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
+intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
 {
 	uint64_t vec_mask;
 
 	ntb->last_ts = ticks;
-	vec_mask = ntb_vec_mask(ntb, vec);
+	vec_mask = intel_ntb_vec_mask(ntb, vec);
 
 	if ((vec_mask & ntb->db_link_mask) != 0) {
-		if (ntb_poll_link(ntb))
-			ntb_link_event(ntb);
+		if (intel_ntb_poll_link(ntb))
+			ntb_link_event(ntb->device);
 	}
 
-	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
 	    (vec_mask & ntb->db_link_mask) == 0) {
 		DB_MASK_LOCK(ntb);
-		if (ntb->msix_vec[vec].masked == 0) {
-			/* XXX These need a public API. */
-#if 0
-			pci_mask_msix(ntb->device, vec);
-#endif
-			ntb->msix_vec[vec].masked = 1;
-		}
+
+		/* Do not report same DB events again if not cleared yet. */
+		vec_mask &= ~ntb->fake_db_bell;
+
+		/* Update our internal doorbell register. */
+		ntb->fake_db_bell |= vec_mask;
+
+		/* Do not report masked DB events. */
+		vec_mask &= ~ntb->db_mask;
+
 		DB_MASK_UNLOCK(ntb);
 	}
 
 	if ((vec_mask & ntb->db_valid_mask) != 0)
-		ntb_db_event(ntb, vec);
+		ntb_db_event(ntb->device, vec);
 }
 
 static void
@@ -1306,18 +1312,18 @@ ndev_vec_isr(void *arg)
 {
 	struct ntb_vec *nvec = arg;
 
-	ntb_interrupt(nvec->ntb, nvec->num);
+	intel_ntb_interrupt(nvec->ntb, nvec->num);
 }
 
 static void
 ndev_irq_isr(void *arg)
 {
 	/* If we couldn't set up MSI-X, we only have the one vector. */
-	ntb_interrupt(arg, 0);
+	intel_ntb_interrupt(arg, 0);
 }
 
 static int
-ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
+intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
 {
 	uint32_t i;
 
@@ -1332,7 +1338,7 @@ ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
 }
 
 static void
-ntb_free_msix_vec(struct ntb_softc *ntb)
+intel_ntb_free_msix_vec(struct ntb_softc *ntb)
 {
 
 	if (ntb->msix_vec == NULL)
@@ -1343,7 +1349,7 @@ ntb_free_msix_vec(struct ntb_softc *ntb)
 }
 
 static void
-ntb_get_msix_info(struct ntb_softc *ntb)
+intel_ntb_get_msix_info(struct ntb_softc *ntb)
 {
 	struct pci_devinfo *dinfo;
 	struct pcicfg_msix *msix;
@@ -1352,8 +1358,6 @@ ntb_get_msix_info(struct ntb_softc *ntb)
 	dinfo = device_get_ivars(ntb->device);
 	msix = &dinfo->cfg.msix;
 
-	laddr = data = 0;
-
 	CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
 
 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
@@ -1361,7 +1365,7 @@ ntb_get_msix_info(struct ntb_softc *ntb)
 
 		laddr = bus_read_4(msix->msix_table_res, offset +
 		    PCI_MSIX_ENTRY_LOWER_ADDR);
-		ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
+		intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
 
 		KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
 		    ("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
@@ -1370,14 +1374,14 @@ ntb_get_msix_info(struct ntb_softc *ntb)
 
 		data = bus_read_4(msix->msix_table_res, offset +
 		    PCI_MSIX_ENTRY_DATA);
-		ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
+		intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
 
 		ntb->msix_data[i].nmd_data = data;
 	}
 }
 
 static struct ntb_hw_info *
-ntb_get_device_info(uint32_t device_id)
+intel_ntb_get_device_info(uint32_t device_id)
 {
 	struct ntb_hw_info *ep = pci_ids;
 
@@ -1390,15 +1394,15 @@ ntb_get_device_info(uint32_t device_id)
 }
 
 static void
-ntb_teardown_xeon(struct ntb_softc *ntb)
+intel_ntb_teardown_xeon(struct ntb_softc *ntb)
 {
 
 	if (ntb->reg != NULL)
-		ntb_link_disable(ntb);
+		intel_ntb_link_disable(ntb->device);
 }
 
 static void
-ntb_detect_max_mw(struct ntb_softc *ntb)
+intel_ntb_detect_max_mw(struct ntb_softc *ntb)
 {
 
 	if (ntb->type == NTB_ATOM) {
@@ -1406,14 +1410,14 @@ ntb_detect_max_mw(struct ntb_softc *ntb)
 		return;
 	}
 
-	if (HAS_FEATURE(NTB_SPLIT_BAR))
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
 		ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
 	else
 		ntb->mw_count = XEON_SNB_MW_COUNT;
 }
 
 static int
-ntb_detect_xeon(struct ntb_softc *ntb)
+intel_ntb_detect_xeon(struct ntb_softc *ntb)
 {
 	uint8_t ppd, conn_type;
 
@@ -1428,11 +1432,21 @@ ntb_detect_xeon(struct ntb_softc *ntb)
 	if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
 		ntb->features |= NTB_SPLIT_BAR;
 
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
+	    !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+		device_printf(ntb->device,
+		    "Can not apply SB01BASE_LOCKUP workaround "
+		    "with split BARs disabled!\n");
+		device_printf(ntb->device,
+		    "Expect system hangs under heavy NTB traffic!\n");
+		ntb->features &= ~NTB_SB01BASE_LOCKUP;
+	}
+
 	/*
 	 * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
 	 * errata workaround; only do one at a time.
 	 */
-	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
 		ntb->features &= ~NTB_SDOORBELL_LOCKUP;
 
 	conn_type = ppd & XEON_PPD_CONN_TYPE;
@@ -1451,7 +1465,7 @@ ntb_detect_xeon(struct ntb_softc *ntb)
 }
 
 static int
-ntb_detect_atom(struct ntb_softc *ntb)
+intel_ntb_detect_atom(struct ntb_softc *ntb)
 {
 	uint32_t ppd, conn_type;
 
@@ -1476,7 +1490,7 @@ ntb_detect_atom(struct ntb_softc *ntb)
 }
 
 static int
-ntb_xeon_init_dev(struct ntb_softc *ntb)
+intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
 {
 	int rc;
 
@@ -1497,15 +1511,16 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
 	ntb->peer_reg = &xeon_b2b_reg;
 	ntb->xlat_reg = &xeon_sec_xlat;
 
-	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		ntb->fake_db_bell = 0;
 		ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
 		    ntb->mw_count;
-		ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
+		intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
 		    g_ntb_msix_idx, ntb->msix_mw_idx);
-		rc = ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
+		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
 		    VM_MEMATTR_UNCACHEABLE);
 		KASSERT(rc == 0, ("shouldn't fail"));
-	} else if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
+	} else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
 		/*
 		 * There is a Xeon hardware errata related to writes to SDOORBELL or
 		 * B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
@@ -1515,12 +1530,12 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
 		 */
 		ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
 		    ntb->mw_count;
-		ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
+		intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
 		    g_ntb_mw_idx, ntb->b2b_mw_idx);
-		rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
+		rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
 		    VM_MEMATTR_UNCACHEABLE);
 		KASSERT(rc == 0, ("shouldn't fail"));
-	} else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
+	} else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
 		/*
 		 * HW Errata on bit 14 of b2bdoorbell register.  Writes will not be
 		 * mirrored to the remote system.  Shrink the number of bits by one,
@@ -1543,7 +1558,7 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
 		return (rc);
 
 	/* Enable Bus Master and Memory Space on the secondary side */
-	ntb_reg_write(2, XEON_SPCICMD_OFFSET,
+	intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
 	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
 
 	/*
@@ -1554,16 +1569,12 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
 	db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
 	DB_MASK_UNLOCK(ntb);
 
-	rc = xeon_setup_msix_bar(ntb);
-	if (rc != 0)
-		return (rc);
-
-	rc = ntb_init_isr(ntb);
+	rc = intel_ntb_init_isr(ntb);
 	return (rc);
 }
 
 static int
-ntb_atom_init_dev(struct ntb_softc *ntb)
+intel_ntb_atom_init_dev(struct ntb_softc *ntb)
 {
 	int error;
 
@@ -1590,15 +1601,15 @@ ntb_atom_init_dev(struct ntb_softc *ntb)
 	configure_atom_secondary_side_bars(ntb);
 
 	/* Enable Bus Master and Memory Space on the secondary side */
-	ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
+	intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
 	    PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
 
-	error = ntb_init_isr(ntb);
+	error = intel_ntb_init_isr(ntb);
 	if (error != 0)
 		return (error);
 
 	/* Initiate PCI-E link training */
-	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
 
 	callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
 
@@ -1611,19 +1622,19 @@ configure_atom_secondary_side_bars(struct ntb_softc *ntb)
 {
 
 	if (ntb->dev_type == NTB_DEV_USD) {
-		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
+		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
 		    XEON_B2B_BAR2_ADDR64);
-		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
+		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
 		    XEON_B2B_BAR4_ADDR64);
-		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
-		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
+		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
+		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
 	} else {
-		ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
+		intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
 		    XEON_B2B_BAR2_ADDR64);
-		ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
+		intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
 		    XEON_B2B_BAR4_ADDR64);
-		ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
-		ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
+		intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
+		intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
 	}
 }
 
@@ -1649,7 +1660,7 @@ xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
 	struct ntb_pci_bar_info *bar;
 	uint8_t bar_sz;
 
-	if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
+	if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
 		return;
 
 	bar = &ntb->bar_info[idx];
@@ -1673,28 +1684,28 @@ xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
 	uint32_t base_reg, lmt_reg;
 
 	bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
-	if (idx == regbar)
-		bar_addr += ntb->b2b_off;
+	if (idx == regbar) {
+		if (ntb->b2b_off)
+			bar_addr += ntb->b2b_off;
+		else
+			bar_addr = 0;
+	}
 
-	/*
-	 * Set limit registers first to avoid an errata where setting the base
-	 * registers locks the limit registers.
-	 */
 	if (!bar_is_64bit(ntb, idx)) {
-		ntb_reg_write(4, lmt_reg, bar_addr);
-		reg_val = ntb_reg_read(4, lmt_reg);
+		intel_ntb_reg_write(4, base_reg, bar_addr);
+		reg_val = intel_ntb_reg_read(4, base_reg);
 		(void)reg_val;
 
-		ntb_reg_write(4, base_reg, bar_addr);
-		reg_val = ntb_reg_read(4, base_reg);
+		intel_ntb_reg_write(4, lmt_reg, bar_addr);
+		reg_val = intel_ntb_reg_read(4, lmt_reg);
 		(void)reg_val;
 	} else {
-		ntb_reg_write(8, lmt_reg, bar_addr);
-		reg_val = ntb_reg_read(8, lmt_reg);
+		intel_ntb_reg_write(8, base_reg, bar_addr);
+		reg_val = intel_ntb_reg_read(8, base_reg);
 		(void)reg_val;
 
-		ntb_reg_write(8, base_reg, bar_addr);
-		reg_val = ntb_reg_read(8, base_reg);
+		intel_ntb_reg_write(8, lmt_reg, bar_addr);
+		reg_val = intel_ntb_reg_read(8, lmt_reg);
 		(void)reg_val;
 	}
 }
@@ -1705,30 +1716,17 @@ xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
 	struct ntb_pci_bar_info *bar;
 
 	bar = &ntb->bar_info[idx];
-	if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
-		ntb_reg_write(4, bar->pbarxlat_off, base_addr);
-		base_addr = ntb_reg_read(4, bar->pbarxlat_off);
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
+		intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
+		base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
 	} else {
-		ntb_reg_write(8, bar->pbarxlat_off, base_addr);
-		base_addr = ntb_reg_read(8, bar->pbarxlat_off);
+		intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
+		base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
 	}
 	(void)base_addr;
 }
 
 static int
-xeon_setup_msix_bar(struct ntb_softc *ntb)
-{
-	enum ntb_bar bar_num;
-
-	if (!HAS_FEATURE(NTB_SB01BASE_LOCKUP))
-		return (0);
-
-	bar_num = ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
-	ntb->peer_lapic_bar =  &ntb->bar_info[bar_num];
-	return (0);
-}
-
-static int
 xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
     const struct ntb_b2b_addr *peer_addr)
 {
@@ -1742,7 +1740,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
 		b2b_bar_num = NTB_CONFIG_BAR;
 		ntb->b2b_off = 0;
 	} else {
-		b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
+		b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
 		KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
 		    ("invalid b2b mw bar"));
 
@@ -1773,7 +1771,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
 		bar_addr = addr->bar0_addr;
 	else if (b2b_bar_num == NTB_B2B_BAR_1)
 		bar_addr = addr->bar2_addr64;
-	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
+	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
 		bar_addr = addr->bar4_addr64;
 	else if (b2b_bar_num == NTB_B2B_BAR_2)
 		bar_addr = addr->bar4_addr32;
@@ -1782,7 +1780,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
 	else
 		KASSERT(false, ("invalid bar"));
 
-	ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
+	intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
 
 	/*
 	 * Other SBARs are normally hit by the PBAR xlat, except for the b2b
@@ -1793,7 +1791,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
 	 */
 	xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
 	    b2b_bar_num);
-	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
 		xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
 		    NTB_B2B_BAR_2, b2b_bar_num);
 		xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
@@ -1803,56 +1801,41 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
 		    NTB_B2B_BAR_2, b2b_bar_num);
 
 	/* Zero incoming translation addrs */
-	ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
-	ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
-
-	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
-		size_t size, xlatoffset;
+	intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
+	intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
 
-		switch (ntb_mw_to_bar(ntb, ntb->msix_mw_idx)) {
-		case NTB_B2B_BAR_1:
-			size = 8;
-			xlatoffset = XEON_SBAR2XLAT_OFFSET;
-			break;
-		case NTB_B2B_BAR_2:
-			xlatoffset = XEON_SBAR4XLAT_OFFSET;
-			if (HAS_FEATURE(NTB_SPLIT_BAR))
-				size = 4;
-			else
-				size = 8;
-			break;
-		case NTB_B2B_BAR_3:
-			xlatoffset = XEON_SBAR5XLAT_OFFSET;
-			size = 4;
-			break;
-		default:
-			KASSERT(false, ("Bogus msix mw idx: %u",
-			    ntb->msix_mw_idx));
-			return (EINVAL);
-		}
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+		uint32_t xlat_reg, lmt_reg;
+		enum ntb_bar bar_num;
 
 		/*
 		 * We point the chosen MSIX MW BAR xlat to remote LAPIC for
 		 * workaround
 		 */
-		if (size == 4) {
-			ntb_reg_write(4, xlatoffset, MSI_INTEL_ADDR_BASE);
-			ntb->msix_xlat = ntb_reg_read(4, xlatoffset);
+		bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
+		bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
+		if (bar_is_64bit(ntb, bar_num)) {
+			intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
+			ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
+			intel_ntb_reg_write(8, lmt_reg, 0);
 		} else {
-			ntb_reg_write(8, xlatoffset, MSI_INTEL_ADDR_BASE);
-			ntb->msix_xlat = ntb_reg_read(8, xlatoffset);
+			intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
+			ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
+			intel_ntb_reg_write(4, lmt_reg, 0);
 		}
+
+		ntb->peer_lapic_bar =  &ntb->bar_info[bar_num];
 	}
-	(void)ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
-	(void)ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
+	(void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
+	(void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
 
 	/* Zero outgoing translation limits (whole bar size windows) */
-	ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
-	ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
+	intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
+	intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
 
 	/* Set outgoing translation offsets */
 	xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
-	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
 		xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
 		xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
 	} else
@@ -1864,7 +1847,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
 		bar_addr = peer_addr->bar0_addr;
 	else if (b2b_bar_num == NTB_B2B_BAR_1)
 		bar_addr = peer_addr->bar2_addr64;
-	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
+	else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
 		bar_addr = peer_addr->bar4_addr64;
 	else if (b2b_bar_num == NTB_B2B_BAR_2)
 		bar_addr = peer_addr->bar4_addr32;
@@ -1877,8 +1860,8 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
 	 * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
 	 * at a time.
 	 */
-	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
-	ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
+	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
+	intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
 	return (0);
 }
 
@@ -1897,7 +1880,7 @@ link_is_up(struct ntb_softc *ntb)
 
 	if (ntb->type == NTB_XEON)
 		return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
-		    !HAS_FEATURE(NTB_SB01BASE_LOCKUP)));
+		    !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));
 
 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
 	return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
@@ -1910,11 +1893,11 @@ atom_link_is_err(struct ntb_softc *ntb)
 
 	KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
 
-	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
+	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
 	if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
 		return (true);
 
-	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
+	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
 	return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
 }
 
@@ -1937,8 +1920,8 @@ atom_link_hb(void *arg)
 		goto out;
 	}
 
-	if (ntb_poll_link(ntb))
-		ntb_link_event(ntb);
+	if (intel_ntb_poll_link(ntb))
+		ntb_link_event(ntb->device);
 
 	if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
 		/* Link is down with error, proceed with recovery */
@@ -1956,166 +1939,47 @@ atom_perform_link_restart(struct ntb_softc *ntb)
 	uint32_t status;
 
 	/* Driver resets the NTB ModPhy lanes - magic! */
-	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
-	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
-	ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
-	ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
+	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
+	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
+	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
+	intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
 
 	/* Driver waits 100ms to allow the NTB ModPhy to settle */
 	pause("ModPhy", hz / 10);
 
 	/* Clear AER Errors, write to clear */
-	status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
+	status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
 	status &= PCIM_AER_COR_REPLAY_ROLLOVER;
-	ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
+	intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
 
 	/* Clear unexpected electrical idle event in LTSSM, write to clear */
-	status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
+	status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
 	status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
-	ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
+	intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
 
 	/* Clear DeSkew Buffer error, write to clear */
-	status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
+	status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
 	status |= ATOM_DESKEWSTS_DBERR;
-	ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
+	intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
 
-	status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
+	status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
 	status &= ATOM_IBIST_ERR_OFLOW;
-	ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
+	intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
 
 	/* Releases the NTB state machine to allow the link to retrain */
-	status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
+	status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
 	status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
-	ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
-}
-
-/*
- * ntb_set_ctx() - associate a driver context with an ntb device
- * @ntb:        NTB device context
- * @ctx:        Driver context
- * @ctx_ops:    Driver context operations
- *
- * Associate a driver context and operations with a ntb device.  The context is
- * provided by the client driver, and the driver may associate a different
- * context with each ntb device.
- *
- * Return: Zero if the context is associated, otherwise an error number.
- */
-int
-ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
-{
-
-	if (ctx == NULL || ops == NULL)
-		return (EINVAL);
-	if (ntb->ctx_ops != NULL)
-		return (EINVAL);
-
-	CTX_LOCK(ntb);
-	if (ntb->ctx_ops != NULL) {
-		CTX_UNLOCK(ntb);
-		return (EINVAL);
-	}
-	ntb->ntb_ctx = ctx;
-	ntb->ctx_ops = ops;
-	CTX_UNLOCK(ntb);
-
-	return (0);
-}
-
-/*
- * It is expected that this will only be used from contexts where the ctx_lock
- * is not needed to protect ntb_ctx lifetime.
- */
-void *
-ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
-{
-
-	KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
-	if (ops != NULL)
-		*ops = ntb->ctx_ops;
-	return (ntb->ntb_ctx);
-}
-
-/*
- * ntb_clear_ctx() - disassociate any driver context from an ntb device
- * @ntb:        NTB device context
- *
- * Clear any association that may exist between a driver context and the ntb
- * device.
- */
-void
-ntb_clear_ctx(struct ntb_softc *ntb)
-{
-
-	CTX_LOCK(ntb);
-	ntb->ntb_ctx = NULL;
-	ntb->ctx_ops = NULL;
-	CTX_UNLOCK(ntb);
-}
-
-/*
- * ntb_link_event() - notify driver context of a change in link status
- * @ntb:        NTB device context
- *
- * Notify the driver context that the link status may have changed.  The driver
- * should call ntb_link_is_up() to get the current status.
- */
-void
-ntb_link_event(struct ntb_softc *ntb)
-{
-
-	CTX_LOCK(ntb);
-	if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
-		ntb->ctx_ops->link_event(ntb->ntb_ctx);
-	CTX_UNLOCK(ntb);
+	intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
 }
 
-/*
- * ntb_db_event() - notify driver context of a doorbell event
- * @ntb:        NTB device context
- * @vector:     Interrupt vector number
- *
- * Notify the driver context of a doorbell event.  If hardware supports
- * multiple interrupt vectors for doorbells, the vector number indicates which
- * vector received the interrupt.  The vector number is relative to the first
- * vector used for doorbells, starting at zero, and must be less than
- * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
- * doorbell bits need service, and ntb_db_vector_mask() to determine which of
- * those bits are associated with the vector number.
- */
-static void
-ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
-{
-
-	CTX_LOCK(ntb);
-	if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
-		ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
-	CTX_UNLOCK(ntb);
-}
-
-/*
- * ntb_link_enable() - enable the link on the secondary side of the ntb
- * @ntb:        NTB device context
- * @max_speed:  The maximum link speed expressed as PCIe generation number[0]
- * @max_width:  The maximum link width expressed as the number of PCIe lanes[0]
- *
- * Enable the link on the secondary side of the ntb.  This can only be done
- * from the primary side of the ntb in primary or b2b topology.  The ntb device
- * should train the link to its maximum speed and width, or the requested speed
- * and width, whichever is smaller, if supported.
- *
- * Return: Zero on success, otherwise an error number.
- *
- * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
- *      and width input will be ignored.
- */
-int
-ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
-    enum ntb_width w __unused)
+static int
+intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
+    enum ntb_width width __unused)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 	uint32_t cntl;
 
-	ntb_printf(2, "%s\n", __func__);
+	intel_ntb_printf(2, "%s\n", __func__);
 
 	if (ntb->type == NTB_ATOM) {
 		pci_write_config(ntb->device, NTB_PPD_OFFSET,
@@ -2124,57 +1988,47 @@ ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
 	}
 
 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
-		ntb_link_event(ntb);
+		ntb_link_event(dev);
 		return (0);
 	}
 
-	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
 	cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
 	cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
 	cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
-	if (HAS_FEATURE(NTB_SPLIT_BAR))
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
 		cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
-	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
+	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
 	return (0);
 }
 
-/*
- * ntb_link_disable() - disable the link on the secondary side of the ntb
- * @ntb:        NTB device context
- *
- * Disable the link on the secondary side of the ntb.  This can only be done
- * from the primary side of the ntb in primary or b2b topology.  The ntb device
- * should disable the link.  Returning from this call must indicate that a
- * barrier has passed, though with no more writes may pass in either direction
- * across the link, except if this call returns an error number.
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_link_disable(struct ntb_softc *ntb)
+static int
+intel_ntb_link_disable(device_t dev)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 	uint32_t cntl;
 
-	ntb_printf(2, "%s\n", __func__);
+	intel_ntb_printf(2, "%s\n", __func__);
 
 	if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
-		ntb_link_event(ntb);
+		ntb_link_event(dev);
 		return (0);
 	}
 
-	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
 	cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
 	cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
-	if (HAS_FEATURE(NTB_SPLIT_BAR))
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
 		cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
 	cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
-	ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
+	intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
 	return (0);
 }
 
-bool
-ntb_link_enabled(struct ntb_softc *ntb)
+static bool
+intel_ntb_link_enabled(device_t dev)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 	uint32_t cntl;
 
 	if (ntb->type == NTB_ATOM) {
@@ -2185,7 +2039,7 @@ ntb_link_enabled(struct ntb_softc *ntb)
 	if (ntb->conn_type == NTB_CONN_TRANSPARENT)
 		return (true);
 
-	cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+	cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
 	return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
 }
 
@@ -2210,11 +2064,11 @@ recover_atom_link(void *arg)
 	if (atom_link_is_err(ntb))
 		goto retry;
 
-	status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
+	status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
 	if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
 		goto out;
 
-	status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
+	status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
 	width = NTB_LNK_STA_WIDTH(status32);
 	speed = status32 & NTB_LINK_SPEED_MASK;
 
@@ -2237,18 +2091,18 @@ retry:
  * Polls the HW link status register(s); returns true if something has changed.
  */
 static bool
-ntb_poll_link(struct ntb_softc *ntb)
+intel_ntb_poll_link(struct ntb_softc *ntb)
 {
 	uint32_t ntb_cntl;
 	uint16_t reg_val;
 
 	if (ntb->type == NTB_ATOM) {
-		ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+		ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
 		if (ntb_cntl == ntb->ntb_ctl)
 			return (false);
 
 		ntb->ntb_ctl = ntb_cntl;
-		ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
+		ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
 	} else {
 		db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
 
@@ -2258,11 +2112,11 @@ ntb_poll_link(struct ntb_softc *ntb)
 
 		ntb->lnk_sta = reg_val;
 
-		if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+		if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
 			if (_xeon_link_is_up(ntb)) {
 				if (!ntb->peer_msix_good) {
 					callout_reset(&ntb->peer_msix_work, 0,
-					    ntb_exchange_msix, ntb);
+					    intel_ntb_exchange_msix, ntb);
 					return (false);
 				}
 			} else {
@@ -2275,7 +2129,7 @@ ntb_poll_link(struct ntb_softc *ntb)
 }
 
 static inline enum ntb_speed
-ntb_link_sta_speed(struct ntb_softc *ntb)
+intel_ntb_link_sta_speed(struct ntb_softc *ntb)
 {
 
 	if (!link_is_up(ntb))
@@ -2284,7 +2138,7 @@ ntb_link_sta_speed(struct ntb_softc *ntb)
 }
 
 static inline enum ntb_width
-ntb_link_sta_width(struct ntb_softc *ntb)
+intel_ntb_link_sta_width(struct ntb_softc *ntb)
 {
 
 	if (!link_is_up(ntb))
@@ -2306,7 +2160,7 @@ SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
 #define NTB_REGFLAGS_MASK	(NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
 
 static void
-ntb_sysctl_init(struct ntb_softc *ntb)
+intel_ntb_sysctl_init(struct ntb_softc *ntb)
 {
 	struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
 	struct sysctl_ctx_list *ctx;
@@ -2405,7 +2259,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
 	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
 	    NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
 	    sysctl_handle_register, "QU", "Incoming XLAT23 register");
-	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
 		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
 		    NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
@@ -2425,7 +2279,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
 	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
 	    NTB_REG_64 | ntb->xlat_reg->bar2_limit,
 	    sysctl_handle_register, "QU", "Incoming LMT23 register");
-	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
 		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
 		    NTB_REG_32 | ntb->xlat_reg->bar4_limit,
@@ -2516,7 +2370,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
 	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
 	    NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
 	    sysctl_handle_register, "QU", "Outgoing XLAT23 register");
-	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
 		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
 		    NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
@@ -2536,7 +2390,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
 	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
 	    NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
 	    sysctl_handle_register, "QU", "Outgoing LMT23 register");
-	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
 		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
 		    NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
@@ -2560,7 +2414,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
 	    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
 	    NTB_REG_64 | ntb->xlat_reg->bar2_base,
 	    sysctl_handle_register, "QU", "Secondary BAR23 base register");
-	if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+	if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
 		SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
 		    CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
 		    NTB_REG_32 | ntb->xlat_reg->bar4_base,
@@ -2583,13 +2437,10 @@ ntb_sysctl_init(struct ntb_softc *ntb)
 static int
 sysctl_handle_features(SYSCTL_HANDLER_ARGS)
 {
-	struct ntb_softc *ntb;
+	struct ntb_softc *ntb = arg1;
 	struct sbuf sb;
 	int error;
 
-	error = 0;
-	ntb = arg1;
-
 	sbuf_new_for_sysctl(&sb, NULL, 256, req);
 
 	sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
@@ -2604,14 +2455,11 @@ sysctl_handle_features(SYSCTL_HANDLER_ARGS)
 static int
 sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
 {
-	struct ntb_softc *ntb;
+	struct ntb_softc *ntb = arg1;
 	unsigned old, new;
 	int error;
 
-	error = 0;
-	ntb = arg1;
-
-	old = ntb_link_enabled(ntb);
+	old = intel_ntb_link_enabled(ntb->device);
 
 	error = SYSCTL_OUT(req, &old, sizeof(old));
 	if (error != 0 || req->newptr == NULL)
@@ -2621,31 +2469,28 @@ sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
 	if (error != 0)
 		return (error);
 
-	ntb_printf(0, "Admin set interface state to '%sabled'\n",
+	intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
 	    (new != 0)? "en" : "dis");
 
 	if (new != 0)
-		error = ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+		error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
 	else
-		error = ntb_link_disable(ntb);
+		error = intel_ntb_link_disable(ntb->device);
 	return (error);
 }
 
 static int
 sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
 {
-	struct ntb_softc *ntb;
+	struct ntb_softc *ntb = arg1;
 	struct sbuf sb;
 	enum ntb_speed speed;
 	enum ntb_width width;
 	int error;
 
-	error = 0;
-	ntb = arg1;
-
 	sbuf_new_for_sysctl(&sb, NULL, 32, req);
 
-	if (ntb_link_is_up(ntb, &speed, &width))
+	if (intel_ntb_link_is_up(ntb->device, &speed, &width))
 		sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
 		    (unsigned)speed, (unsigned)width);
 	else
@@ -2662,14 +2507,11 @@ sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
 static int
 sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
 {
-	struct ntb_softc *ntb;
+	struct ntb_softc *ntb = arg1;
 	unsigned res;
 	int error;
 
-	error = 0;
-	ntb = arg1;
-
-	res = ntb_link_is_up(ntb, NULL, NULL);
+	res = intel_ntb_link_is_up(ntb->device, NULL, NULL);
 
 	error = SYSCTL_OUT(req, &res, sizeof(res));
 	if (error || !req->newptr)
@@ -2708,28 +2550,28 @@ sysctl_handle_register(SYSCTL_HANDLER_ARGS)
 			if (pci)
 				umv = pci_read_config(ntb->device, reg, 8);
 			else
-				umv = ntb_reg_read(8, reg);
+				umv = intel_ntb_reg_read(8, reg);
 			outsz = sizeof(uint64_t);
 			break;
 		case NTB_REG_32:
 			if (pci)
 				umv = pci_read_config(ntb->device, reg, 4);
 			else
-				umv = ntb_reg_read(4, reg);
+				umv = intel_ntb_reg_read(4, reg);
 			outsz = sizeof(uint32_t);
 			break;
 		case NTB_REG_16:
 			if (pci)
 				umv = pci_read_config(ntb->device, reg, 2);
 			else
-				umv = ntb_reg_read(2, reg);
+				umv = intel_ntb_reg_read(2, reg);
 			outsz = sizeof(uint16_t);
 			break;
 		case NTB_REG_8:
 			if (pci)
 				umv = pci_read_config(ntb->device, reg, 1);
 			else
-				umv = ntb_reg_read(1, reg);
+				umv = intel_ntb_reg_read(1, reg);
 			outsz = sizeof(uint8_t);
 			break;
 		default:
@@ -2749,7 +2591,7 @@ sysctl_handle_register(SYSCTL_HANDLER_ARGS)
 }
 
 static unsigned
-ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
+intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
 {
 
 	if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
@@ -2763,8 +2605,21 @@ ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
 	return (uidx);
 }
 
+#ifndef EARLY_AP_STARTUP
+static int msix_ready;
+
+static void
+intel_ntb_msix_ready(void *arg __unused)
+{
+
+	msix_ready = 1;
+}
+SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
+    intel_ntb_msix_ready, NULL);
+#endif
+
 static void
-ntb_exchange_msix(void *ctx)
+intel_ntb_exchange_msix(void *ctx)
 {
 	struct ntb_softc *ntb;
 	uint32_t val;
@@ -2777,42 +2632,50 @@ ntb_exchange_msix(void *ctx)
 	if (ntb->peer_msix_done)
 		goto msix_done;
 
+#ifndef EARLY_AP_STARTUP
+	/* Block MSIX negotiation until SMP is up and IRQs are reshuffled. */
+	if (!msix_ready)
+		goto reschedule;
+#endif
+
+	intel_ntb_get_msix_info(ntb);
 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
-		ntb_peer_spad_write(ntb, NTB_MSIX_DATA0 + i,
+		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
 		    ntb->msix_data[i].nmd_data);
-		ntb_peer_spad_write(ntb, NTB_MSIX_OFS0 + i,
+		intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
 		    ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
 	}
-	ntb_peer_spad_write(ntb, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
+	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
 
-	ntb_spad_read(ntb, NTB_MSIX_GUARD, &val);
+	intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
 	if (val != NTB_MSIX_VER_GUARD)
 		goto reschedule;
 
 	for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
-		ntb_spad_read(ntb, NTB_MSIX_DATA0 + i, &val);
-		ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
+		intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
+		intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
 		ntb->peer_msix_data[i].nmd_data = val;
-		ntb_spad_read(ntb, NTB_MSIX_OFS0 + i, &val);
-		ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
+		intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
+		intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
 		ntb->peer_msix_data[i].nmd_ofs = val;
 	}
 
 	ntb->peer_msix_done = true;
 
 msix_done:
-	ntb_peer_spad_write(ntb, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
-	ntb_spad_read(ntb, NTB_MSIX_DONE, &val);
+	intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
+	intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
 	if (val != NTB_MSIX_RECEIVED)
 		goto reschedule;
 
+	intel_ntb_spad_clear(ntb->device);
 	ntb->peer_msix_good = true;
 	/* Give peer time to see our NTB_MSIX_RECEIVED. */
 	goto reschedule;
 
 msix_good:
-	ntb_poll_link(ntb);
-	ntb_link_event(ntb);
+	intel_ntb_poll_link(ntb);
+	ntb_link_event(ntb->device);
 	return;
 
 reschedule:
@@ -2820,40 +2683,27 @@ reschedule:
 	if (_xeon_link_is_up(ntb)) {
 		callout_reset(&ntb->peer_msix_work,
 		    hz * (ntb->peer_msix_good ? 2 : 1) / 100,
-		    ntb_exchange_msix, ntb);
+		    intel_ntb_exchange_msix, ntb);
 	} else
-		ntb_spad_clear(ntb);
+		intel_ntb_spad_clear(ntb->device);
 }
 
 /*
  * Public API to the rest of the OS
  */
 
-/**
- * ntb_get_max_spads() - get the total scratch regs usable
- * @ntb: pointer to ntb_softc instance
- *
- * This function returns the max 32bit scratchpad registers usable by the
- * upper layer.
- *
- * RETURNS: total number of scratch pad registers available
- */
-uint8_t
-ntb_get_max_spads(struct ntb_softc *ntb)
+static uint8_t
+intel_ntb_spad_count(device_t dev)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
 	return (ntb->spad_count);
 }
 
-/*
- * ntb_mw_count() - Get the number of memory windows available for KPI
- * consumers.
- *
- * (Excludes any MW wholly reserved for register access.)
- */
-uint8_t
-ntb_mw_count(struct ntb_softc *ntb)
+static uint8_t
+intel_ntb_mw_count(device_t dev)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 	uint8_t res;
 
 	res = ntb->mw_count;
@@ -2864,25 +2714,15 @@ ntb_mw_count(struct ntb_softc *ntb)
 	return (res);
 }
 
-/**
- * ntb_spad_write() - write to the secondary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to the scratchpad register, 0 based
- * @val: the data value to put into the register
- *
- * This function allows writing of a 32bit value to the indexed scratchpad
- * register. The register resides on the secondary (external) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
+static int
+intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
 	if (idx >= ntb->spad_count)
 		return (EINVAL);
 
-	ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
+	intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
 
 	return (0);
 }
@@ -2890,122 +2730,77 @@ ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
 /*
  * Zeros the local scratchpad.
  */
-void
-ntb_spad_clear(struct ntb_softc *ntb)
+static void
+intel_ntb_spad_clear(device_t dev)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 	unsigned i;
 
 	for (i = 0; i < ntb->spad_count; i++)
-		ntb_spad_write(ntb, i, 0);
+		intel_ntb_spad_write(dev, i, 0);
 }
 
-/**
- * ntb_spad_read() - read from the primary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to scratchpad register, 0 based
- * @val: pointer to 32bit integer for storing the register value
- *
- * This function allows reading of the 32bit scratchpad register on
- * the primary (internal) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
+static int
+intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
 	if (idx >= ntb->spad_count)
 		return (EINVAL);
 
-	*val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
+	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
 
 	return (0);
 }
 
-/**
- * ntb_peer_spad_write() - write to the secondary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to the scratchpad register, 0 based
- * @val: the data value to put into the register
- *
- * This function allows writing of a 32bit value to the indexed scratchpad
- * register. The register resides on the secondary (external) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
+static int
+intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
 	if (idx >= ntb->spad_count)
 		return (EINVAL);
 
-	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
-		ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
+	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
+		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
 	else
-		ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
+		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
 
 	return (0);
 }
 
-/**
- * ntb_peer_spad_read() - read from the primary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to scratchpad register, 0 based
- * @val: pointer to 32bit integer for storing the register value
- *
- * This function allows reading of the 32bit scratchpad register on
- * the primary (internal) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
+static int
+intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
 	if (idx >= ntb->spad_count)
 		return (EINVAL);
 
-	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
-		*val = ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
+	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
+		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
 	else
-		*val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
+		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
 
 	return (0);
 }
 
-/*
- * ntb_mw_get_range() - get the range of a memory window
- * @ntb:        NTB device context
- * @idx:        Memory window number
- * @base:       OUT - the base address for mapping the memory window
- * @size:       OUT - the size for mapping the memory window
- * @align:      OUT - the base alignment for translating the memory window
- * @align_size: OUT - the size alignment for translating the memory window
- *
- * Get the range of a memory window.  NULL may be given for any output
- * parameter if the value is not needed.  The base and size may be used for
- * mapping the memory window, to access the peer memory.  The alignment and
- * size may be used for translating the memory window, for the peer to access
- * memory on the local system.
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
+static int
+intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
     caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
     bus_addr_t *plimit)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 	struct ntb_pci_bar_info *bar;
 	bus_addr_t limit;
 	size_t bar_b2b_off;
 	enum ntb_bar bar_num;
 
-	if (mw_idx >= ntb_mw_count(ntb))
+	if (mw_idx >= intel_ntb_mw_count(dev))
 		return (EINVAL);
-	mw_idx = ntb_user_mw_to_idx(ntb, mw_idx);
+	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);
 
-	bar_num = ntb_mw_to_bar(ntb, mw_idx);
+	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
 	bar = &ntb->bar_info[bar_num];
 	bar_b2b_off = 0;
 	if (mw_idx == ntb->b2b_mw_idx) {
@@ -3034,37 +2829,21 @@ ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
 	return (0);
 }
 
-/*
- * ntb_mw_set_trans() - set the translation of a memory window
- * @ntb:        NTB device context
- * @idx:        Memory window number
- * @addr:       The dma address local memory to expose to the peer
- * @size:       The size of the local memory to expose to the peer
- *
- * Set the translation of a memory window.  The peer may access local memory
- * through the window starting at the address, up to the size.  The address
- * must be aligned to the alignment specified by ntb_mw_get_range().  The size
- * must be aligned to the size alignment specified by ntb_mw_get_range().  The
- * address must be below the plimit specified by ntb_mw_get_range() (i.e. for
- * 32-bit BARs).
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
-    size_t size)
+static int
+intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 	struct ntb_pci_bar_info *bar;
 	uint64_t base, limit, reg_val;
 	size_t bar_size, mw_size;
 	uint32_t base_reg, xlat_reg, limit_reg;
 	enum ntb_bar bar_num;
 
-	if (idx >= ntb_mw_count(ntb))
+	if (idx >= intel_ntb_mw_count(dev))
 		return (EINVAL);
-	idx = ntb_user_mw_to_idx(ntb, idx);
+	idx = intel_ntb_user_mw_to_idx(ntb, idx);
 
-	bar_num = ntb_mw_to_bar(ntb, idx);
+	bar_num = intel_ntb_mw_to_bar(ntb, idx);
 	bar = &ntb->bar_info[bar_num];
 
 	bar_size = bar->size;
@@ -3084,25 +2863,25 @@ ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
 
 	limit = 0;
 	if (bar_is_64bit(ntb, bar_num)) {
-		base = ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
+		base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
 
 		if (limit_reg != 0 && size != mw_size)
 			limit = base + size;
 
 		/* Set and verify translation address */
-		ntb_reg_write(8, xlat_reg, addr);
-		reg_val = ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
+		intel_ntb_reg_write(8, xlat_reg, addr);
+		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
 		if (reg_val != addr) {
-			ntb_reg_write(8, xlat_reg, 0);
+			intel_ntb_reg_write(8, xlat_reg, 0);
 			return (EIO);
 		}
 
 		/* Set and verify the limit */
-		ntb_reg_write(8, limit_reg, limit);
-		reg_val = ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
+		intel_ntb_reg_write(8, limit_reg, limit);
+		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
 		if (reg_val != limit) {
-			ntb_reg_write(8, limit_reg, base);
-			ntb_reg_write(8, xlat_reg, 0);
+			intel_ntb_reg_write(8, limit_reg, base);
+			intel_ntb_reg_write(8, xlat_reg, 0);
 			return (EIO);
 		}
 	} else {
@@ -3113,98 +2892,72 @@ ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
 		if (((addr + size) & UINT32_MAX) != (addr + size))
 			return (ERANGE);
 
-		base = ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
+		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
 
 		if (limit_reg != 0 && size != mw_size)
 			limit = base + size;
 
 		/* Set and verify translation address */
-		ntb_reg_write(4, xlat_reg, addr);
-		reg_val = ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
+		intel_ntb_reg_write(4, xlat_reg, addr);
+		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
 		if (reg_val != addr) {
-			ntb_reg_write(4, xlat_reg, 0);
+			intel_ntb_reg_write(4, xlat_reg, 0);
 			return (EIO);
 		}
 
 		/* Set and verify the limit */
-		ntb_reg_write(4, limit_reg, limit);
-		reg_val = ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
+		intel_ntb_reg_write(4, limit_reg, limit);
+		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
 		if (reg_val != limit) {
-			ntb_reg_write(4, limit_reg, base);
-			ntb_reg_write(4, xlat_reg, 0);
+			intel_ntb_reg_write(4, limit_reg, base);
+			intel_ntb_reg_write(4, xlat_reg, 0);
 			return (EIO);
 		}
 	}
 	return (0);
 }
 
-/*
- * ntb_mw_clear_trans() - clear the translation of a memory window
- * @ntb:	NTB device context
- * @idx:	Memory window number
- *
- * Clear the translation of a memory window.  The peer may no longer access
- * local memory through the window.
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
+static int
+intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
 {
 
-	return (ntb_mw_set_trans(ntb, mw_idx, 0, 0));
+	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
 }
 
-/*
- * ntb_mw_get_wc - Get the write-combine status of a memory window
- *
- * Returns:  Zero on success, setting *wc; otherwise an error number (e.g. if
- * idx is an invalid memory window).
- *
- * Mode is a VM_MEMATTR_* type.
- */
-int
-ntb_mw_get_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t *mode)
+static int
+intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 	struct ntb_pci_bar_info *bar;
 
-	if (idx >= ntb_mw_count(ntb))
+	if (idx >= intel_ntb_mw_count(dev))
 		return (EINVAL);
-	idx = ntb_user_mw_to_idx(ntb, idx);
+	idx = intel_ntb_user_mw_to_idx(ntb, idx);
 
-	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
+	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
 	*mode = bar->map_mode;
 	return (0);
 }
 
-/*
- * ntb_mw_set_wc - Set the write-combine status of a memory window
- *
- * If 'mode' matches the current status, this does nothing and succeeds.  Mode
- * is a VM_MEMATTR_* type.
- *
- * Returns:  Zero on success, setting the caching attribute on the virtual
- * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid
- * memory window, or if changing the caching attribute fails).
- */
-int
-ntb_mw_set_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
+static int
+intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
-	if (idx >= ntb_mw_count(ntb))
+	if (idx >= intel_ntb_mw_count(dev))
 		return (EINVAL);
 
-	idx = ntb_user_mw_to_idx(ntb, idx);
-	return (ntb_mw_set_wc_internal(ntb, idx, mode));
+	idx = intel_ntb_user_mw_to_idx(ntb, idx);
+	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
 }
 
 static int
-ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
+intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
 {
 	struct ntb_pci_bar_info *bar;
 	int rc;
 
-	bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
+	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
 	if (bar->map_mode == mode)
 		return (0);
 
@@ -3215,26 +2968,19 @@ ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
 	return (rc);
 }
 
-/**
- * ntb_peer_db_set() - Set the doorbell on the secondary/external side
- * @ntb: pointer to ntb_softc instance
- * @bit: doorbell bits to ring
- *
- * This function allows triggering of a doorbell on the secondary/external
- * side that will initiate an interrupt on the remote host
- */
-void
-ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
+static void
+intel_ntb_peer_db_set(device_t dev, uint64_t bit)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
-	if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
 		struct ntb_pci_bar_info *lapic;
 		unsigned i;
 
 		lapic = ntb->peer_lapic_bar;
 
 		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
-			if ((bit & ntb_db_vector_mask(ntb, i)) != 0)
+			if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0)
 				bus_space_write_4(lapic->pci_bus_tag,
 				    lapic->pci_bus_handle,
 				    ntb->peer_msix_data[i].nmd_ofs,
@@ -3243,99 +2989,76 @@ ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
 		return;
 	}
 
-	if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
-		ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
+	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
+		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
 		return;
 	}
 
 	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
 }
 
-/*
- * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
- * as well as the size of the register (via *sz_out).
- *
- * This function allows a caller using I/OAT DMA to chain the remote doorbell
- * ring to its memory window write.
- *
- * Note that writing the peer doorbell via a memory window will *not* generate
- * an interrupt on the remote host; that must be done separately.
- */
-bus_addr_t
-ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
+static int
+intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 	struct ntb_pci_bar_info *bar;
 	uint64_t regoff;
 
-	KASSERT(sz_out != NULL, ("must be non-NULL"));
+	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));
 
-	if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
+	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
 		bar = &ntb->bar_info[NTB_CONFIG_BAR];
 		regoff = ntb->peer_reg->db_bell;
 	} else {
 		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
 		    ("invalid b2b idx"));
 
-		bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
+		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
 		regoff = XEON_PDOORBELL_OFFSET;
 	}
 	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
 
-	*sz_out = ntb->reg->db_size;
 	/* HACK: Specific to current x86 bus implementation. */
-	return ((uint64_t)bar->pci_bus_handle + regoff);
+	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
+	*db_size = ntb->reg->db_size;
+	return (0);
 }
 
-/*
- * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
- * @ntb:	NTB device context
- *
- * Hardware may support different number or arrangement of doorbell bits.
- *
- * Return: A mask of doorbell bits supported by the ntb.
- */
-uint64_t
-ntb_db_valid_mask(struct ntb_softc *ntb)
+static uint64_t
+intel_ntb_db_valid_mask(device_t dev)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
 	return (ntb->db_valid_mask);
 }
 
-/*
- * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
- * @ntb:	NTB device context
- * @vector:	Doorbell vector number
- *
- * Each interrupt vector may have a different number or arrangement of bits.
- *
- * Return: A mask of doorbell bits serviced by a vector.
- */
-uint64_t
-ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
+static int
+intel_ntb_db_vector_count(device_t dev)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
+
+	return (ntb->db_vec_count);
+}
+
+static uint64_t
+intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
+{
+	struct ntb_softc *ntb = device_get_softc(dev);
 
 	if (vector > ntb->db_vec_count)
 		return (0);
-	return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
+	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
 }
 
-/**
- * ntb_link_is_up() - get the current ntb link state
- * @ntb:        NTB device context
- * @speed:      OUT - The link speed expressed as PCIe generation number
- * @width:      OUT - The link width expressed as the number of PCIe lanes
- *
- * RETURNS: true or false based on the hardware link state
- */
-bool
-ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
-    enum ntb_width *width)
+static bool
+intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
 {
+	struct ntb_softc *ntb = device_get_softc(dev);
 
 	if (speed != NULL)
-		*speed = ntb_link_sta_speed(ntb);
+		*speed = intel_ntb_link_sta_speed(ntb);
 	if (width != NULL)
-		*width = ntb_link_sta_width(ntb);
+		*width = intel_ntb_link_sta_width(ntb);
 	return (link_is_up(ntb));
 }
 
@@ -3350,17 +3073,42 @@ save_bar_parameters(struct ntb_pci_bar_info *bar)
 	bar->vbase = rman_get_virtual(bar->pci_resource);
 }
 
-device_t
-ntb_get_device(struct ntb_softc *ntb)
-{
-
-	return (ntb->device);
-}
-
-/* Export HW-specific errata information. */
-bool
-ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
-{
+static device_method_t ntb_intel_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		intel_ntb_probe),
+	DEVMETHOD(device_attach,	intel_ntb_attach),
+	DEVMETHOD(device_detach,	intel_ntb_detach),
+	/* NTB interface */
+	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
+	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
+	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
+	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
+	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
+	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
+	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
+	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
+	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
+	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
+	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
+	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
+	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
+	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
+	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
+	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
+	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
+	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
+	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
+	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
+	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
+	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
+	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
+	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
+	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
+	DEVMETHOD_END
+};
 
-	return (HAS_FEATURE(feature));
-}
+static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
+    sizeof(struct ntb_softc));
+DRIVER_MODULE(ntb_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
+MODULE_DEPEND(ntb_intel, ntb, 1, 1, 1);
+MODULE_VERSION(ntb_intel, 1);
diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.h b/sys/dev/ntb/ntb_hw/ntb_hw.h
deleted file mode 100644
index f05acda..0000000
--- a/sys/dev/ntb/ntb_hw/ntb_hw.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*-
- * Copyright (C) 2013 Intel Corporation
- * Copyright (C) 2015 EMC Corporation
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _NTB_HW_H_
-#define _NTB_HW_H_
-
-struct ntb_softc;
-
-#define NTB_MAX_NUM_MW	3
-
-enum ntb_speed {
-	NTB_SPEED_AUTO = -1,
-	NTB_SPEED_NONE = 0,
-	NTB_SPEED_GEN1 = 1,
-	NTB_SPEED_GEN2 = 2,
-	NTB_SPEED_GEN3 = 3,
-};
-
-enum ntb_width {
-	NTB_WIDTH_AUTO = -1,
-	NTB_WIDTH_NONE = 0,
-	NTB_WIDTH_1 = 1,
-	NTB_WIDTH_2 = 2,
-	NTB_WIDTH_4 = 4,
-	NTB_WIDTH_8 = 8,
-	NTB_WIDTH_12 = 12,
-	NTB_WIDTH_16 = 16,
-	NTB_WIDTH_32 = 32,
-};
-
-SYSCTL_DECL(_hw_ntb);
-
-typedef void (*ntb_db_callback)(void *data, uint32_t vector);
-typedef void (*ntb_event_callback)(void *data);
-
-struct ntb_ctx_ops {
-	ntb_event_callback	link_event;
-	ntb_db_callback		db_event;
-};
-
-device_t ntb_get_device(struct ntb_softc *);
-
-bool ntb_link_is_up(struct ntb_softc *, enum ntb_speed *, enum ntb_width *);
-void ntb_link_event(struct ntb_softc *);
-int ntb_link_enable(struct ntb_softc *, enum ntb_speed, enum ntb_width);
-int ntb_link_disable(struct ntb_softc *);
-bool ntb_link_enabled(struct ntb_softc *);
-
-int ntb_set_ctx(struct ntb_softc *, void *, const struct ntb_ctx_ops *);
-void *ntb_get_ctx(struct ntb_softc *, const struct ntb_ctx_ops **);
-void ntb_clear_ctx(struct ntb_softc *);
-
-uint8_t ntb_mw_count(struct ntb_softc *);
-int ntb_mw_get_range(struct ntb_softc *, unsigned mw_idx, vm_paddr_t *base,
-    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
-    bus_addr_t *plimit);
-int ntb_mw_set_trans(struct ntb_softc *, unsigned mw_idx, bus_addr_t, size_t);
-int ntb_mw_clear_trans(struct ntb_softc *, unsigned mw_idx);
-
-int ntb_mw_get_wc(struct ntb_softc *, unsigned mw_idx, vm_memattr_t *mode);
-int ntb_mw_set_wc(struct ntb_softc *, unsigned mw_idx, vm_memattr_t mode);
-
-uint8_t ntb_get_max_spads(struct ntb_softc *ntb);
-void ntb_spad_clear(struct ntb_softc *ntb);
-int ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val);
-int ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val);
-int ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx,
-    uint32_t val);
-int ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx,
-    uint32_t *val);
-
-uint64_t ntb_db_valid_mask(struct ntb_softc *);
-uint64_t ntb_db_vector_mask(struct ntb_softc *, uint32_t vector);
-bus_addr_t ntb_get_peer_db_addr(struct ntb_softc *, vm_size_t *sz_out);
-
-void ntb_db_clear(struct ntb_softc *, uint64_t bits);
-void ntb_db_clear_mask(struct ntb_softc *, uint64_t bits);
-uint64_t ntb_db_read(struct ntb_softc *);
-void ntb_db_set_mask(struct ntb_softc *, uint64_t bits);
-void ntb_peer_db_set(struct ntb_softc *, uint64_t bits);
-
-#define XEON_SPAD_COUNT		16
-#define ATOM_SPAD_COUNT		16
-
-/* Hardware owns the low 16 bits of features. */
-#define NTB_BAR_SIZE_4K		(1 << 0)
-#define NTB_SDOORBELL_LOCKUP	(1 << 1)
-#define NTB_SB01BASE_LOCKUP	(1 << 2)
-#define NTB_B2BDOORBELL_BIT14	(1 << 3)
-/* Software/configuration owns the top 16 bits. */
-#define NTB_SPLIT_BAR		(1ull << 16)
-
-#define NTB_FEATURES_STR \
-    "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
-    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
-
-bool ntb_has_feature(struct ntb_softc *, uint32_t);
-
-#endif /* _NTB_HW_H_ */
diff --git a/sys/dev/ntb/ntb_hw/ntb_regs.h b/sys/dev/ntb/ntb_hw/ntb_regs.h
index fb445d7..a037736 100644
--- a/sys/dev/ntb/ntb_hw/ntb_regs.h
+++ b/sys/dev/ntb/ntb_hw/ntb_regs.h
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
  * Copyright (C) 2013 Intel Corporation
  * Copyright (C) 2015 EMC Corporation
  * All rights reserved.
@@ -76,6 +77,7 @@
 #define XEON_SDBMSK_OFFSET	0x0066
 #define XEON_USMEMMISS_OFFSET	0x0070
 #define XEON_SPAD_OFFSET	0x0080
+#define XEON_SPAD_COUNT		16
 #define XEON_SPADSEMA4_OFFSET	0x00c0
 #define XEON_WCCNTRL_OFFSET	0x00e0
 #define XEON_UNCERRSTS_OFFSET	0x014c
@@ -104,6 +106,7 @@
 #define ATOM_NTBCNTL_OFFSET	0x0060
 #define ATOM_EBDF_OFFSET		0x0064
 #define ATOM_SPAD_OFFSET		0x0080
+#define ATOM_SPAD_COUNT		16
 #define ATOM_SPADSEMA_OFFSET	0x00c0
 #define ATOM_STKYSPAD_OFFSET	0x00c4
 #define ATOM_PBAR2XLAT_OFFSET	0x8008
diff --git a/sys/dev/ntb/ntb_if.m b/sys/dev/ntb/ntb_if.m
new file mode 100644
index 0000000..d8ca227
--- /dev/null
+++ b/sys/dev/ntb/ntb_if.m
@@ -0,0 +1,210 @@
+#-
+# Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+#include <sys/bus.h>
+#include <machine/bus.h>
+
+INTERFACE ntb;
+
+HEADER {
+	enum ntb_speed {
+		NTB_SPEED_AUTO = -1,
+		NTB_SPEED_NONE = 0,
+		NTB_SPEED_GEN1 = 1,
+		NTB_SPEED_GEN2 = 2,
+		NTB_SPEED_GEN3 = 3,
+	};
+
+	enum ntb_width {
+		NTB_WIDTH_AUTO = -1,
+		NTB_WIDTH_NONE = 0,
+		NTB_WIDTH_1 = 1,
+		NTB_WIDTH_2 = 2,
+		NTB_WIDTH_4 = 4,
+		NTB_WIDTH_8 = 8,
+		NTB_WIDTH_12 = 12,
+		NTB_WIDTH_16 = 16,
+		NTB_WIDTH_32 = 32,
+	};
+
+	typedef void (*ntb_db_callback)(void *data, uint32_t vector);
+	typedef void (*ntb_event_callback)(void *data);
+	struct ntb_ctx_ops {
+		ntb_event_callback	link_event;
+		ntb_db_callback		db_event;
+	};
+};
+
+METHOD bool link_is_up {
+	device_t	 ntb;
+	enum ntb_speed	*speed;
+	enum ntb_width	*width;
+};
+
+METHOD int link_enable {
+	device_t	 ntb;
+	enum ntb_speed	 speed;
+	enum ntb_width	 width;
+};
+
+METHOD int link_disable {
+	device_t	 ntb;
+};
+
+METHOD bool link_enabled {
+	device_t	 ntb;
+};
+
+METHOD int set_ctx {
+	device_t	 ntb;
+	void		*ctx;
+	const struct ntb_ctx_ops *ctx_ops;
+};
+
+METHOD void * get_ctx {
+	device_t	 ntb;
+	const struct ntb_ctx_ops **ctx_ops;
+};
+
+METHOD void clear_ctx {
+	device_t	 ntb;
+};
+
+METHOD uint8_t mw_count {
+	device_t	 ntb;
+};
+
+METHOD int mw_get_range {
+	device_t	 ntb;
+	unsigned	 mw_idx;
+	vm_paddr_t	*base;
+	caddr_t		*vbase;
+	size_t		*size;
+	size_t		*align;
+	size_t		*align_size;
+	bus_addr_t	*plimit;
+};
+
+METHOD int mw_set_trans {
+	device_t	 ntb;
+	unsigned	 mw_idx;
+	bus_addr_t	 addr;
+	size_t		 size;
+};
+
+METHOD int mw_clear_trans {
+	device_t	 ntb;
+	unsigned	 mw_idx;
+};
+
+METHOD int mw_get_wc {
+	device_t	 ntb;
+	unsigned	 mw_idx;
+	vm_memattr_t	*mode;
+};
+
+METHOD int mw_set_wc {
+	device_t	 ntb;
+	unsigned	 mw_idx;
+	vm_memattr_t	 mode;
+};
+
+METHOD uint8_t spad_count {
+	device_t	 ntb;
+};
+
+METHOD void spad_clear {
+	device_t	 ntb;
+};
+
+METHOD int spad_write {
+	device_t	 ntb;
+	unsigned int	 idx;
+	uint32_t	 val;
+};
+
+METHOD int spad_read {
+	device_t	 ntb;
+	unsigned int	 idx;
+	uint32_t	 *val;
+};
+
+METHOD int peer_spad_write {
+	device_t	 ntb;
+	unsigned int	 idx;
+	uint32_t	 val;
+};
+
+METHOD int peer_spad_read {
+	device_t	 ntb;
+	unsigned int	 idx;
+	uint32_t	*val;
+};
+
+METHOD uint64_t db_valid_mask {
+	device_t	 ntb;
+};
+
+METHOD int db_vector_count {
+	device_t	 ntb;
+};
+
+METHOD uint64_t db_vector_mask {
+	device_t	 ntb;
+	uint32_t	 vector;
+};
+
+METHOD int peer_db_addr {
+	device_t	 ntb;
+	bus_addr_t	*db_addr;
+	vm_size_t	*db_size;
+};
+
+METHOD void db_clear {
+	device_t	 ntb;
+	uint64_t	 bits;
+};
+
+METHOD void db_clear_mask {
+	device_t	 ntb;
+	uint64_t	 bits;
+};
+
+METHOD uint64_t db_read {
+	device_t	 ntb;
+};
+
+METHOD void db_set_mask {
+	device_t	 ntb;
+	uint64_t	 bits;
+};
+
+METHOD void peer_db_set {
+	device_t	 ntb;
+	uint64_t	 bits;
+};
diff --git a/sys/dev/ntb/ntb_transport.c b/sys/dev/ntb/ntb_transport.c
new file mode 100644
index 0000000..c7bc4da
--- /dev/null
+++ b/sys/dev/ntb/ntb_transport.c
@@ -0,0 +1,1518 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * Copyright (C) 2013 Intel Corporation
+ * Copyright (C) 2015 EMC Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using PCI-e links, providing remote memory access.
+ *
+ * This module contains a transport for sending and receiving messages by
+ * writing to remote memory window(s) provided by underlying NTB device.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+
+#include "ntb.h"
+#include "ntb_transport.h"
+
+#define KTR_NTB KTR_SPARE3
+
+#define NTB_TRANSPORT_VERSION	4
+
+static SYSCTL_NODE(_hw, OID_AUTO, ntb_transport, CTLFLAG_RW, 0, "ntb_transport");
+
+static unsigned g_ntb_transport_debug_level;
+SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, debug_level, CTLFLAG_RWTUN,
+    &g_ntb_transport_debug_level, 0,
+    "ntb_transport log level -- higher is more verbose");
+#define ntb_printf(lvl, ...) do {			\
+	if ((lvl) <= g_ntb_transport_debug_level) {	\
+		printf(__VA_ARGS__);			\
+	}						\
+} while (0)
+
+static unsigned transport_mtu = 0x10000;
+
+static uint64_t max_mw_size;
+SYSCTL_UQUAD(_hw_ntb_transport, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0,
+    "If enabled (non-zero), limit the size of large memory windows. "
+    "Both sides of the NTB MUST set the same value here.");
+
+static unsigned enable_xeon_watchdog;
+SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN,
+    &enable_xeon_watchdog, 0, "If non-zero, write a register every second to "
+    "keep a watchdog from tearing down the NTB link");
+
+STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);
+
+typedef uint32_t ntb_q_idx_t;
+
+struct ntb_queue_entry {
+	/* ntb_queue list reference */
+	STAILQ_ENTRY(ntb_queue_entry) entry;
+
+	/* info on data to be transferred */
+	void		*cb_data;
+	void		*buf;
+	uint32_t	len;
+	uint32_t	flags;
+
+	struct ntb_transport_qp		*qp;
+	struct ntb_payload_header	*x_hdr;
+	ntb_q_idx_t	index;
+};
+
+struct ntb_rx_info {
+	ntb_q_idx_t	entry;
+};
+
+struct ntb_transport_qp {
+	struct ntb_transport_ctx	*transport;
+	device_t		 dev;
+
+	void			*cb_data;
+
+	bool			client_ready;
+	volatile bool		link_is_up;
+	uint8_t			qp_num;	/* Only 64 QPs are allowed.  0-63 */
+
+	struct ntb_rx_info	*rx_info;
+	struct ntb_rx_info	*remote_rx_info;
+
+	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+	    void *data, int len);
+	struct ntb_queue_list	tx_free_q;
+	struct mtx		ntb_tx_free_q_lock;
+	caddr_t			tx_mw;
+	bus_addr_t		tx_mw_phys;
+	ntb_q_idx_t		tx_index;
+	ntb_q_idx_t		tx_max_entry;
+	uint64_t		tx_max_frame;
+
+	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+	    void *data, int len);
+	struct ntb_queue_list	rx_post_q;
+	struct ntb_queue_list	rx_pend_q;
+	/* ntb_rx_q_lock: synchronize access to rx_XXXX_q */
+	struct mtx		ntb_rx_q_lock;
+	struct task		rxc_db_work;
+	struct taskqueue	*rxc_tq;
+	caddr_t			rx_buff;
+	ntb_q_idx_t		rx_index;
+	ntb_q_idx_t		rx_max_entry;
+	uint64_t		rx_max_frame;
+
+	void (*event_handler)(void *data, enum ntb_link_event status);
+	struct callout		link_work;
+	struct callout		rx_full;
+
+	uint64_t		last_rx_no_buf;
+
+	/* Stats */
+	uint64_t		rx_bytes;
+	uint64_t		rx_pkts;
+	uint64_t		rx_ring_empty;
+	uint64_t		rx_err_no_buf;
+	uint64_t		rx_err_oflow;
+	uint64_t		rx_err_ver;
+	uint64_t		tx_bytes;
+	uint64_t		tx_pkts;
+	uint64_t		tx_ring_full;
+	uint64_t		tx_err_no_buf;
+
+	struct mtx		tx_lock;
+};
+
+struct ntb_transport_mw {
+	vm_paddr_t	phys_addr;
+	size_t		phys_size;
+	size_t		xlat_align;
+	size_t		xlat_align_size;
+	bus_addr_t	addr_limit;
+	/* Tx buff is off vbase / phys_addr */
+	caddr_t		vbase;
+	size_t		xlat_size;
+	size_t		buff_size;
+	/* Rx buff is off virt_addr / dma_addr */
+	caddr_t		virt_addr;
+	bus_addr_t	dma_addr;
+};
+
+struct ntb_transport_child {
+	device_t	dev;
+	int		qpoff;
+	int		qpcnt;
+	struct ntb_transport_child *next;
+};
+
+struct ntb_transport_ctx {
+	device_t		 dev;
+	struct ntb_transport_child *child;
+	struct ntb_transport_mw	*mw_vec;
+	struct ntb_transport_qp	*qp_vec;
+	unsigned		mw_count;
+	unsigned		qp_count;
+	uint64_t		qp_bitmap;
+	volatile bool		link_is_up;
+	struct callout		link_work;
+	struct callout		link_watchdog;
+	struct task		link_cleanup;
+};
+
+enum {
+	NTBT_DESC_DONE_FLAG = 1 << 0,
+	NTBT_LINK_DOWN_FLAG = 1 << 1,
+};
+
+struct ntb_payload_header {
+	ntb_q_idx_t ver;
+	uint32_t len;
+	uint32_t flags;
+};
+
+enum {
+	/*
+	 * The order of this enum is part of the remote protocol.  Do not
+	 * reorder without bumping protocol version (and it's probably best
+	 * to keep the protocol in lock-step with the Linux NTB driver).
+	 */
+	NTBT_VERSION = 0,
+	NTBT_QP_LINKS,
+	NTBT_NUM_QPS,
+	NTBT_NUM_MWS,
+	/*
+	 * N.B.: transport_link_work assumes MW1 enums = MW0 + 2.
+	 */
+	NTBT_MW0_SZ_HIGH,
+	NTBT_MW0_SZ_LOW,
+	NTBT_MW1_SZ_HIGH,
+	NTBT_MW1_SZ_LOW,
+
+	/*
+	 * Some NTB-using hardware have a watchdog to work around NTB hangs; if
+	 * a register or doorbell isn't written every few seconds, the link is
+	 * torn down.  Write an otherwise unused register every few seconds to
+	 * work around this watchdog.
+	 */
+	NTBT_WATCHDOG_SPAD = 15
+};
+
+#define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
+#define NTB_QP_DEF_NUM_ENTRIES	100
+#define NTB_LINK_DOWN_TIMEOUT	10
+
+static int ntb_transport_probe(device_t dev);
+static int ntb_transport_attach(device_t dev);
+static int ntb_transport_detach(device_t dev);
+static void ntb_transport_init_queue(struct ntb_transport_ctx *nt,
+    unsigned int qp_num);
+static int ntb_process_tx(struct ntb_transport_qp *qp,
+    struct ntb_queue_entry *entry);
+static void ntb_transport_rxc_db(void *arg, int pending);
+static int ntb_process_rxc(struct ntb_transport_qp *qp);
+static void ntb_memcpy_rx(struct ntb_transport_qp *qp,
+    struct ntb_queue_entry *entry, void *offset);
+static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp,
+    void *data);
+static void ntb_complete_rxc(struct ntb_transport_qp *qp);
+static void ntb_transport_doorbell_callback(void *data, uint32_t vector);
+static void ntb_transport_event_callback(void *data);
+static void ntb_transport_link_work(void *arg);
+static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size);
+static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw);
+static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
+    unsigned int qp_num);
+static void ntb_qp_link_work(void *arg);
+static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt);
+static void ntb_transport_link_cleanup_work(void *, int);
+static void ntb_qp_link_down(struct ntb_transport_qp *qp);
+static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp);
+static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp);
+static void ntb_send_link_down(struct ntb_transport_qp *qp);
+static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
+    struct ntb_queue_list *list);
+static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock,
+    struct ntb_queue_list *list);
+static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock,
+    struct ntb_queue_list *from, struct ntb_queue_list *to);
+static void xeon_link_watchdog_hb(void *);
+
+static const struct ntb_ctx_ops ntb_transport_ops = {
+	.link_event = ntb_transport_event_callback,
+	.db_event = ntb_transport_doorbell_callback,
+};
+
+MALLOC_DEFINE(M_NTB_T, "ntb_transport", "ntb transport driver");
+
+static inline void
+iowrite32(uint32_t val, void *addr)
+{
+
+	bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr,
+	    val);
+}
+
+/* Transport Init and teardown */
+
+static void
+xeon_link_watchdog_hb(void *arg)
+{
+	struct ntb_transport_ctx *nt;
+
+	nt = arg;
+	ntb_spad_write(nt->dev, NTBT_WATCHDOG_SPAD, 0);
+	callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt);
+}
+
+static int
+ntb_transport_probe(device_t dev)
+{
+
+	device_set_desc(dev, "NTB Transport");
+	return (0);
+}
+
+/* Probe NTB resources, add children per the config hint, enable the link. */
+static int
+ntb_transport_attach(device_t dev)
+{
+	struct ntb_transport_ctx *nt = device_get_softc(dev);
+	struct ntb_transport_child **cpp = &nt->child;
+	struct ntb_transport_child *nc;
+	struct ntb_transport_mw *mw;
+	uint64_t db_bitmap;
+	int rc, i, db_count, spad_count, qp, qpu, qpo, qpt;
+	char cfg[128] = "";
+	char buf[32];
+	char *n, *np, *c, *name;
+
+	nt->dev = dev;
+	nt->mw_count = ntb_mw_count(dev);
+	spad_count = ntb_spad_count(dev);
+	db_bitmap = ntb_db_valid_mask(dev);
+	db_count = flsll(db_bitmap);
+	/* x & (x + 1) == 0 only when x is a contiguous run of low bits. */
+	KASSERT((db_bitmap & (db_bitmap + 1)) == 0,
+	    ("Doorbells are not sequential (%jx).\n", (uintmax_t)db_bitmap));
+
+	device_printf(dev, "%d memory windows, %d scratchpads, "
+	    "%d doorbells\n", nt->mw_count, spad_count, db_count);
+
+	if (nt->mw_count == 0) {
+		device_printf(dev, "At least 1 memory window required.\n");
+		return (ENXIO);
+	}
+	if (spad_count < 6) {
+		device_printf(dev, "At least 6 scratchpads required.\n");
+		return (ENXIO);
+	}
+	if (spad_count < 4 + 2 * nt->mw_count) {
+		nt->mw_count = (spad_count - 4) / 2;
+		device_printf(dev, "Scratchpads enough only for %d "
+		    "memory windows.\n", nt->mw_count);
+	}
+	if (db_bitmap == 0) {
+		device_printf(dev, "At least one doorbell required.\n");
+		return (ENXIO);
+	}
+
+	nt->mw_vec = malloc(nt->mw_count * sizeof(*nt->mw_vec), M_NTB_T,
+	    M_WAITOK | M_ZERO);
+	for (i = 0; i < nt->mw_count; i++) {
+		mw = &nt->mw_vec[i];
+		rc = ntb_mw_get_range(dev, i, &mw->phys_addr, &mw->vbase,
+		    &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size,
+		    &mw->addr_limit);
+		if (rc != 0)
+			goto err;
+
+		mw->buff_size = 0;
+		mw->xlat_size = 0;
+		mw->virt_addr = NULL;
+		mw->dma_addr = 0;
+
+		rc = ntb_mw_set_wc(dev, i, VM_MEMATTR_WRITE_COMBINING);
+		if (rc)
+			ntb_printf(0, "Unable to set mw%d caching\n", i);
+	}
+
+	qpu = 0;
+	qpo = imin(db_count, nt->mw_count);
+	qpt = db_count;
+
+	snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev),
+	    device_get_unit(dev));
+	TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg));
+	n = cfg;
+	i = 0;
+	while ((c = strsep(&n, ",")) != NULL) {
+		np = c;
+		name = strsep(&np, ":");
+		if (name != NULL && name[0] == 0)
+			name = NULL;
+		qp = (np && np[0] != 0) ? strtol(np, NULL, 10) : qpo - qpu;
+		if (qp <= 0)
+			qp = 1;
+		if (qp > qpt - qpu) {
+			device_printf(dev, "Not enough resources for config\n");
+			break;
+		}
+
+		nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO);
+		nc->qpoff = qpu;
+		nc->qpcnt = qp;
+		nc->dev = device_add_child(dev, name, -1);
+		if (nc->dev == NULL) {
+			device_printf(dev, "Can not add child.\n");
+			free(nc, M_DEVBUF);
+			break;
+		}
+		device_set_ivars(nc->dev, nc);
+		*cpp = nc;
+		cpp = &nc->next;
+		if (bootverbose) {
+			device_printf(dev, "%d \"%s\": queues %d",
+			    i, name, qpu);
+			if (qp > 1)
+				printf("-%d", qpu + qp - 1);
+			printf("\n");
+		}
+
+		qpu += qp;
+		i++;
+	}
+	nt->qp_count = qpu;
+
+	nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_T,
+	    M_WAITOK | M_ZERO);
+
+	for (i = 0; i < nt->qp_count; i++)
+		ntb_transport_init_queue(nt, i);
+
+	callout_init(&nt->link_work, 0);
+	callout_init(&nt->link_watchdog, 0);
+	TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt);
+
+	rc = ntb_set_ctx(dev, nt, &ntb_transport_ops);
+	if (rc != 0)
+		goto err;
+
+	nt->link_is_up = false;
+	ntb_link_enable(dev, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+
+	if (enable_xeon_watchdog != 0)
+		callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt);
+
+	bus_generic_attach(dev);
+	return (0);
+
+err:
+	free(nt->qp_vec, M_NTB_T);
+	free(nt->mw_vec, M_NTB_T);
+	return (rc);
+}
+
+/* Delete all children, then disable the link and free transport state. */
+static int
+ntb_transport_detach(device_t dev)
+{
+	struct ntb_transport_ctx *nt = device_get_softc(dev);
+	struct ntb_transport_child *nc;
+	int error, i;
+
+	/* Unlink a child only once it is gone; give up if one refuses. */
+	while ((nc = nt->child) != NULL) {
+		error = device_delete_child(dev, nc->dev);
+		if (error != 0)
+			return (error);
+		nt->child = nc->next;
+		free(nc, M_DEVBUF);
+	}
+	KASSERT(nt->qp_bitmap == 0,
+	    ("Some queues not freed on detach (%jx)", nt->qp_bitmap));
+
+	ntb_transport_link_cleanup(nt);
+	taskqueue_drain(taskqueue_swi, &nt->link_cleanup);
+	callout_drain(&nt->link_work);
+	callout_drain(&nt->link_watchdog);
+
+	ntb_link_disable(dev);
+	ntb_clear_ctx(dev);
+
+	for (i = 0; i < nt->mw_count; i++)
+		ntb_free_mw(nt, i);
+	free(nt->qp_vec, M_NTB_T);
+	free(nt->mw_vec, M_NTB_T);
+	return (0);
+}
+
+int
+ntb_transport_queue_count(device_t dev)
+{
+	struct ntb_transport_child *nc = device_get_ivars(dev);
+
+	return (nc->qpcnt);
+}
+
+static void
+ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num)
+{
+	struct ntb_transport_mw *mw;
+	struct ntb_transport_qp *qp;
+	vm_paddr_t mw_base;
+	uint64_t mw_size, qp_offset;
+	size_t tx_size;
+	unsigned num_qps_mw, mw_num, mw_count;
+
+	mw_count = nt->mw_count;
+	mw_num = QP_TO_MW(nt, qp_num);
+	mw = &nt->mw_vec[mw_num];
+
+	qp = &nt->qp_vec[qp_num];
+	qp->qp_num = qp_num;
+	qp->transport = nt;
+	qp->dev = nt->dev;
+	qp->client_ready = false;
+	qp->event_handler = NULL;
+	ntb_qp_link_down_reset(qp);
+
+	if (mw_num < nt->qp_count % mw_count)
+		num_qps_mw = nt->qp_count / mw_count + 1;
+	else
+		num_qps_mw = nt->qp_count / mw_count;
+
+	mw_base = mw->phys_addr;
+	mw_size = mw->phys_size;
+
+	tx_size = mw_size / num_qps_mw;
+	qp_offset = tx_size * (qp_num / mw_count);
+
+	qp->tx_mw = mw->vbase + qp_offset;
+	KASSERT(qp->tx_mw != NULL, ("uh oh?"));
+
+	/* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */
+	qp->tx_mw_phys = mw_base + qp_offset;
+	KASSERT(qp->tx_mw_phys != 0, ("uh oh?"));
+
+	tx_size -= sizeof(struct ntb_rx_info);
+	qp->rx_info = (void *)(qp->tx_mw + tx_size);
+
+	/* Due to house-keeping, there must be at least 2 buffs */
+	qp->tx_max_frame = qmin(transport_mtu, tx_size / 2);
+	qp->tx_max_entry = tx_size / qp->tx_max_frame;
+
+	callout_init(&qp->link_work, 0);
+	callout_init(&qp->rx_full, 1);
+
+	mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN);
+	mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN);
+	mtx_init(&qp->tx_lock, "ntb transport tx", NULL, MTX_DEF);
+	TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp);
+	qp->rxc_tq = taskqueue_create("ntbt_rx", M_WAITOK,
+	    taskqueue_thread_enqueue, &qp->rxc_tq);
+	taskqueue_start_threads(&qp->rxc_tq, 1, PI_NET, "%s rx%d",
+	    device_get_nameunit(nt->dev), qp_num);
+
+	STAILQ_INIT(&qp->rx_post_q);
+	STAILQ_INIT(&qp->rx_pend_q);
+	STAILQ_INIT(&qp->tx_free_q);
+}
+
+/* Tear down a queue: drain its work, drop its handlers, free its entries. */
+void
+ntb_transport_free_queue(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport_ctx *nt = qp->transport;
+	struct ntb_queue_entry *entry;
+
+	callout_drain(&qp->link_work);
+	ntb_db_set_mask(qp->dev, 1ull << qp->qp_num);
+	taskqueue_drain_all(qp->rxc_tq);
+	taskqueue_free(qp->rxc_tq);
+
+	qp->cb_data = NULL;
+	qp->rx_handler = NULL;
+	qp->tx_handler = NULL;
+	qp->event_handler = NULL;
+
+	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q)))
+		free(entry, M_NTB_T);
+
+	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q)))
+		free(entry, M_NTB_T);
+
+	while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
+		free(entry, M_NTB_T);
+	/* qp_bitmap is 64-bit; a plain int shift breaks for qp_num >= 31. */
+	nt->qp_bitmap &= ~(1ull << qp->qp_num);
+}
+
+/**
+ * ntb_transport_create_queue - Create a new NTB transport layer queue
+ * @dev: consumer (child) device the queue is created for
+ * @q: index of the queue among those assigned to @dev
+ * @handlers: receive, transmit and event callbacks for the queue
+ * @data: opaque pointer passed back to the callbacks
+ *
+ * Set up the queue and register its callbacks.  The receive callback passes
+ * up data the transport has received on the queue; the transmit callback is
+ * called when the transport has completed transmission of the data and the
+ * data is ready to be freed.
+ *
+ * RETURNS: pointer to the queue, or NULL if @q is out of range.
+ */
+struct ntb_transport_qp *
+ntb_transport_create_queue(device_t dev, int q,
+    const struct ntb_queue_handlers *handlers, void *data)
+{
+	struct ntb_transport_child *nc = device_get_ivars(dev);
+	struct ntb_transport_ctx *nt = device_get_softc(device_get_parent(dev));
+	struct ntb_queue_entry *entry;
+	struct ntb_transport_qp *qp;
+	int i;
+
+	if (q < 0 || q >= nc->qpcnt)
+		return (NULL);
+
+	qp = &nt->qp_vec[nc->qpoff + q];
+	nt->qp_bitmap |= (1ull << qp->qp_num);
+	qp->cb_data = data;
+	qp->rx_handler = handlers->rx_handler;
+	qp->tx_handler = handlers->tx_handler;
+	qp->event_handler = handlers->event_handler;
+
+	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+		entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO);
+		entry->cb_data = data;
+		entry->buf = NULL;
+		entry->len = transport_mtu;
+		entry->qp = qp;
+		ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q);
+	}
+
+	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+		entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO);
+		entry->qp = qp;
+		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+	}
+	/* Like every other NTB call here, go through the transport device. */
+	ntb_db_clear(qp->dev, 1ull << qp->qp_num);
+	return (qp);
+}
+
+/**
+ * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
+ * @qp: NTB transport layer queue to be enabled
+ *
+ * Notify NTB transport layer of client readiness to use queue
+ */
+void
+ntb_transport_link_up(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport_ctx *nt = qp->transport;
+
+	qp->client_ready = true;
+
+	ntb_printf(2, "qp %d client ready\n", qp->qp_num);
+
+	if (nt->link_is_up)
+		callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+}
+
+
+
+/* Transport Tx */
+
+/**
+ * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
+ * @qp: NTB transport layer queue the entry is to be enqueued on
+ * @cb: per buffer pointer for callback function to use
+ * @data: pointer to data buffer that will be sent
+ * @len: length of the data buffer
+ *
+ * Enqueue a new transmit buffer onto the transport queue from which a NTB
+ * payload will be transmitted.  This assumes that a lock is being held to
+ * serialize access to the qp.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int
+ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+    unsigned int len)
+{
+	struct ntb_queue_entry *entry;
+	int rc;
+
+	if (!qp->link_is_up || len == 0) {
+		CTR0(KTR_NTB, "TX: link not up");
+		return (EINVAL);
+	}
+
+	entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+	if (entry == NULL) {
+		CTR0(KTR_NTB, "TX: could not get entry from tx_free_q");
+		qp->tx_err_no_buf++;
+		return (EBUSY);
+	}
+	CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry);
+
+	entry->cb_data = cb;
+	entry->buf = data;
+	entry->len = len;
+	entry->flags = 0;
+
+	mtx_lock(&qp->tx_lock);
+	rc = ntb_process_tx(qp, entry);
+	mtx_unlock(&qp->tx_lock);
+	if (rc != 0) {
+		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+		CTR1(KTR_NTB,
+		    "TX: process_tx failed. Returning entry %p to tx_free_q",
+		    entry);
+	}
+	return (rc);
+}
+
+static void
+ntb_tx_copy_callback(void *data)
+{
+	struct ntb_queue_entry *entry = data;
+	struct ntb_transport_qp *qp = entry->qp;
+	struct ntb_payload_header *hdr = entry->x_hdr;
+
+	iowrite32(entry->flags | NTBT_DESC_DONE_FLAG, &hdr->flags);
+	CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr);
+
+	ntb_peer_db_set(qp->dev, 1ull << qp->qp_num);
+
+	/*
+	 * The entry length can only be zero if the packet is intended to be a
+	 * "link down" or similar.  Since no payload is being sent in these
+	 * cases, there is nothing to add to the completion queue.
+	 */
+	if (entry->len > 0) {
+		qp->tx_bytes += entry->len;
+
+		if (qp->tx_handler)
+			qp->tx_handler(qp, qp->cb_data, entry->buf,
+			    entry->len);
+		else
+			m_freem(entry->buf);
+		entry->buf = NULL;
+	}
+
+	CTR3(KTR_NTB,
+	    "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning "
+	    "to tx_free_q", entry, hdr->ver, hdr->flags);
+	ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+}
+
+static void
+ntb_memcpy_tx(struct ntb_queue_entry *entry, void *offset)
+{
+
+	CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset);
+	if (entry->buf != NULL) {
+		m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset);
+
+		/*
+		 * Ensure that the data is fully copied before setting the
+		 * flags
+		 */
+		wmb();
+	}
+
+	ntb_tx_copy_callback(entry);
+}
+
+static void
+ntb_async_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
+{
+	struct ntb_payload_header *hdr;
+	void *offset;
+
+	offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
+	hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame -
+	    sizeof(struct ntb_payload_header));
+	entry->x_hdr = hdr;
+
+	iowrite32(entry->len, &hdr->len);
+	iowrite32(qp->tx_pkts, &hdr->ver);
+
+	ntb_memcpy_tx(entry, offset);
+}
+
+static int
+ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
+{
+
+	CTR3(KTR_NTB,
+	    "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u",
+	    qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry);
+	if (qp->tx_index == qp->remote_rx_info->entry) {
+		CTR0(KTR_NTB, "TX: ring full");
+		qp->tx_ring_full++;
+		return (EAGAIN);
+	}
+
+	if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
+		if (qp->tx_handler != NULL)
+			qp->tx_handler(qp, qp->cb_data, entry->buf,
+			    EIO);
+		else
+			m_freem(entry->buf);
+
+		entry->buf = NULL;
+		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+		CTR1(KTR_NTB,
+		    "TX: frame too big. returning entry %p to tx_free_q",
+		    entry);
+		return (0);
+	}
+	CTR2(KTR_NTB, "TX: copying entry %p to index %u", entry, qp->tx_index);
+	ntb_async_tx(qp, entry);
+
+	qp->tx_index++;
+	qp->tx_index %= qp->tx_max_entry;
+
+	qp->tx_pkts++;
+
+	return (0);
+}
+
+/* Transport Rx */
+static void
+ntb_transport_rxc_db(void *arg, int pending __unused)
+{
+	struct ntb_transport_qp *qp = arg;
+	int rc;
+
+	CTR0(KTR_NTB, "RX: transport_rx");
+again:
+	while ((rc = ntb_process_rxc(qp)) == 0)
+		;
+	CTR1(KTR_NTB, "RX: process_rxc returned %d", rc);
+
+	if ((ntb_db_read(qp->dev) & (1ull << qp->qp_num)) != 0) {
+		/* If db is set, clear it and check queue once more. */
+		ntb_db_clear(qp->dev, 1ull << qp->qp_num);
+		goto again;
+	}
+}
+
+static int
+ntb_process_rxc(struct ntb_transport_qp *qp)
+{
+	struct ntb_payload_header *hdr;
+	struct ntb_queue_entry *entry;
+	caddr_t offset;
+
+	offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
+	hdr = (void *)(offset + qp->rx_max_frame -
+	    sizeof(struct ntb_payload_header));
+
+	CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index);
+	if ((hdr->flags & NTBT_DESC_DONE_FLAG) == 0) {
+		CTR0(KTR_NTB, "RX: hdr not done");
+		qp->rx_ring_empty++;
+		return (EAGAIN);
+	}
+
+	if ((hdr->flags & NTBT_LINK_DOWN_FLAG) != 0) {
+		CTR0(KTR_NTB, "RX: link down");
+		ntb_qp_link_down(qp);
+		hdr->flags = 0;
+		return (EAGAIN);
+	}
+
+	if (hdr->ver != (uint32_t)qp->rx_pkts) {
+		CTR2(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). "
+		    "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts);
+		qp->rx_err_ver++;
+		return (EIO);
+	}
+
+	entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
+	if (entry == NULL) {
+		qp->rx_err_no_buf++;
+		CTR0(KTR_NTB, "RX: No entries in rx_pend_q");
+		return (EAGAIN);
+	}
+	callout_stop(&qp->rx_full);
+	CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry);
+
+	entry->x_hdr = hdr;
+	entry->index = qp->rx_index;
+
+	if (hdr->len > entry->len) {
+		CTR2(KTR_NTB, "RX: len too long. Wanted %ju got %ju",
+		    (uintmax_t)hdr->len, (uintmax_t)entry->len);
+		qp->rx_err_oflow++;
+
+		entry->len = -EIO;
+		entry->flags |= NTBT_DESC_DONE_FLAG;
+
+		ntb_complete_rxc(qp);
+	} else {
+		qp->rx_bytes += hdr->len;
+		qp->rx_pkts++;
+
+		CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts);
+
+		entry->len = hdr->len;
+
+		ntb_memcpy_rx(qp, entry, offset);
+	}
+
+	qp->rx_index++;
+	qp->rx_index %= qp->rx_max_entry;
+	return (0);
+}
+
+static void
+ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
+    void *offset)
+{
+	struct ifnet *ifp = entry->cb_data;
+	unsigned int len = entry->len;
+
+	CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset);
+
+	entry->buf = (void *)m_devget(offset, len, 0, ifp, NULL);
+	if (entry->buf == NULL)
+		entry->len = -ENOMEM;
+
+	/* Ensure that the data is globally visible before clearing the flag */
+	wmb();
+
+	CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, entry->buf);
+	ntb_rx_copy_callback(qp, entry);
+}
+
+static inline void
+ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data)
+{
+	struct ntb_queue_entry *entry;
+
+	entry = data;
+	entry->flags |= NTBT_DESC_DONE_FLAG;
+	ntb_complete_rxc(qp);
+}
+
+static void
+ntb_complete_rxc(struct ntb_transport_qp *qp)
+{
+	struct ntb_queue_entry *entry;
+	struct mbuf *m;
+	unsigned len;
+
+	CTR0(KTR_NTB, "RX: rx_completion_task");
+
+	mtx_lock_spin(&qp->ntb_rx_q_lock);
+
+	while (!STAILQ_EMPTY(&qp->rx_post_q)) {
+		entry = STAILQ_FIRST(&qp->rx_post_q);
+		if ((entry->flags & NTBT_DESC_DONE_FLAG) == 0)
+			break;	/* head not done yet: complete in queue order */
+		/* Clear header flags; record the entry index in rx_info. */
+		entry->x_hdr->flags = 0;
+		iowrite32(entry->index, &qp->rx_info->entry);
+
+		STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry);
+
+		len = entry->len;
+		m = entry->buf;
+
+		/*
+		 * Re-initialize queue_entry for reuse; rx_handler takes
+		 * ownership of the mbuf.
+		 */
+		entry->buf = NULL;
+		entry->len = transport_mtu;
+		entry->cb_data = qp->cb_data;
+
+		STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry);
+		/* Drop the spin lock while the handler (or m_freem) runs. */
+		mtx_unlock_spin(&qp->ntb_rx_q_lock);
+
+		CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m);
+		if (qp->rx_handler != NULL && qp->client_ready)
+			qp->rx_handler(qp, qp->cb_data, m, len);
+		else
+			m_freem(m);
+
+		mtx_lock_spin(&qp->ntb_rx_q_lock);
+	}
+
+	mtx_unlock_spin(&qp->ntb_rx_q_lock);
+}
+
+static void
+ntb_transport_doorbell_callback(void *data, uint32_t vector)
+{
+	struct ntb_transport_ctx *nt = data;
+	struct ntb_transport_qp *qp;
+	uint64_t vec_mask;
+	unsigned qp_num;
+
+	vec_mask = ntb_db_vector_mask(nt->dev, vector);
+	vec_mask &= nt->qp_bitmap;
+	if ((vec_mask & (vec_mask - 1)) != 0)	/* more than one bit is set */
+		vec_mask &= ntb_db_read(nt->dev);
+	while (vec_mask != 0) {
+		qp_num = ffsll(vec_mask) - 1;	/* index of the lowest set bit */
+
+		qp = &nt->qp_vec[qp_num];
+		if (qp->link_is_up)
+			taskqueue_enqueue(qp->rxc_tq, &qp->rxc_db_work);
+
+		vec_mask &= ~(1ull << qp_num);	/* this bit has been handled */
+	}
+}
+
+/* Hardware link event: start bring-up when up, queue cleanup when down. */
+static void
+ntb_transport_event_callback(void *data)
+{
+	struct ntb_transport_ctx *nt = data;
+
+	if (!ntb_link_is_up(nt->dev, NULL, NULL)) {
+		ntb_printf(1, "HW link down\n");
+		taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup);
+		return;
+	}
+	ntb_printf(1, "HW link up\n");
+	callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
+}
+
+/* Link bring up: exchange setup through scratchpads, then start the qps */
+static void
+ntb_transport_link_work(void *arg)
+{
+	struct ntb_transport_ctx *nt = arg;
+	device_t dev = nt->dev;
+	struct ntb_transport_qp *qp;
+	uint64_t val64, size;
+	uint32_t val;
+	unsigned i;
+	int rc;
+
+	/* send the local info, in the opposite order of the way we read it */
+	for (i = 0; i < nt->mw_count; i++) {
+		size = nt->mw_vec[i].phys_size;
+
+		if (max_mw_size != 0 && size > max_mw_size)
+			size = max_mw_size;	/* 0 means no cap */
+
+		ntb_peer_spad_write(dev, NTBT_MW0_SZ_HIGH + (i * 2),
+		    size >> 32);
+		ntb_peer_spad_write(dev, NTBT_MW0_SZ_LOW + (i * 2), size);
+	}
+	ntb_peer_spad_write(dev, NTBT_NUM_MWS, nt->mw_count);
+	ntb_peer_spad_write(dev, NTBT_NUM_QPS, nt->qp_count);
+	ntb_peer_spad_write(dev, NTBT_QP_LINKS, 0);
+	ntb_peer_spad_write(dev, NTBT_VERSION, NTB_TRANSPORT_VERSION);
+
+	/* Query the remote side for its info */
+	val = 0;
+	ntb_spad_read(dev, NTBT_VERSION, &val);
+	if (val != NTB_TRANSPORT_VERSION)
+		goto out;
+
+	ntb_spad_read(dev, NTBT_NUM_QPS, &val);
+	if (val != nt->qp_count)
+		goto out;
+
+	ntb_spad_read(dev, NTBT_NUM_MWS, &val);
+	if (val != nt->mw_count)
+		goto out;
+
+	for (i = 0; i < nt->mw_count; i++) {
+		ntb_spad_read(dev, NTBT_MW0_SZ_HIGH + (i * 2), &val);
+		val64 = (uint64_t)val << 32;
+
+		ntb_spad_read(dev, NTBT_MW0_SZ_LOW + (i * 2), &val);
+		val64 |= val;
+
+		rc = ntb_set_mw(nt, i, val64);
+		if (rc != 0)
+			goto free_mws;
+	}
+
+	nt->link_is_up = true;
+	ntb_printf(1, "transport link up\n");
+
+	for (i = 0; i < nt->qp_count; i++) {
+		qp = &nt->qp_vec[i];
+
+		ntb_transport_setup_qp_mw(nt, i);	/* result unchecked */
+
+		if (qp->client_ready)
+			callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+	}
+
+	return;
+
+free_mws:	/* release every memory window buffer */
+	for (i = 0; i < nt->mw_count; i++)
+		ntb_free_mw(nt, i);
+out:	/* retry later for as long as the HW link is up */
+	if (ntb_link_is_up(dev, NULL, NULL))
+		callout_reset(&nt->link_work,
+		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt);
+}
+
+static int
+ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size)
+{
+	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+	size_t xlat_size, buff_size;
+	int rc;
+
+	if (size == 0)
+		return (EINVAL);	/* reject an empty window */
+
+	xlat_size = roundup(size, mw->xlat_align_size);
+	buff_size = xlat_size;
+
+	/* Already set up with this translation size; nothing to do */
+	if (mw->xlat_size == xlat_size)
+		return (0);
+
+	if (mw->buff_size != 0)
+		ntb_free_mw(nt, num_mw);	/* drop old-size buffer */
+
+	/* Alloc memory for receiving data.  Must be aligned */
+	mw->xlat_size = xlat_size;
+	mw->buff_size = buff_size;
+
+	mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_T, M_ZERO, 0,
+	    mw->addr_limit, mw->xlat_align, 0);
+	if (mw->virt_addr == NULL) {
+		ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n",
+		    mw->buff_size, mw->xlat_size);
+		mw->xlat_size = 0;
+		mw->buff_size = 0;
+		return (ENOMEM);
+	}
+	/* TODO: replace with bus_space_* functions */
+	mw->dma_addr = vtophys(mw->virt_addr);
+
+	/*
+	 * Ensure that the allocation from contigmalloc is aligned as
+	 * requested.  XXX: This may not be needed -- brought in for parity
+	 * with the Linux driver.
+	 */
+	if (mw->dma_addr % mw->xlat_align != 0) {
+		ntb_printf(0,
+		    "DMA memory 0x%jx not aligned to BAR size 0x%zx\n",
+		    (uintmax_t)mw->dma_addr, size);
+		ntb_free_mw(nt, num_mw);
+		return (ENOMEM);
+	}
+
+	/* Notify HW the memory location of the receive buffer */
+	rc = ntb_mw_set_trans(nt->dev, num_mw, mw->dma_addr, mw->xlat_size);
+	if (rc) {
+		ntb_printf(0, "Unable to set mw%d translation\n", num_mw);
+		ntb_free_mw(nt, num_mw);
+		return (rc);
+	}
+
+	return (0);
+}
+
+static void
+ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
+{
+	struct ntb_transport_mw *win;
+
+	win = &nt->mw_vec[num_mw];
+	/* Only tear down a window that currently has a buffer. */
+	if (win->virt_addr != NULL) {
+		ntb_mw_clear_trans(nt->dev, num_mw);
+		contigfree(win->virt_addr, win->xlat_size, M_NTB_T);
+		win->xlat_size = 0;
+		win->buff_size = 0;
+		win->virt_addr = NULL;
+	}
+}
+
+static int
+ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+	struct ntb_transport_mw *mw;
+	void *offset;
+	ntb_q_idx_t i;
+	size_t rx_size;
+	unsigned num_qps_mw, mw_num, mw_count;
+
+	mw_count = nt->mw_count;
+	mw_num = QP_TO_MW(nt, qp_num);
+	mw = &nt->mw_vec[mw_num];
+
+	if (mw->virt_addr == NULL)
+		return (ENOMEM);
+	/* The first (qp_count % mw_count) windows hold one extra queue. */
+	if (mw_num < nt->qp_count % mw_count)
+		num_qps_mw = nt->qp_count / mw_count + 1;
+	else
+		num_qps_mw = nt->qp_count / mw_count;
+
+	rx_size = mw->xlat_size / num_qps_mw;
+	qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count);
+	rx_size -= sizeof(struct ntb_rx_info);	/* reserved at the end */
+
+	qp->remote_rx_info = (void*)(qp->rx_buff + rx_size);
+
+	/* Due to house-keeping, there must be at least 2 buffs */
+	qp->rx_max_frame = qmin(transport_mtu, rx_size / 2);
+	qp->rx_max_entry = rx_size / qp->rx_max_frame;
+	qp->rx_index = 0;
+
+	qp->remote_rx_info->entry = qp->rx_max_entry - 1;
+
+	/* Zero the payload header stored at the end of each frame */
+	for (i = 0; i < qp->rx_max_entry; i++) {
+		offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) -
+		    sizeof(struct ntb_payload_header));
+		memset(offset, 0, sizeof(struct ntb_payload_header));
+	}
+
+	qp->rx_pkts = 0;
+	qp->tx_pkts = 0;
+	qp->tx_index = 0;
+
+	return (0);
+}
+
+/* Advertise our ready queues; mark this qp up when the peer reports it. */
+static void
+ntb_qp_link_work(void *arg)
+{
+	struct ntb_transport_qp *qp = arg;
+	device_t dev = qp->dev;
+	struct ntb_transport_ctx *nt = qp->transport;
+	int i;
+	uint32_t val;
+
+	/* Report queues that are up on our side */
+	for (i = 0, val = 0; i < nt->qp_count; i++)
+		if (nt->qp_vec[i].client_ready)
+			val |= (1u << i);	/* 1 << 31 overflows int */
+	ntb_peer_spad_write(dev, NTBT_QP_LINKS, val);
+
+	/* See if the remote side is up */
+	ntb_spad_read(dev, NTBT_QP_LINKS, &val);
+	if ((val & (1ull << qp->qp_num)) != 0) {
+		ntb_printf(2, "qp %d link up\n", qp->qp_num);
+		qp->link_is_up = true;
+
+		if (qp->event_handler != NULL)
+			qp->event_handler(qp->cb_data, NTB_LINK_UP);
+
+		ntb_db_clear_mask(dev, 1ull << qp->qp_num);
+	} else if (nt->link_is_up)
+		callout_reset(&qp->link_work,
+		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
+}
+
+/* Link down event: clean up each qp set in qp_bitmap, then clear spads */
+static void
+ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
+{
+	struct ntb_transport_qp *qp;
+	int i;
+
+	/* Pass along the info to any clients (64-bit safe bit test) */
+	for (i = 0; i < nt->qp_count; i++) {
+		if ((nt->qp_bitmap & (1ull << i)) != 0) {
+			qp = &nt->qp_vec[i];
+			ntb_qp_link_cleanup(qp);
+			callout_drain(&qp->link_work);
+		}
+	}
+
+	if (!nt->link_is_up)
+		callout_drain(&nt->link_work);
+
+	/*
+	 * The scratchpad registers keep the values if the remote side
+	 * goes down, blast them now to give them a sane value the next
+	 * time they are accessed
+	 */
+	ntb_spad_clear(nt->dev);
+}
+
+static void
+ntb_transport_link_cleanup_work(void *arg, int pending __unused)
+{
+
+	ntb_transport_link_cleanup(arg);	/* arg: struct ntb_transport_ctx * */
+}
+
+static void
+ntb_qp_link_down(struct ntb_transport_qp *qp)
+{
+
+	ntb_qp_link_cleanup(qp);	/* plain wrapper; no extra work here */
+}
+
+static void
+ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+{
+
+	/* Mark the queue down and mask its doorbell bit first. */
+	qp->link_is_up = false;
+	ntb_db_set_mask(qp->dev, 1ull << qp->qp_num);
+
+	/* Rewind the ring positions and zero the traffic counters. */
+	qp->rx_index = qp->tx_index = 0;
+	qp->rx_bytes = qp->tx_bytes = 0;
+	qp->rx_pkts = qp->tx_pkts = 0;
+	qp->tx_ring_full = qp->rx_ring_empty = 0;
+	/* Zero the error counters. */
+	qp->tx_err_no_buf = qp->rx_err_no_buf = 0;
+	qp->rx_err_ver = qp->rx_err_oflow = 0;
+}
+
+static void
+ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
+{
+
+	callout_drain(&qp->link_work);	/* stop any pending link-up retry */
+	ntb_qp_link_down_reset(qp);
+
+	if (qp->event_handler != NULL)	/* tell the client, if registered */
+		qp->event_handler(qp->cb_data, NTB_LINK_DOWN);
+}
+
+/* Link commanded down by the client */
+/**
+ * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
+ * @qp: NTB transport layer queue to be disabled
+ *
+ * Notify NTB transport layer of client's desire to no longer receive data on
+ * transport queue specified.  It is the client's responsibility to ensure all
+ * entries on queue are purged or otherwise handled appropriately.
+ */
+void
+ntb_transport_link_down(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport_ctx *nt = qp->transport;
+	int i;
+	uint32_t val;
+
+	qp->client_ready = false;	/* excluded from the mask built below */
+	for (i = 0, val = 0; i < nt->qp_count; i++) {
+		if (nt->qp_vec[i].client_ready)
+			val |= (1u << i);	/* 1 << 31 overflows int */
+	}
+	ntb_peer_spad_write(qp->dev, NTBT_QP_LINKS, val);
+
+	if (qp->link_is_up)
+		ntb_send_link_down(qp);
+	else
+		callout_drain(&qp->link_work);	/* cancel pending bring-up */
+}
+
+/**
+ * ntb_transport_link_query - Query transport link state
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Report the queue's link state as currently recorded in qp->link_is_up.
+ *
+ * RETURNS: true for link up or false for link down
+ */
+bool
+ntb_transport_link_query(struct ntb_transport_qp *qp)
+{
+
+	return (qp->link_is_up);
+}
+
+static void
+ntb_send_link_down(struct ntb_transport_qp *qp)
+{
+	struct ntb_queue_entry *entry;
+	int i, rc;
+
+	if (!qp->link_is_up)
+		return;
+	/* Poll for a free tx entry, pausing hz / 10 ticks between tries. */
+	for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
+		entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+		if (entry != NULL)
+			break;
+		pause("NTB Wait for link down", hz / 10);
+	}
+
+	if (entry == NULL)
+		return;	/* no entry obtained; the qp state is left untouched */
+
+	entry->cb_data = NULL;
+	entry->buf = NULL;
+	entry->len = 0;
+	entry->flags = NTBT_LINK_DOWN_FLAG;	/* no payload, flag only */
+
+	mtx_lock(&qp->tx_lock);
+	rc = ntb_process_tx(qp, entry);
+	mtx_unlock(&qp->tx_lock);
+	if (rc != 0)
+		printf("ntb: Failed to send link down\n");
+
+	ntb_qp_link_down_reset(qp);
+}
+
+
+/* List Management */
+
+static void
+ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
+    struct ntb_queue_list *list)
+{
+
+	mtx_lock_spin(lock);
+	STAILQ_INSERT_TAIL(list, entry, entry);	/* 3rd arg is the link field */
+	mtx_unlock_spin(lock);
+}
+
+static struct ntb_queue_entry *
+ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list)
+{
+	struct ntb_queue_entry *first;
+
+	/*
+	 * Detach and return the head of the list while holding the spin
+	 * lock.  An empty list yields NULL.
+	 */
+	mtx_lock_spin(lock);
+	first = STAILQ_EMPTY(list) ? NULL : STAILQ_FIRST(list);
+	if (first != NULL)
+		STAILQ_REMOVE_HEAD(list, entry);
+	mtx_unlock_spin(lock);
+
+	return (first);
+}
+
+static struct ntb_queue_entry *
+ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from,
+    struct ntb_queue_list *to)
+{
+	struct ntb_queue_entry *moved;
+
+	/*
+	 * Under the spin lock, shift the head of 'from' to the tail of
+	 * 'to'.  The moved entry is returned, or NULL if 'from' is empty.
+	 */
+	mtx_lock_spin(lock);
+	moved = STAILQ_EMPTY(from) ? NULL : STAILQ_FIRST(from);
+	if (moved != NULL) {
+		STAILQ_REMOVE_HEAD(from, entry);
+		STAILQ_INSERT_TAIL(to, moved, entry);
+	}
+	mtx_unlock_spin(lock);
+	return (moved);
+}
+
+/**
+ * ntb_transport_qp_num - Query the qp number
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Return the qp_num field of the queue, converted to unsigned char.
+ *
+ * RETURNS: a zero based number specifying the qp number
+ */
+unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp)
+{
+
+	return (qp->qp_num);
+}
+
+/**
+ * ntb_transport_max_size - Query the max payload size of a qp
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query the maximum payload size permissible on the given qp
+ *
+ * RETURNS: tx_max_frame minus the size of struct ntb_payload_header
+ */
+unsigned int
+ntb_transport_max_size(struct ntb_transport_qp *qp)
+{
+
+	return (qp->tx_max_frame - sizeof(struct ntb_payload_header));
+}
+
+unsigned int
+ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
+{
+	unsigned int next = qp->tx_index, peer = qp->remote_rx_info->entry;
+
+	/* Gap from tx_index up to the peer's entry; add a lap if behind. */
+	return (peer < next ? qp->tx_max_entry + peer - next : peer - next);
+}
+
+static device_method_t ntb_transport_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,     ntb_transport_probe),
+	DEVMETHOD(device_attach,    ntb_transport_attach),
+	DEVMETHOD(device_detach,    ntb_transport_detach),
+	DEVMETHOD_END
+};
+
+devclass_t ntb_transport_devclass;	/* declared in ntb_transport.h */
+static DEFINE_CLASS_0(ntb_transport, ntb_transport_driver,
+    ntb_transport_methods, sizeof(struct ntb_transport_ctx));
+DRIVER_MODULE(ntb_transport, ntb_hw, ntb_transport_driver,
+    ntb_transport_devclass, NULL, NULL);
+MODULE_DEPEND(ntb_transport, ntb, 1, 1, 1);	/* needs module "ntb" v1 */
+MODULE_VERSION(ntb_transport, 1);
diff --git a/sys/dev/ntb/ntb_transport.h b/sys/dev/ntb/ntb_transport.h
new file mode 100644
index 0000000..63cdbce
--- /dev/null
+++ b/sys/dev/ntb/ntb_transport.h
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+struct ntb_transport_qp;
+
+extern devclass_t ntb_transport_devclass;
+
+enum ntb_link_event {
+	NTB_LINK_DOWN = 0,	/* given to event_handler on qp link cleanup */
+	NTB_LINK_UP,		/* given to event_handler when a qp comes up */
+};
+
+struct ntb_queue_handlers {
+	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+	    void *data, int len);	/* data: buffer handed to the client */
+	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+	    void *data, int len);
+	void (*event_handler)(void *data, enum ntb_link_event status);
+};
+
+int ntb_transport_queue_count(device_t dev);
+struct ntb_transport_qp *
+ntb_transport_create_queue(device_t dev, int q,
+    const struct ntb_queue_handlers *handlers, void *data);
+void ntb_transport_free_queue(struct ntb_transport_qp *qp);
+unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp);
+int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+			     unsigned int len);
+int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+			     unsigned int len);
+void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len);
+void ntb_transport_link_up(struct ntb_transport_qp *qp);
+void ntb_transport_link_down(struct ntb_transport_qp *qp);
+bool ntb_transport_link_query(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp);
diff --git a/sys/dev/pci/pci_pci.c b/sys/dev/pci/pci_pci.c
index 6e8d076..dedc55a 100644
--- a/sys/dev/pci/pci_pci.c
+++ b/sys/dev/pci/pci_pci.c
@@ -918,6 +918,7 @@ static void
 pcib_probe_hotplug(struct pcib_softc *sc)
 {
 	device_t dev;
+	uint16_t link_sta, slot_sta;
 
 	if (!pci_enable_pcie_hp)
 		return;
@@ -932,15 +933,29 @@ pcib_probe_hotplug(struct pcib_softc *sc)
 	sc->pcie_link_cap = pcie_read_config(dev, PCIER_LINK_CAP, 4);
 	sc->pcie_slot_cap = pcie_read_config(dev, PCIER_SLOT_CAP, 4);
 
+	if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC) == 0)
+		return;
+
 	/*
-	 * XXX: Handling of slots with a power controller needs to be
-	 * reexamined.  Ignore hotplug on such slots for now.
+	 * Some devices report that they have an MRL when they actually
+	 * do not.  Since they always report that the MRL is open, child
+	 * devices would be ignored.  Try to detect these devices and
+	 * ignore their claim of HotPlug support.
+	 *
+	 * If there is an open MRL but the Data Link Layer is active,
+	 * the MRL is not real.
 	 */
-	if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP)
-		return;
-	
-	if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC)
-		sc->flags |= PCIB_HOTPLUG;
+	if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) != 0 &&
+	    (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) != 0) {
+		link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2);
+		slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
+		if ((slot_sta & PCIEM_SLOT_STA_MRLSS) != 0 &&
+		    (link_sta & PCIEM_LINK_STA_DL_ACTIVE) != 0) {
+			return;
+		}
+	}
+
+	sc->flags |= PCIB_HOTPLUG;
 }
 
 /*
@@ -966,6 +981,8 @@ pcib_pcie_hotplug_command(struct pcib_softc *sc, uint16_t val, uint16_t mask)
 	new = (ctl & ~mask) | val;
 	if (new == ctl)
 		return;
+	if (bootverbose)
+		device_printf(dev, "HotPlug command: %04x -> %04x\n", ctl, new);
 	pcie_write_config(dev, PCIER_SLOT_CTL, new, 2);
 	if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS) &&
 	    (ctl & new) & PCIEM_SLOT_CTL_CCIE) {
@@ -1028,9 +1045,6 @@ pcib_hotplug_inserted(struct pcib_softc *sc)
 static int
 pcib_hotplug_present(struct pcib_softc *sc)
 {
-	device_t dev;
-
-	dev = sc->dev;
 
 	/* Card must be inserted. */
 	if (!pcib_hotplug_inserted(sc))
@@ -1059,7 +1073,7 @@ pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask,
 {
 	bool card_inserted, ei_engaged;
 
-	/* Clear DETACHING if Present Detect has cleared. */
+	/* Clear DETACHING if Presence Detect has cleared. */
 	if ((sc->pcie_slot_sta & (PCIEM_SLOT_STA_PDC | PCIEM_SLOT_STA_PDS)) ==
 	    PCIEM_SLOT_STA_PDC)
 		sc->flags &= ~PCIB_DETACHING;
@@ -1101,14 +1115,15 @@ pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask,
 
 	/*
 	 * Start a timer to see if the Data Link Layer times out.
-	 * Note that we only start the timer if Presence Detect
+	 * Note that we only start the timer if Presence Detect or MRL Sensor
 	 * changed on this interrupt.  Stop any scheduled timer if
 	 * the Data Link Layer is active.
 	 */
 	if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) {
 		if (card_inserted &&
 		    !(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) &&
-		    sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC) {
+		    sc->pcie_slot_sta &
+		    (PCIEM_SLOT_STA_MRLSC | PCIEM_SLOT_STA_PDC)) {
 			if (cold)
 				device_printf(sc->dev,
 				    "Data Link Layer inactive\n");
@@ -1144,6 +1159,10 @@ pcib_pcie_intr(void *arg)
 	/* Clear the events just reported. */
 	pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2);
 
+	if (bootverbose)
+		device_printf(dev, "HotPlug interrupt: %#x\n",
+		    sc->pcie_slot_sta);
+
 	if (sc->pcie_slot_sta & PCIEM_SLOT_STA_ABP) {
 		if (sc->flags & PCIB_DETACH_PENDING) {	
 			device_printf(dev,
@@ -1165,7 +1184,7 @@ pcib_pcie_intr(void *arg)
 		    sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS ? "open" :
 		    "closed");
 	if (bootverbose && sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC)
-		device_printf(dev, "Present Detect Changed to %s\n",
+		device_printf(dev, "Presence Detect Changed to %s\n",
 		    sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS ? "card present" :
 		    "empty");
 	if (sc->pcie_slot_sta & PCIEM_SLOT_STA_CC)
@@ -1234,7 +1253,7 @@ pcib_pcie_cc_timeout(void *arg)
 	sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
 	if (!(sta & PCIEM_SLOT_STA_CC)) {
 		device_printf(dev,
-		    "Hotplug Command Timed Out - forcing detach\n");
+		    "HotPlug Command Timed Out - forcing detach\n");
 		sc->flags &= ~(PCIB_HOTPLUG_CMD_PENDING | PCIB_DETACH_PENDING);
 		sc->flags |= PCIB_DETACHING;
 		pcib_pcie_hotplug_update(sc, 0, 0, true);
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 575e42f..d40de0f 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -794,7 +794,7 @@ pmap_init(void)
 	 * include at least one feature that is only supported by older Intel
 	 * or newer AMD processors.
 	 */
-	if (vm_guest == VM_GUEST_VM && (cpu_feature & CPUID_SS) == 0 &&
+	if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 &&
 	    (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI |
 	    CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP |
 	    AMDID2_FMA4)) == 0)
diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf
index d0ea97c..24bac6b 100644
--- a/sys/kern/capabilities.conf
+++ b/sys/kern/capabilities.conf
@@ -64,9 +64,7 @@ __sysctl
 ##
 ## XXRW: Need to check this very carefully.
 ##
-_umtx_lock
 _umtx_op
-_umtx_unlock
 
 ##
 ## Allow process termination using abort2(2).
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index c7447fb..b9e41af 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -736,6 +736,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
 	if (fr->fr_flags & RFPPWAIT) {
 		td->td_pflags |= TDP_RFPPWAIT;
 		td->td_rfppwait_p = p2;
+		td->td_dbgflags |= TDB_VFORK;
 	}
 	PROC_UNLOCK(p2);
 
@@ -1074,7 +1075,7 @@ fork_return(struct thread *td, struct trapframe *frame)
 			 * parent's children, do it now.
 			 */
 			dbg = p->p_pptr->p_pptr;
-			proc_set_traced(p);
+			proc_set_traced(p, true);
 			CTR2(KTR_PTRACE,
 		    "fork_return: attaching to new child pid %d: oppid %d",
 			    p->p_pid, p->p_oppid);
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 7af14a0..5a1b05e 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2537,7 +2537,7 @@ ptracestop(struct thread *td, int sig)
 		 * a chance to report itself upon the next iteration.
 		 */
 		if ((td->td_dbgflags & TDB_FSTP) != 0 ||
-		    ((p->p_flag & P2_PTRACE_FSTP) == 0 &&
+		    ((p->p_flag2 & P2_PTRACE_FSTP) == 0 &&
 		    p->p_xthread == NULL)) {
 			p->p_xsig = sig;
 			p->p_xthread = td;
diff --git a/sys/kern/subr_gtaskqueue.c b/sys/kern/subr_gtaskqueue.c
new file mode 100644
index 0000000..2d655bd
--- /dev/null
+++ b/sys/kern/subr_gtaskqueue.c
@@ -0,0 +1,864 @@
+/*-
+ * Copyright (c) 2000 Doug Rabson
+ * Copyright (c) 2014 Jeff Roberson
+ * Copyright (c) 2016 Matthew Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cpuset.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/libkern.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/gtaskqueue.h>
+#include <sys/unistd.h>
+#include <machine/stdarg.h>
+
+static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues");
+static void	gtaskqueue_thread_enqueue(void *);
+static void	gtaskqueue_thread_loop(void *arg);
+
+
+struct gtaskqueue_busy {
+	struct gtask	*tb_running;	/* task, NULL, or TB_DRAIN_WAITER */
+	TAILQ_ENTRY(gtaskqueue_busy) tb_link;	/* linkage on tq_active */
+};
+
+static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
+
+struct gtaskqueue {
+	STAILQ_HEAD(, gtask)	tq_queue;	/* tasks waiting to run */
+	gtaskqueue_enqueue_fn	tq_enqueue;	/* hook run after queuing */
+	void			*tq_context;	/* argument for tq_enqueue */
+	char			*tq_name;	/* allocated; names tq_mutex */
+	TAILQ_HEAD(, gtaskqueue_busy) tq_active;	/* busy records */
+	struct mtx		tq_mutex;
+	struct thread		**tq_threads;
+	int			tq_tcount;
+	int			tq_spin;	/* set when MTX_SPIN was given */
+	int			tq_flags;	/* TQ_FLAGS_* bits */
+	int			tq_callouts;	/* nonzero delays terminate */
+	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
+	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
+};
+
+#define	TQ_FLAGS_ACTIVE		(1 << 0)	/* cleared on free */
+#define	TQ_FLAGS_BLOCKED	(1 << 1)	/* enqueue hook held off */
+#define	TQ_FLAGS_UNLOCKED_ENQUEUE	(1 << 2)
+
+#define	DT_CALLOUT_ARMED	(1 << 0)
+
+#define	TQ_LOCK(tq)							\
+	do {								\
+		if ((tq)->tq_spin)					\
+			mtx_lock_spin(&(tq)->tq_mutex);			\
+		else							\
+			mtx_lock(&(tq)->tq_mutex);			\
+	} while (0)
+#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)
+
+#define	TQ_UNLOCK(tq)							\
+	do {								\
+		if ((tq)->tq_spin)					\
+			mtx_unlock_spin(&(tq)->tq_mutex);		\
+		else							\
+			mtx_unlock(&(tq)->tq_mutex);			\
+	} while (0)
+#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
+
+static __inline int
+TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
+    int t)
+{
+	/* Spin-mutex queues sleep via msleep_spin(), which takes no 'pri'. */
+	return (tq->tq_spin ? msleep_spin(p, m, wm, t) :
+	    msleep(p, m, pri, wm, t));
+}
+
+/* Allocate and initialize a gtaskqueue; returns NULL if malloc fails. */
+static struct gtaskqueue *
+_gtaskqueue_create(const char *name, int mflags,
+		 taskqueue_enqueue_fn enqueue, void *context,
+		 int mtxflags, const char *mtxname __unused)
+{
+	struct gtaskqueue *queue;
+	char *tq_name;
+
+	tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
+	if (!tq_name)
+		return (NULL);
+	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
+
+	queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
+	if (!queue) {
+		free(tq_name, M_GTASKQUEUE);	/* do not leak the name */
+		return (NULL);
+	}
+
+	STAILQ_INIT(&queue->tq_queue);
+	TAILQ_INIT(&queue->tq_active);
+	queue->tq_enqueue = enqueue;
+	queue->tq_context = context;
+	queue->tq_name = tq_name;
+	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
+	queue->tq_flags |= TQ_FLAGS_ACTIVE;
+	if (enqueue == gtaskqueue_thread_enqueue)
+		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
+	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
+	return (queue);
+}
+
+/*
+ * Repeat wakeup(tq) and sleep on pp until tq_tcount and tq_callouts are 0.
+ */
+static void
+gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
+{
+
+	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
+		wakeup(tq);
+		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
+	}
+}
+
+static void
+gtaskqueue_free(struct gtaskqueue *queue)
+{
+
+	TQ_LOCK(queue);
+	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;	/* mark as shutting down */
+	gtaskqueue_terminate(queue->tq_threads, queue);
+	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
+	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
+	mtx_destroy(&queue->tq_mutex);	/* destroyed while still held */
+	free(queue->tq_threads, M_GTASKQUEUE);
+	free(queue->tq_name, M_GTASKQUEUE);
+	free(queue, M_GTASKQUEUE);
+}
+
+int
+grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
+{
+	TQ_LOCK(queue);
+	if (gtask->ta_flags & TASK_ENQUEUED) {
+		TQ_UNLOCK(queue);
+		return (0);	/* already queued: nothing to do */
+	}
+	STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
+	gtask->ta_flags |= TASK_ENQUEUED;
+	TQ_UNLOCK(queue);
+	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)	/* read unlocked */
+		queue->tq_enqueue(queue->tq_context);
+	return (0);	/* every path returns 0 */
+}
+
+static void
+gtaskqueue_task_nop_fn(void *context)
+{	/* intentionally empty; used as the drain barrier task body */
+}
+
+/*
+ * Block until all currently queued tasks in this taskqueue
+ * have begun execution.  Tasks queued during execution of
+ * this function are ignored.
+ */
+static void
+gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
+{
+	struct gtask t_barrier;
+
+	if (STAILQ_EMPTY(&queue->tq_queue))
+		return;	/* nothing is queued */
+
+	/*
+	 * Enqueue our barrier after all current tasks, but with
+	 * the highest priority so that newly queued tasks cannot
+	 * pass it.  Because of the high priority, we can not use
+	 * taskqueue_enqueue_locked directly (which drops the lock
+	 * anyway) so just insert it at tail while we have the
+	 * queue lock.
+	 */
+	GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
+	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
+	t_barrier.ta_flags |= TASK_ENQUEUED;	/* cleared when dequeued */
+
+	/*
+	 * Once the barrier has executed, all previously queued tasks
+	 * have completed or are currently executing.
+	 */
+	while (t_barrier.ta_flags & TASK_ENQUEUED)
+		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
+}
+
+/*
+ * Block until all currently executing tasks for this taskqueue
+ * complete.  Tasks that begin execution during the execution
+ * of this function are ignored.
+ */
+static void
+gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
+{
+	struct gtaskqueue_busy tb_marker, *tb_first;
+
+	if (TAILQ_EMPTY(&queue->tq_active))
+		return;	/* no task is executing */
+
+	/* Block gtaskqueue_terminate(), which waits for tq_callouts == 0. */
+	queue->tq_callouts++;
+
+	/*
+	 * Wait for all currently executing taskqueue threads
+	 * to go idle.
+	 */
+	tb_marker.tb_running = TB_DRAIN_WAITER;
+	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
+	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
+		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
+	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
+
+	/*
+	 * Wakeup any other drain waiter that happened to queue up
+	 * without any intervening active thread.
+	 */
+	tb_first = TAILQ_FIRST(&queue->tq_active);
+	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
+		wakeup(tb_first);
+
+	/* Release gtaskqueue_terminate(). */
+	queue->tq_callouts--;
+	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
+		wakeup_one(queue->tq_threads);
+}
+
+void
+gtaskqueue_block(struct gtaskqueue *queue)
+{
+
+	TQ_LOCK(queue);
+	queue->tq_flags |= TQ_FLAGS_BLOCKED;	/* enqueue skips tq_enqueue */
+	TQ_UNLOCK(queue);
+}
+
+void
+gtaskqueue_unblock(struct gtaskqueue *queue)
+{
+
+	TQ_LOCK(queue);
+	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
+	if (!STAILQ_EMPTY(&queue->tq_queue))	/* tasks are still queued */
+		queue->tq_enqueue(queue->tq_context);
+	TQ_UNLOCK(queue);
+}
+
+static void
+gtaskqueue_run_locked(struct gtaskqueue *queue)
+{
+	struct gtaskqueue_busy tb;
+	struct gtaskqueue_busy *tb_first;
+	struct gtask *gtask;
+
+	KASSERT(queue != NULL, ("tq is NULL"));
+	TQ_ASSERT_LOCKED(queue);
+	tb.tb_running = NULL;
+
+	while (STAILQ_FIRST(&queue->tq_queue)) {
+		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
+
+		/*
+		 * Carefully remove the first task from the queue and
+		 * clear its TASK_ENQUEUED flag
+		 */
+		gtask = STAILQ_FIRST(&queue->tq_queue);
+		KASSERT(gtask != NULL, ("task is NULL"));
+		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
+		gtask->ta_flags &= ~TASK_ENQUEUED;
+		tb.tb_running = gtask;
+		TQ_UNLOCK(queue);	/* the task runs without the queue lock */
+
+		KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
+		gtask->ta_func(gtask->ta_context);
+
+		TQ_LOCK(queue);
+		tb.tb_running = NULL;
+		wakeup(gtask);	/* wakes sleepers waiting on this task */
+
+		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
+		tb_first = TAILQ_FIRST(&queue->tq_active);
+		if (tb_first != NULL &&
+		    tb_first->tb_running == TB_DRAIN_WAITER)
+			wakeup(tb_first);
+	}
+}
+
+static int
+task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
+{
+	struct gtaskqueue_busy *tb;
+
+	TQ_ASSERT_LOCKED(queue);
+	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
+		if (tb->tb_running == gtask)
+			return (1);
+	}
+	return (0);
+}
+
+static int
+gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
+{
+
+	if (gtask->ta_flags & TASK_ENQUEUED)
+		STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
+	gtask->ta_flags &= ~TASK_ENQUEUED;
+	return (task_is_running(queue, gtask) ? EBUSY : 0);
+}
+
+int
+gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
+{
+	int error;
+
+	TQ_LOCK(queue);
+	error = gtaskqueue_cancel_locked(queue, gtask);
+	TQ_UNLOCK(queue);
+
+	return (error);
+}
+
+void
+gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
+{
+
+	if (!queue->tq_spin)
+		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
+
+	TQ_LOCK(queue);
+	while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
+		TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
+	TQ_UNLOCK(queue);
+}
+
+void
+gtaskqueue_drain_all(struct gtaskqueue *queue)
+{
+
+	if (!queue->tq_spin)
+		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
+
+	TQ_LOCK(queue);
+	gtaskqueue_drain_tq_queue(queue);
+	gtaskqueue_drain_tq_active(queue);
+	TQ_UNLOCK(queue);
+}
+
+/* Spawn count workers for *tqp at priority pri, pinned to mask if non-NULL. */
+static int
+_gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
+    cpuset_t *mask, const char *name, va_list ap)
+{
+	char ktname[MAXCOMLEN + 1];
+	struct thread *td;
+	struct gtaskqueue *tq;
+	int i, error;
+
+	if (count <= 0)
+		return (EINVAL);
+
+	vsnprintf(ktname, sizeof(ktname), name, ap);
+	tq = *tqp;
+
+	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
+	    M_NOWAIT | M_ZERO);
+	if (tq->tq_threads == NULL) {
+		printf("%s: no memory for %s threads\n", __func__, ktname);
+		return (ENOMEM);
+	}
+
+	for (i = 0; i < count; i++) {
+		if (count == 1)
+			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
+			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
+		else
+			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
+			    &tq->tq_threads[i], RFSTOPPED, 0,
+			    "%s_%d", ktname, i);
+		if (error) {
+			/* should be ok to continue, taskqueue_free will dtrt */
+			printf("%s: kthread_add(%s): error %d\n", __func__,
+			    ktname, error);
+			tq->tq_threads[i] = NULL;		/* paranoid */
+		} else
+			tq->tq_tcount++;
+	}
+	for (i = 0; i < count; i++) {
+		if (tq->tq_threads[i] == NULL)
+			continue;
+		td = tq->tq_threads[i];
+		if (mask) {
+			error = cpuset_setthread(td->td_tid, mask);
+			/*
+			 * Failing to pin is rarely an actual fatal error;
+			 * it'll just affect performance.
+			 */
+			if (error)
+				printf("%s: curthread=%llu: can't pin; "
+				    "error=%d\n", __func__,
+				    (unsigned long long) td->td_tid,
+				    error);
+		}
+		thread_lock(td);
+		sched_prio(td, pri);
+		sched_add(td, SRQ_BORING);
+		thread_unlock(td);
+	}
+
+	return (0);
+}
+
+static int
+gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
+    const char *name, ...)
+{
+	va_list ap;
+	int error;
+
+	va_start(ap, name);
+	error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
+	va_end(ap);
+	return (error);
+}
+
+static inline void
+gtaskqueue_run_callback(struct gtaskqueue *tq,
+    enum taskqueue_callback_type cb_type)
+{
+	taskqueue_callback_fn tq_callback;
+
+	TQ_ASSERT_UNLOCKED(tq);
+	tq_callback = tq->tq_callbacks[cb_type];
+	if (tq_callback != NULL)
+		tq_callback(tq->tq_cb_contexts[cb_type]);
+}
+
+static void
+gtaskqueue_thread_loop(void *arg)
+{
+	struct gtaskqueue **tqp, *tq;
+
+	tqp = arg;
+	tq = *tqp;
+	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
+	TQ_LOCK(tq);
+	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
+		/* Run all queued tasks; the lock is dropped around each one. */
+		gtaskqueue_run_locked(tq);
+		/*
+		 * gtaskqueue_run_locked() drops tq_mutex around each task,
+		 * so TQ_FLAGS_ACTIVE may have been cleared in the meantime;
+		 * if so we missed that wakeup and must not sleep.
+		 */
+		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
+			break;
+		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
+	}
+	gtaskqueue_run_locked(tq);
+	/*
+	 * This thread is on its way out, so just drop the lock temporarily
+	 * in order to call the shutdown callback.  This allows the callback
+	 * to look at the taskqueue, even just before it dies.
+	 */
+	TQ_UNLOCK(tq);
+	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
+	TQ_LOCK(tq);
+
+	/* rendezvous with thread that asked us to terminate */
+	tq->tq_tcount--;
+	wakeup_one(tq->tq_threads);
+	TQ_UNLOCK(tq);
+	kthread_exit();
+}
+
+static void
+gtaskqueue_thread_enqueue(void *context)
+{
+	struct gtaskqueue **tqp, *tq;
+
+	tqp = context;
+	tq = *tqp;
+	wakeup_one(tq);
+}
+
+
+static struct gtaskqueue *
+gtaskqueue_create_fast(const char *name, int mflags,
+		 taskqueue_enqueue_fn enqueue, void *context)
+{
+	return _gtaskqueue_create(name, mflags, enqueue, context,
+			MTX_SPIN, "fast_taskqueue");
+}
+
+
+struct taskqgroup_cpu {
+	LIST_HEAD(, grouptask)	tgc_tasks;
+	struct gtaskqueue	*tgc_taskq;
+	int	tgc_cnt;
+	int	tgc_cpu;
+};
+
+struct taskqgroup {
+	struct taskqgroup_cpu tqg_queue[MAXCPU];
+	struct mtx	tqg_lock;
+	char *		tqg_name;
+	int		tqg_adjusting;
+	int		tqg_stride;
+	int		tqg_cnt;
+};
+
+struct taskq_bind_task {
+	struct gtask bt_task;
+	int	bt_cpuid;
+};
+
+/* Set up queue idx of the group: create its taskqueue and one worker. */
+static void
+taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx)
+{
+	struct taskqgroup_cpu *qcpu = &qgroup->tqg_queue[idx];
+
+	LIST_INIT(&qcpu->tgc_tasks);
+	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
+	    gtaskqueue_thread_enqueue, &qcpu->tgc_taskq);
+	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
+	    "%s_%d", qgroup->tqg_name, idx);
+	qcpu->tgc_cpu = idx * qgroup->tqg_stride;
+}
+
+static void
+taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
+{
+
+	gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
+}
+
+/*
+ * Return the index of the queue with the fewest tasks, preferring one that
+ * does not already hold a task with the same uniq identifier.
+ */
+static int
+taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
+{
+	struct grouptask *n;
+	int i, idx, mincnt;
+	int strict;
+
+	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
+	if (qgroup->tqg_cnt == 0)
+		return (0);
+	idx = -1;
+	mincnt = INT_MAX;
+	/*
+	 * Two passes: first look for the least-loaded queue that does not
+	 * already service this uniq id.  If none qualifies, fall back to
+	 * the least-loaded queue overall.
+	 */
+	for (strict = 1; mincnt == INT_MAX; strict = 0) {
+		for (i = 0; i < qgroup->tqg_cnt; i++) {
+			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
+				continue;
+			if (strict) {
+				LIST_FOREACH(n,
+				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
+					if (n->gt_uniq == uniq)
+						break;
+				if (n != NULL)
+					continue;
+			}
+			mincnt = qgroup->tqg_queue[i].tgc_cnt;
+			idx = i;
+		}
+	}
+	if (idx == -1)
+		panic("taskqgroup_find: Failed to pick a qid.");
+
+	return (idx);
+}
+
+void
+taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
+    void *uniq, int irq, char *name)
+{
+	cpuset_t mask;
+	int qid;
+
+	gtask->gt_uniq = uniq;
+	gtask->gt_name = name;
+	gtask->gt_irq = irq;
+	gtask->gt_cpu = -1;
+	mtx_lock(&qgroup->tqg_lock);
+	qid = taskqgroup_find(qgroup, uniq);
+	qgroup->tqg_queue[qid].tgc_cnt++;
+	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
+	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
+	if (irq != -1 && smp_started) {
+		CPU_ZERO(&mask);
+		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
+		mtx_unlock(&qgroup->tqg_lock);
+		intr_setaffinity(irq, &mask);
+	} else
+		mtx_unlock(&qgroup->tqg_lock);
+}
+
+int
+taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
+	void *uniq, int cpu, int irq, char *name)
+{
+	cpuset_t mask;
+	int i, qid;
+
+	qid = -1;
+	gtask->gt_uniq = uniq;
+	gtask->gt_name = name;
+	gtask->gt_irq = irq;
+	gtask->gt_cpu = cpu;
+	mtx_lock(&qgroup->tqg_lock);
+	if (smp_started) {
+		for (i = 0; i < qgroup->tqg_cnt; i++)
+			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
+				qid = i;
+				break;
+			}
+		if (qid == -1) {
+			mtx_unlock(&qgroup->tqg_lock);
+			return (EINVAL);
+		}
+	} else
+		qid = 0;
+	qgroup->tqg_queue[qid].tgc_cnt++;
+	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
+	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
+	if (irq != -1 && smp_started) {
+		CPU_ZERO(&mask);
+		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
+		mtx_unlock(&qgroup->tqg_lock);
+		intr_setaffinity(irq, &mask);
+	} else
+		mtx_unlock(&qgroup->tqg_lock);
+	return (0);
+}
+
+void
+taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
+{
+	int i;
+
+	mtx_lock(&qgroup->tqg_lock);
+	for (i = 0; i < qgroup->tqg_cnt; i++)
+		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
+			break;
+	if (i == qgroup->tqg_cnt)
+		panic("taskqgroup_detach: task not in group\n");
+	qgroup->tqg_queue[i].tgc_cnt--;
+	LIST_REMOVE(gtask, gt_list);
+	mtx_unlock(&qgroup->tqg_lock);
+	gtask->gt_taskqueue = NULL;
+}
+
+static void
+taskqgroup_binder(void *ctx)
+{
+	struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
+	cpuset_t mask;
+	int error;
+
+	CPU_ZERO(&mask);
+	CPU_SET(gtask->bt_cpuid, &mask);
+	error = cpuset_setthread(curthread->td_tid, &mask);
+	thread_lock(curthread);
+	sched_bind(curthread, gtask->bt_cpuid);
+	thread_unlock(curthread);
+
+	if (error)
+		printf("taskqgroup_binder: setaffinity failed: %d\n",
+		    error);
+	free(gtask, M_DEVBUF);
+}
+
+/* Queue a binder task on every group queue to pin its thread to tgc_cpu. */
+static void
+taskqgroup_bind(struct taskqgroup *qgroup)
+{
+	struct taskq_bind_task *gtask;
+	int i;
+
+	for (i = 0; i < qgroup->tqg_cnt; i++) {
+		/* M_NOWAIT can fail; leave that thread unbound, don't crash. */
+		gtask = malloc(sizeof (*gtask), M_DEVBUF, M_NOWAIT);
+		if (gtask == NULL)
+			continue;
+		GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
+		gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
+		grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
+		    &gtask->bt_task);
+	}
+}
+
+/* Resize the group to cnt queues and re-home its tasks; needs tqg_lock. */
+static int
+_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
+{
+	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
+	cpuset_t mask;
+	struct grouptask *gtask;
+	int i, old_cnt, qid;
+
+	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
+
+	if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) {
+		printf("taskqgroup_adjust failed cnt: %d stride: %d mp_ncpus: %d smp_started: %d\n",
+			   cnt, stride, mp_ncpus, smp_started);
+		return (EINVAL);
+	}
+	if (qgroup->tqg_adjusting) {
+		printf("taskqgroup_adjust failed: adjusting\n");
+		return (EBUSY);
+	}
+	qgroup->tqg_adjusting = 1;
+	old_cnt = qgroup->tqg_cnt;
+	mtx_unlock(&qgroup->tqg_lock);
+	/*
+	 * Set up queue for tasks added before boot.
+	 */
+	if (old_cnt == 0) {
+		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
+		    grouptask, gt_list);
+		qgroup->tqg_queue[0].tgc_cnt = 0;
+	}
+
+	/*
+	 * Create the additional queues when the group grows.
+	 */
+	for (i = old_cnt; i < cnt; i++)
+		taskqgroup_cpu_create(qgroup, i);
+	mtx_lock(&qgroup->tqg_lock);
+	qgroup->tqg_cnt = cnt;
+	qgroup->tqg_stride = stride;
+
+	/*
+	 * Adjust drivers to use new taskqs.
+	 */
+	for (i = 0; i < old_cnt; i++) {
+		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
+			LIST_REMOVE(gtask, gt_list);
+			qgroup->tqg_queue[i].tgc_cnt--;
+			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
+		}
+	}
+
+	while ((gtask = LIST_FIRST(&gtask_head))) {
+		LIST_REMOVE(gtask, gt_list);
+		/* Fall back to the least-loaded queue if no CPU matches. */
+		qid = taskqgroup_find(qgroup, gtask->gt_uniq);
+		if (gtask->gt_cpu != -1) {
+			for (i = 0; i < qgroup->tqg_cnt; i++)
+				if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) {
+					qid = i;
+					break;
+				}
+		}
+		qgroup->tqg_queue[qid].tgc_cnt++;
+		LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
+		    gt_list);
+		gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
+	}
+	/*
+	 * Set new CPU and IRQ affinity
+	 */
+	for (i = 0; i < cnt; i++) {
+		qgroup->tqg_queue[i].tgc_cpu = i * qgroup->tqg_stride;
+		CPU_ZERO(&mask);
+		CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask);
+		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) {
+			if (gtask->gt_irq == -1)
+				continue;
+			intr_setaffinity(gtask->gt_irq, &mask);
+		}
+	}
+	mtx_unlock(&qgroup->tqg_lock);
+
+	/*
+	 * Tear down the surplus queues when the group shrinks.
+	 */
+	for (i = cnt; i < old_cnt; i++)
+		taskqgroup_cpu_remove(qgroup, i);
+
+	mtx_lock(&qgroup->tqg_lock);
+	qgroup->tqg_adjusting = 0;
+	taskqgroup_bind(qgroup);
+
+	return (0);
+}
+
+int
+taskqgroup_adjust(struct taskqgroup *qgroup, int cpu, int stride)
+{
+	int error;
+
+	mtx_lock(&qgroup->tqg_lock);
+	error = _taskqgroup_adjust(qgroup, cpu, stride);
+	mtx_unlock(&qgroup->tqg_lock);
+
+	return (error);
+}
+
+struct taskqgroup *
+taskqgroup_create(char *name)
+{
+	struct taskqgroup *qgroup;
+
+	qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
+	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
+	qgroup->tqg_name = name;
+	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
+
+	return (qgroup);
+}
+
+void
+taskqgroup_destroy(struct taskqgroup *qgroup)
+{
+
+}
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index 65fb3e7..e48072c 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -148,6 +148,7 @@ static const char *const vm_guest_sysctl_names[] = {
 	"xen",
 	"hv",
 	"vmware",
+	"kvm",
 	NULL
 };
 CTASSERT(nitems(vm_guest_sysctl_names) - 1 == VM_LAST);
diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c
index aad2a1e..3e2a3b3 100644
--- a/sys/kern/subr_syscall.c
+++ b/sys/kern/subr_syscall.c
@@ -242,5 +242,13 @@ again:
 			cv_timedwait(&p2->p_pwait, &p2->p_mtx, hz);
 		}
 		PROC_UNLOCK(p2);
+
+		if (td->td_dbgflags & TDB_VFORK) {
+			PROC_LOCK(p);
+			if (p->p_ptevents & PTRACE_VFORK)
+				ptracestop(td, SIGTRAP);
+			td->td_dbgflags &= ~TDB_VFORK;
+			PROC_UNLOCK(p);
+		}
 	}
 }
diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c
index 12124b8..5a20148 100644
--- a/sys/kern/subr_taskqueue.c
+++ b/sys/kern/subr_taskqueue.c
@@ -261,22 +261,6 @@ taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
 }
 
 int
-grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task)
-{
-	TQ_LOCK(queue);
-	if (task->ta_pending) {
-		TQ_UNLOCK(queue);
-		return (0);
-	}
-	STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
-	task->ta_pending = 1;
-	TQ_UNLOCK(queue);
-	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
-		queue->tq_enqueue(queue->tq_context);
-	return (0);
-}
-
-int
 taskqueue_enqueue(struct taskqueue *queue, struct task *task)
 {
 	int res;
@@ -806,347 +790,3 @@ taskqueue_member(struct taskqueue *queue, struct thread *td)
 	}
 	return (ret);
 }
-
-struct taskqgroup_cpu {
-	LIST_HEAD(, grouptask)	tgc_tasks;
-	struct taskqueue	*tgc_taskq;
-	int	tgc_cnt;
-	int	tgc_cpu;
-};
-
-struct taskqgroup {
-	struct taskqgroup_cpu tqg_queue[MAXCPU];
-	struct mtx	tqg_lock;
-	char *		tqg_name;
-	int		tqg_adjusting;
-	int		tqg_stride;
-	int		tqg_cnt;
-};
-
-struct taskq_bind_task {
-	struct task bt_task;
-	int	bt_cpuid;
-};
-
-static void
-taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx)
-{
-	struct taskqgroup_cpu *qcpu;
-	int i, j;
-
-	qcpu = &qgroup->tqg_queue[idx];
-	LIST_INIT(&qcpu->tgc_tasks);
-	qcpu->tgc_taskq = taskqueue_create_fast(NULL, M_WAITOK,
-	    taskqueue_thread_enqueue, &qcpu->tgc_taskq);
-	taskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
-	    "%s_%d", qgroup->tqg_name, idx);
-
-	for (i = CPU_FIRST(), j = 0; j < idx * qgroup->tqg_stride;
-	    j++, i = CPU_NEXT(i)) {
-		/*
-		 * Wait: evaluate the idx * qgroup->tqg_stride'th CPU,
-		 * potentially wrapping the actual count
-		 */
-	}
-	qcpu->tgc_cpu = i;
-}
-
-static void
-taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
-{
-
-	taskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
-}
-
-/*
- * Find the taskq with least # of tasks that doesn't currently have any
- * other queues from the uniq identifier.
- */
-static int
-taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
-{
-	struct grouptask *n;
-	int i, idx, mincnt;
-	int strict;
-
-	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
-	if (qgroup->tqg_cnt == 0)
-		return (0);
-	idx = -1;
-	mincnt = INT_MAX;
-	/*
-	 * Two passes;  First scan for a queue with the least tasks that
-	 * does not already service this uniq id.  If that fails simply find
-	 * the queue with the least total tasks;
-	 */
-	for (strict = 1; mincnt == INT_MAX; strict = 0) {
-		for (i = 0; i < qgroup->tqg_cnt; i++) {
-			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
-				continue;
-			if (strict) {
-				LIST_FOREACH(n,
-				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
-					if (n->gt_uniq == uniq)
-						break;
-				if (n != NULL)
-					continue;
-			}
-			mincnt = qgroup->tqg_queue[i].tgc_cnt;
-			idx = i;
-		}
-	}
-	if (idx == -1)
-		panic("taskqgroup_find: Failed to pick a qid.");
-
-	return (idx);
-}
-
-void
-taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
-    void *uniq, int irq, char *name)
-{
-	cpuset_t mask;
-	int qid;
-
-	gtask->gt_uniq = uniq;
-	gtask->gt_name = name;
-	gtask->gt_irq = irq;
-	gtask->gt_cpu = -1;
-	mtx_lock(&qgroup->tqg_lock);
-	qid = taskqgroup_find(qgroup, uniq);
-	qgroup->tqg_queue[qid].tgc_cnt++;
-	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
-	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
-	if (irq != -1 && smp_started) {
-		CPU_ZERO(&mask);
-		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
-		mtx_unlock(&qgroup->tqg_lock);
-		intr_setaffinity(irq, &mask);
-	} else
-		mtx_unlock(&qgroup->tqg_lock);
-}
-
-int
-taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
-	void *uniq, int cpu, int irq, char *name)
-{
-	cpuset_t mask;
-	int i, qid;
-
-	qid = -1;
-	gtask->gt_uniq = uniq;
-	gtask->gt_name = name;
-	gtask->gt_irq = irq;
-	gtask->gt_cpu = cpu;
-	mtx_lock(&qgroup->tqg_lock);
-	if (smp_started) {
-		for (i = 0; i < qgroup->tqg_cnt; i++)
-			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
-				qid = i;
-				break;
-			}
-		if (qid == -1) {
-			mtx_unlock(&qgroup->tqg_lock);
-			return (EINVAL);
-		}
-	} else
-		qid = 0;
-	qgroup->tqg_queue[qid].tgc_cnt++;
-	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
-	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
-	if (irq != -1 && smp_started) {
-		CPU_ZERO(&mask);
-		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
-		mtx_unlock(&qgroup->tqg_lock);
-		intr_setaffinity(irq, &mask);
-	} else
-		mtx_unlock(&qgroup->tqg_lock);
-	return (0);
-}
-
-void
-taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
-{
-	int i;
-
-	mtx_lock(&qgroup->tqg_lock);
-	for (i = 0; i < qgroup->tqg_cnt; i++)
-		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
-			break;
-	if (i == qgroup->tqg_cnt)
-		panic("taskqgroup_detach: task not in group\n");
-	qgroup->tqg_queue[i].tgc_cnt--;
-	LIST_REMOVE(gtask, gt_list);
-	mtx_unlock(&qgroup->tqg_lock);
-	gtask->gt_taskqueue = NULL;
-}
-
-static void
-taskqgroup_binder(void *ctx, int pending)
-{
-	struct taskq_bind_task *task = (struct taskq_bind_task *)ctx;
-	cpuset_t mask;
-	int error;
-
-	CPU_ZERO(&mask);
-	CPU_SET(task->bt_cpuid, &mask);
-	error = cpuset_setthread(curthread->td_tid, &mask);
-	thread_lock(curthread);
-	sched_bind(curthread, task->bt_cpuid);
-	thread_unlock(curthread);
-
-	if (error)
-		printf("taskqgroup_binder: setaffinity failed: %d\n",
-		    error);
-	free(task, M_DEVBUF);
-}
-
-static void
-taskqgroup_bind(struct taskqgroup *qgroup)
-{
-	struct taskq_bind_task *task;
-	int i;
-
-	/*
-	 * Bind taskqueue threads to specific CPUs, if they have been assigned
-	 * one.
-	 */
-	for (i = 0; i < qgroup->tqg_cnt; i++) {
-		task = malloc(sizeof (*task), M_DEVBUF, M_NOWAIT);
-		TASK_INIT(&task->bt_task, 0, taskqgroup_binder, task);
-		task->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
-		taskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
-		    &task->bt_task);
-	}
-}
-
-static int
-_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
-{
-	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
-	cpuset_t mask;
-	struct grouptask *gtask;
-	int i, k, old_cnt, qid, cpu;
-
-	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
-
-	if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) {
-		printf("taskqgroup_adjust failed cnt: %d stride: %d "
-		    "mp_ncpus: %d smp_started: %d\n", cnt, stride, mp_ncpus,
-		    smp_started);
-		return (EINVAL);
-	}
-	if (qgroup->tqg_adjusting) {
-		printf("taskqgroup_adjust failed: adjusting\n");
-		return (EBUSY);
-	}
-	qgroup->tqg_adjusting = 1;
-	old_cnt = qgroup->tqg_cnt;
-	mtx_unlock(&qgroup->tqg_lock);
-	/*
-	 * Set up queue for tasks added before boot.
-	 */
-	if (old_cnt == 0) {
-		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
-		    grouptask, gt_list);
-		qgroup->tqg_queue[0].tgc_cnt = 0;
-	}
-
-	/*
-	 * If new taskq threads have been added.
-	 */
-	for (i = old_cnt; i < cnt; i++)
-		taskqgroup_cpu_create(qgroup, i);
-	mtx_lock(&qgroup->tqg_lock);
-	qgroup->tqg_cnt = cnt;
-	qgroup->tqg_stride = stride;
-
-	/*
-	 * Adjust drivers to use new taskqs.
-	 */
-	for (i = 0; i < old_cnt; i++) {
-		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
-			LIST_REMOVE(gtask, gt_list);
-			qgroup->tqg_queue[i].tgc_cnt--;
-			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
-		}
-	}
-
-	while ((gtask = LIST_FIRST(&gtask_head))) {
-		LIST_REMOVE(gtask, gt_list);
-		if (gtask->gt_cpu == -1)
-			qid = taskqgroup_find(qgroup, gtask->gt_uniq);
-		else {
-			for (i = 0; i < qgroup->tqg_cnt; i++)
-				if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) {
-					qid = i;
-					break;
-				}
-		}
-		qgroup->tqg_queue[qid].tgc_cnt++;
-		LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
-		    gt_list);
-		gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
-	}
-	/*
-	 * Set new CPU and IRQ affinity
-	 */
-	cpu = CPU_FIRST();
-	for (i = 0; i < cnt; i++) {
-		qgroup->tqg_queue[i].tgc_cpu = cpu;
-		for (k = 0; k < qgroup->tqg_stride; k++)
-			cpu = CPU_NEXT(cpu);
-		CPU_ZERO(&mask);
-		CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask);
-		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) {
-			if (gtask->gt_irq == -1)
-				continue;
-			intr_setaffinity(gtask->gt_irq, &mask);
-		}
-	}
-	mtx_unlock(&qgroup->tqg_lock);
-
-	/*
-	 * If taskq thread count has been reduced.
-	 */
-	for (i = cnt; i < old_cnt; i++)
-		taskqgroup_cpu_remove(qgroup, i);
-
-	mtx_lock(&qgroup->tqg_lock);
-	qgroup->tqg_adjusting = 0;
-
-	taskqgroup_bind(qgroup);
-
-	return (0);
-}
-
-int
-taskqgroup_adjust(struct taskqgroup *qgroup, int cpu, int stride)
-{
-	int error;
-
-	mtx_lock(&qgroup->tqg_lock);
-	error = _taskqgroup_adjust(qgroup, cpu, stride);
-	mtx_unlock(&qgroup->tqg_lock);
-
-	return (error);
-}
-
-struct taskqgroup *
-taskqgroup_create(char *name)
-{
-	struct taskqgroup *qgroup;
-
-	qgroup = malloc(sizeof(*qgroup), M_TASKQUEUE, M_WAITOK | M_ZERO);
-	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
-	qgroup->tqg_name = name;
-	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
-
-	return (qgroup);
-}
-
-void
-taskqgroup_destroy(struct taskqgroup *qgroup)
-{
-
-}
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 751db15..69a3e4b 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -693,12 +693,13 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
 #endif
 
 void
-proc_set_traced(struct proc *p)
+proc_set_traced(struct proc *p, bool stop)
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	p->p_flag |= P_TRACED;
-	p->p_flag2 |= P2_PTRACE_FSTP;
+	if (stop)
+		p->p_flag2 |= P2_PTRACE_FSTP;
 	p->p_ptevents = PTRACE_DEFAULT;
 	p->p_oppid = p->p_pptr->p_pid;
 }
@@ -910,7 +911,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 	switch (req) {
 	case PT_TRACE_ME:
 		/* set my trace flag and "owner" so it can read/write me */
-		proc_set_traced(p);
+		proc_set_traced(p, false);
 		if (p->p_flag & P_PPWAIT)
 			p->p_flag |= P_PPTRACE;
 		CTR1(KTR_PTRACE, "PT_TRACE_ME: pid %d", p->p_pid);
@@ -927,7 +928,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 		 * The old parent is remembered so we can put things back
 		 * on a "detach".
 		 */
-		proc_set_traced(p);
+		proc_set_traced(p, true);
 		if (p->p_pptr != td->td_proc) {
 			proc_reparent(p, td->td_proc);
 		}
@@ -1000,7 +1001,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 		}
 		tmp = *(int *)addr;
 		if ((tmp & ~(PTRACE_EXEC | PTRACE_SCE | PTRACE_SCX |
-		    PTRACE_FORK | PTRACE_LWP)) != 0) {
+		    PTRACE_FORK | PTRACE_LWP | PTRACE_VFORK)) != 0) {
 			error = EINVAL;
 			break;
 		}
@@ -1321,7 +1322,11 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 		if (td2->td_dbgflags & TDB_FORK) {
 			pl->pl_flags |= PL_FLAG_FORKED;
 			pl->pl_child_pid = td2->td_dbg_forked;
-		}
+			if (td2->td_dbgflags & TDB_VFORK)
+				pl->pl_flags |= PL_FLAG_VFORKED;
+		} else if ((td2->td_dbgflags & (TDB_SCX | TDB_VFORK)) ==
+		    TDB_VFORK)
+			pl->pl_flags |= PL_FLAG_VFORK_DONE;
 		if (td2->td_dbgflags & TDB_CHILD)
 			pl->pl_flags |= PL_FLAG_CHILD;
 		if (td2->td_dbgflags & TDB_BORN)
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index b48eea1..dce0e1a 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -1441,7 +1441,7 @@ static struct aiocb_ops aiocb_ops_osigevent = {
  */
 int
 aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
-	int type, struct aiocb_ops *ops)
+    int type, struct aiocb_ops *ops)
 {
 	struct proc *p = td->td_proc;
 	cap_rights_t rights;
@@ -2476,14 +2476,9 @@ static int
 kern_aio_fsync(struct thread *td, int op, struct aiocb *ujob,
     struct aiocb_ops *ops)
 {
-	struct proc *p = td->td_proc;
-	struct kaioinfo *ki;
 
 	if (op != O_SYNC) /* XXX lack of O_DSYNC */
 		return (EINVAL);
-	ki = p->p_aioinfo;
-	if (ki == NULL)
-		aio_init_aioinfo(p);
 	return (aio_aqueue(td, ujob, NULL, LIO_SYNC, ops));
 }
 
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index a7977bf..d9182f2 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -640,7 +640,6 @@ int
 vop_stdfsync(ap)
 	struct vop_fsync_args /* {
 		struct vnode *a_vp;
-		struct ucred *a_cred;
 		int a_waitfor;
 		struct thread *a_td;
 	} */ *ap;
diff --git a/sys/modules/ntb/Makefile b/sys/modules/ntb/Makefile
index a5169a0..3eaf751 100644
--- a/sys/modules/ntb/Makefile
+++ b/sys/modules/ntb/Makefile
@@ -1,5 +1,5 @@
 # $FreeBSD$
 
-SUBDIR=	ntb_hw if_ntb
+SUBDIR=	ntb ntb_hw ntb_transport if_ntb
 
 .include <bsd.subdir.mk>
diff --git a/sys/modules/ntb/ntb/Makefile b/sys/modules/ntb/ntb/Makefile
new file mode 100644
index 0000000..c3f647b
--- /dev/null
+++ b/sys/modules/ntb/ntb/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+.PATH:  ${.CURDIR}/../../../dev/ntb
+
+KMOD    = ntb
+SRCS    = ntb.c ntb_if.c
+SRCS += device_if.h bus_if.h ntb_if.h
+
+.include <bsd.kmod.mk>
diff --git a/sys/modules/ntb/ntb_hw/Makefile b/sys/modules/ntb/ntb_hw/Makefile
index fc46b46..f3e7645 100644
--- a/sys/modules/ntb/ntb_hw/Makefile
+++ b/sys/modules/ntb/ntb_hw/Makefile
@@ -4,6 +4,6 @@
 
 KMOD    = ntb_hw
 SRCS    = ntb_hw.c
-SRCS += device_if.h bus_if.h pci_if.h
+SRCS += device_if.h bus_if.h pci_if.h ntb_if.h
 
 .include <bsd.kmod.mk>
diff --git a/sys/modules/ntb/ntb_transport/Makefile b/sys/modules/ntb/ntb_transport/Makefile
new file mode 100644
index 0000000..2a8fcbc
--- /dev/null
+++ b/sys/modules/ntb/ntb_transport/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+.PATH:  ${.CURDIR}/../../../dev/ntb
+
+KMOD    = ntb_transport
+SRCS    = ntb_transport.c
+SRCS += device_if.h bus_if.h ntb_if.h
+
+.include <bsd.kmod.mk>
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 8109479..3b723c1 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -166,7 +166,8 @@ __FBSDID("$FreeBSD$");
 /*
  * List of capabilities to possibly mask on the member interface.
  */
-#define	BRIDGE_IFCAPS_MASK		(IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM)
+#define	BRIDGE_IFCAPS_MASK		(IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\
+					 IFCAP_TXCSUM_IPV6)
 
 /*
  * List of capabilities to strip
diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c
index 86e8379..6ac65c6 100644
--- a/sys/net/if_epair.c
+++ b/sys/net/if_epair.c
@@ -807,9 +807,9 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 	 * cache locality but we can at least allow parallelism.
 	 */
 	sca->cpuid =
-	    netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount());
+	    netisr_get_cpuid(sca->ifp->if_index);
 	scb->cpuid =
-	    netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount());
+	    netisr_get_cpuid(scb->ifp->if_index);
 
 	/* Initialise pseudo media types. */
 	ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
diff --git a/sys/net/ifdi_if.m b/sys/net/ifdi_if.m
index 60629e4..301a6ee 100644
--- a/sys/net/ifdi_if.m
+++ b/sys/net/ifdi_if.m
@@ -60,9 +60,10 @@ CODE {
 		return (0);
 	}
 
-	static void
+	static int
 	null_queue_intr_enable(if_ctx_t _ctx __unused, uint16_t _qid __unused)
 	{
+		return (ENOTSUP);
 	}
 
 	static void
@@ -194,7 +195,7 @@ METHOD void intr_disable {
 	if_ctx_t _ctx;
 };
 
-METHOD void queue_intr_enable {
+METHOD int queue_intr_enable {
 	if_ctx_t _ctx;
 	uint16_t _qid;
 } DEFAULT null_queue_intr_enable;
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
index c11da16..0d54f2c 100644
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/taskqueue.h>
+#include <sys/limits.h>
 
 
 #include <net/if.h>
@@ -101,7 +102,6 @@ __FBSDID("$FreeBSD$");
  * Enable mbuf vectors for compressing long mbuf chains
  */
 
-
 /*
  * NB:
  * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead
@@ -181,8 +181,10 @@ struct iflib_ctx {
 	struct sysctl_oid *ifc_sysctl_node;
 	uint16_t ifc_sysctl_ntxqs;
 	uint16_t ifc_sysctl_nrxqs;
-	uint16_t ifc_sysctl_ntxds;
-	uint16_t ifc_sysctl_nrxds;
+	uint16_t ifc_sysctl_qs_eq_override;
+
+	uint16_t ifc_sysctl_ntxds[8];
+	uint16_t ifc_sysctl_nrxds[8];
 	struct if_txrx ifc_txrx;
 #define isc_txd_encap  ifc_txrx.ift_txd_encap
 #define isc_txd_flush  ifc_txrx.ift_txd_flush
@@ -294,10 +296,11 @@ typedef struct iflib_sw_tx_desc_array {
 
 #define IFLIB_RESTART_BUDGET		8
 
-#define	IFC_LEGACY		0x1
-#define	IFC_QFLUSH		0x2
-#define	IFC_MULTISEG		0x4
-#define	IFC_DMAR		0x8
+#define	IFC_LEGACY		0x01
+#define	IFC_QFLUSH		0x02
+#define	IFC_MULTISEG		0x04
+#define	IFC_DMAR		0x08
+#define	IFC_SC_ALLOCATED	0x10
 
 #define CSUM_OFFLOAD		(CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \
 				 CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \
@@ -311,6 +314,7 @@ struct iflib_txq {
 	uint8_t		ift_db_pending;
 	uint8_t		ift_db_pending_queued;
 	uint8_t		ift_npending;
+	uint8_t		ift_br_offset;
 	/* implicit pad */
 	uint64_t	ift_processed;
 	uint64_t	ift_cleaned;
@@ -414,6 +418,7 @@ struct iflib_rxq {
 	uint16_t	ifr_cq_cidx;
 	uint16_t	ifr_cq_pidx;
 	uint8_t		ifr_cq_gen;
+	uint8_t		ifr_fl_offset;
 
 	if_ctx_t	ifr_ctx;
 	iflib_fl_t	ifr_fl;
@@ -604,7 +609,7 @@ static void iflib_tx_structures_free(if_ctx_t ctx);
 static void iflib_rx_structures_free(if_ctx_t ctx);
 static int iflib_queues_alloc(if_ctx_t ctx);
 static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq);
-static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx);
+static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget);
 static int iflib_qset_structures_setup(if_ctx_t ctx);
 static int iflib_msix_init(if_ctx_t ctx);
 static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str);
@@ -875,7 +880,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
 		for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) {
 			nic_i = fl->ifl_cidx;
 			nm_i = netmap_idx_n2k(kring, nic_i);
-			avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i);
+			avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i, INT_MAX);
 			for (n = 0; avail > 0; n++, avail--) {
 				error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
 				if (error)
@@ -930,7 +935,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
 			/*
 			 * XXX we should be batching this operation - TODO
 			 */
-			ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1);
+			ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1, fl->ifl_buf_size);
 			bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map,
 			    BUS_DMASYNC_PREREAD);
 			nm_i = nm_next(nm_i, lim);
@@ -958,6 +963,7 @@ static int
 iflib_netmap_attach(if_ctx_t ctx)
 {
 	struct netmap_adapter na;
+	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 
 	bzero(&na, sizeof(na));
 
@@ -966,8 +972,8 @@ iflib_netmap_attach(if_ctx_t ctx)
 	MPASS(ctx->ifc_softc_ctx.isc_ntxqsets);
 	MPASS(ctx->ifc_softc_ctx.isc_nrxqsets);
 
-	na.num_tx_desc = ctx->ifc_sctx->isc_ntxd;
-	na.num_rx_desc = ctx->ifc_sctx->isc_ntxd;
+	na.num_tx_desc = scctx->isc_ntxd[0];
+	na.num_rx_desc = scctx->isc_nrxd[0];
 	na.nm_txsync = iflib_netmap_txsync;
 	na.nm_rxsync = iflib_netmap_rxsync;
 	na.nm_register = iflib_netmap_register;
@@ -986,7 +992,7 @@ iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq)
 	if (slot == 0)
 		return;
 
-	for (int i = 0; i < ctx->ifc_sctx->isc_ntxd; i++) {
+	for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) {
 
 		/*
 		 * In netmap mode, set the map for the packet buffer.
@@ -1011,7 +1017,7 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq)
 	if (slot == 0)
 		return;
 	sd = rxq->ifr_fl[0].ifl_sds;
-	nrxd = ctx->ifc_sctx->isc_nrxd;
+	nrxd = ctx->ifc_softc_ctx.isc_nrxd[0];
 	for (int i = 0; i < nrxd; i++, sd++) {
 			int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i);
 			uint64_t paddr;
@@ -1021,7 +1027,7 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq)
 			vaddr = addr = PNMB(na, slot + sj, &paddr);
 			netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, sd->ifsd_map, addr);
 			/* Update descriptor and the cached value */
-			ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1);
+			ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1, rxq->ifr_fl[0].ifl_buf_size);
 	}
 	/* preserve queue */
 	if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) {
@@ -1236,7 +1242,8 @@ iflib_txsd_alloc(iflib_txq_t txq)
 
 	nsegments = scctx->isc_tx_nsegments;
 	ntsosegments = scctx->isc_tx_tso_segments_max;
-	MPASS(sctx->isc_ntxd > 0);
+	MPASS(scctx->isc_ntxd[0] > 0);
+	MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0);
 	MPASS(nsegments > 0);
 	MPASS(ntsosegments > 0);
 	/*
@@ -1259,13 +1266,11 @@ iflib_txsd_alloc(iflib_txq_t txq)
 					  sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize);
 		goto fail;
 	}
-#ifdef INVARIANTS
+#ifdef IFLIB_DIAGNOSTICS
 	device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n",
 		      sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize);
+
 #endif
-	device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n",
-		      scctx->isc_tx_tso_size_max, ntsosegments,
-		      scctx->isc_tx_tso_segsize_max);
 	if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
 			       1, 0,			/* alignment, bounds */
 			       BUS_SPACE_MAXADDR,	/* lowaddr */
@@ -1282,21 +1287,21 @@ iflib_txsd_alloc(iflib_txq_t txq)
 
 		goto fail;
 	}
-#ifdef INVARIANTS
+#ifdef IFLIB_DIAGNOSTICS
 	device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n",
 		      scctx->isc_tx_tso_size_max, ntsosegments,
 		      scctx->isc_tx_tso_segsize_max);
 #endif
 	if (!(txq->ift_sds.ifsd_flags =
 	    (uint8_t *) malloc(sizeof(uint8_t) *
-	    sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) {
+	    scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
 		err = ENOMEM;
 		goto fail;
 	}
 	if (!(txq->ift_sds.ifsd_m =
 	    (struct mbuf **) malloc(sizeof(struct mbuf *) *
-	    sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) {
+	    scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
 		err = ENOMEM;
 		goto fail;
@@ -1308,13 +1313,13 @@ iflib_txsd_alloc(iflib_txq_t txq)
 		return (0);
 
 	if (!(txq->ift_sds.ifsd_map =
-	    (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) {
+	    (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate tx_buffer map memory\n");
 		err = ENOMEM;
 		goto fail;
 	}
 
-	for (int i = 0; i < sctx->isc_ntxd; i++) {
+	for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) {
 		err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]);
 		if (err != 0) {
 			device_printf(dev, "Unable to create TX DMA map\n");
@@ -1348,9 +1353,8 @@ static void
 iflib_txq_destroy(iflib_txq_t txq)
 {
 	if_ctx_t ctx = txq->ift_ctx;
-	if_shared_ctx_t sctx = ctx->ifc_sctx;
 
-	for (int i = 0; i < sctx->isc_ntxd; i++)
+	for (int i = 0; i < txq->ift_size; i++)
 		iflib_txsd_destroy(ctx, txq, i);
 	if (txq->ift_sds.ifsd_map != NULL) {
 		free(txq->ift_sds.ifsd_map, M_IFLIB);
@@ -1390,7 +1394,7 @@ iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i)
 		bus_dmamap_unload(txq->ift_desc_tag,
 				  txq->ift_sds.ifsd_map[i]);
 	}
-	m_freem(*mp);
+	m_free(*mp);
 	DBG_COUNTER_INC(tx_frees);
 	*mp = NULL;
 }
@@ -1399,7 +1403,7 @@ static int
 iflib_txq_setup(iflib_txq_t txq)
 {
 	if_ctx_t ctx = txq->ift_ctx;
-	if_shared_ctx_t sctx = ctx->ifc_sctx;
+	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 	iflib_dma_info_t di;
 	int i;
 
@@ -1408,7 +1412,7 @@ iflib_txq_setup(iflib_txq_t txq)
 
 	/* Reset indices */
 	txq->ift_cidx_processed = txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
-	txq->ift_size = sctx->isc_ntxd;
+	txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset];
 
 	for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++)
 		bzero((void *)di->idi_vaddr, di->idi_size);
@@ -1433,22 +1437,25 @@ iflib_rxsd_alloc(iflib_rxq_t rxq)
 {
 	if_ctx_t ctx = rxq->ifr_ctx;
 	if_shared_ctx_t sctx = ctx->ifc_sctx;
+	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 	device_t dev = ctx->ifc_dev;
 	iflib_fl_t fl;
 	iflib_rxsd_t	rxsd;
 	int			err;
 
-	MPASS(sctx->isc_nrxd > 0);
+	MPASS(scctx->isc_nrxd[0] > 0);
+	MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0);
 
 	fl = rxq->ifr_fl;
 	for (int i = 0; i <  rxq->ifr_nfl; i++, fl++) {
 		fl->ifl_sds = malloc(sizeof(struct iflib_sw_rx_desc) *
-							 sctx->isc_nrxd, M_IFLIB, M_WAITOK | M_ZERO);
+		    scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB,
+		    M_WAITOK | M_ZERO);
 		if (fl->ifl_sds == NULL) {
 			device_printf(dev, "Unable to allocate rx sw desc memory\n");
 			return (ENOMEM);
 		}
-		fl->ifl_size = sctx->isc_nrxd; /* this isn't necessarily the same */
+		fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */
 		err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
 					 1, 0,			/* alignment, bounds */
 					 BUS_SPACE_MAXADDR,	/* lowaddr */
@@ -1468,7 +1475,7 @@ iflib_rxsd_alloc(iflib_rxq_t rxq)
 		}
 
 		rxsd = fl->ifl_sds;
-		for (int i = 0; i < sctx->isc_nrxd; i++, rxsd++) {
+		for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++, rxsd++) {
 			err = bus_dmamap_create(fl->ifl_desc_tag, 0, &rxsd->ifsd_map);
 			if (err) {
 				device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
@@ -1626,7 +1633,7 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
 		}
 		if (n == 0 || i == IFLIB_MAX_RX_REFRESH) {
 			ctx->isc_rxd_refill(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx,
-								 fl->ifl_bus_addrs, fl->ifl_vm_addrs, i);
+								 fl->ifl_bus_addrs, fl->ifl_vm_addrs, i, fl->ifl_buf_size);
 			i = 0;
 			pidx = fl->ifl_pidx;
 		}
@@ -1854,7 +1861,11 @@ iflib_init_locked(if_ctx_t ctx)
 	for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
 		iflib_netmap_rxq_init(ctx, rxq);
 	}
+#ifdef INVARIANTS
+	i = if_getdrvflags(ifp);
+#endif
 	IFDI_INIT(ctx);
+	MPASS(if_getdrvflags(ifp) == i);
 	for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
 		for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
 			if (iflib_fl_setup(fl)) {
@@ -1902,7 +1913,6 @@ iflib_stop(if_ctx_t ctx)
 	iflib_txq_t txq = ctx->ifc_txqs;
 	iflib_rxq_t rxq = ctx->ifc_rxqs;
 	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
-	if_shared_ctx_t sctx = ctx->ifc_sctx;
 	iflib_dma_info_t di;
 	iflib_fl_t fl;
 	int i, j;
@@ -1920,7 +1930,7 @@ iflib_stop(if_ctx_t ctx)
 		/* clean any enqueued buffers */
 		iflib_txq_check_drain(txq, 0);
 		/* Free any existing tx buffers. */
-		for (j = 0; j < sctx->isc_ntxd; j++) {
+		for (j = 0; j < txq->ift_size; j++) {
 			iflib_txsd_free(ctx, txq, j);
 		}
 		txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0;
@@ -1990,13 +2000,24 @@ assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri)
 	caddr_t cl;
 
 	i = 0;
+	mh = NULL;
 	do {
 		sd = rxd_frag_to_sd(rxq, &ri->iri_frags[i], &cltype, TRUE);
 
 		MPASS(sd->ifsd_cl != NULL);
 		MPASS(sd->ifsd_m != NULL);
+
+		/* Don't include zero-length frags */
+		if (ri->iri_frags[i].irf_len == 0) {
+			/* XXX we can save the cluster here, but not the mbuf */
+			m_init(sd->ifsd_m, M_NOWAIT, MT_DATA, 0);
+			m_free(sd->ifsd_m);
+			sd->ifsd_m = NULL;
+			continue;
+		}
+
 		m = sd->ifsd_m;
-		if (i == 0) {
+		if (mh == NULL) {
 			flags = M_PKTHDR|M_EXT;
 			mh = mt = m;
 			padlen = ri->iri_pad;
@@ -2019,14 +2040,12 @@ assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri)
 		 */
 		m->m_data += padlen;
 		ri->iri_len -= padlen;
-		m->m_len = ri->iri_len;
+		m->m_len = ri->iri_frags[i].irf_len;
 	} while (++i < ri->iri_nfrags);
 
 	return (mh);
 }
 
-
-
 /*
  * Process one software descriptor
  */
@@ -2037,13 +2056,14 @@ iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri)
 	iflib_rxsd_t sd;
 
 	/* should I merge this back in now that the two paths are basically duplicated? */
-	if (ri->iri_len <= IFLIB_RX_COPY_THRESH) {
+	if (ri->iri_nfrags == 1 &&
+	    ri->iri_frags[0].irf_len <= IFLIB_RX_COPY_THRESH) {
 		sd = rxd_frag_to_sd(rxq, &ri->iri_frags[0], NULL, FALSE);
 		m = sd->ifsd_m;
 		sd->ifsd_m = NULL;
 		m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR);
 		memcpy(m->m_data, sd->ifsd_cl, ri->iri_len);
-		m->m_len = ri->iri_len;
+		m->m_len = ri->iri_frags[0].irf_len;
        } else {
 		m = assemble_segments(rxq, ri);
 	}
@@ -2063,13 +2083,13 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
 {
 	if_ctx_t ctx = rxq->ifr_ctx;
 	if_shared_ctx_t sctx = ctx->ifc_sctx;
+	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 	int avail, i;
 	uint16_t *cidxp;
 	struct if_rxd_info ri;
 	int err, budget_left, rx_bytes, rx_pkts;
 	iflib_fl_t fl;
 	struct ifnet *ifp;
-	struct lro_entry *queued;
 	int lro_enabled;
 	/*
 	 * XXX early demux data packets so that if_input processing only handles
@@ -2084,11 +2104,11 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
 	mh = mt = NULL;
 	MPASS(budget > 0);
 	rx_pkts	= rx_bytes = 0;
-	if (sctx->isc_flags & IFLIB_HAS_CQ)
+	if (sctx->isc_flags & IFLIB_HAS_RXCQ)
 		cidxp = &rxq->ifr_cq_cidx;
 	else
 		cidxp = &rxq->ifr_fl[0].ifl_cidx;
-	if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp)) == 0) {
+	if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) {
 		for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
 			__iflib_fl_refill_lt(ctx, fl, budget + 8);
 		DBG_COUNTER_INC(rx_unavail);
@@ -2112,10 +2132,11 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
 
 		/* in lieu of handling correctly - make sure it isn't being unhandled */
 		MPASS(err == 0);
-		if (sctx->isc_flags & IFLIB_HAS_CQ) {
-			/* we know we consumed _one_ CQ entry */
-			if (++rxq->ifr_cq_cidx == sctx->isc_nrxd) {
-				rxq->ifr_cq_cidx = 0;
+		if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
+			*cidxp = ri.iri_cidx;
+			/* Update our consumer index */
+			while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) {
+				rxq->ifr_cq_cidx -= scctx->isc_nrxd[0];
 				rxq->ifr_cq_gen = 0;
 			}
 			/* was this only a completion queue message? */
@@ -2128,7 +2149,7 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
 		/* will advance the cidx on the corresponding free lists */
 		m = iflib_rxd_pkt_get(rxq, &ri);
 		if (avail == 0 && budget_left)
-			avail = iflib_rxd_avail(ctx, rxq, *cidxp);
+			avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left);
 
 		if (__predict_false(m == NULL)) {
 			DBG_COUNTER_INC(rx_mbuf_null);
@@ -2148,7 +2169,6 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
 
 	ifp = ctx->ifc_ifp;
 	lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO);
-
 	while (mh != NULL) {
 		m = mh;
 		mh = mh->m_nextpkt;
@@ -2162,32 +2182,33 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
 		DBG_COUNTER_INC(rx_if_input);
 		ifp->if_input(ifp, m);
 	}
+
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes);
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts);
 
 	/*
 	 * Flush any outstanding LRO work
 	 */
-	while ((queued = LIST_FIRST(&rxq->ifr_lc.lro_active)) != NULL) {
-		LIST_REMOVE(queued, next);
 #if defined(INET6) || defined(INET)
-		tcp_lro_flush(&rxq->ifr_lc, queued);
+	tcp_lro_flush_all(&rxq->ifr_lc);
 #endif
-	}
-	return (iflib_rxd_avail(ctx, rxq, *cidxp));
+	if (avail)
+		return true;
+	return (iflib_rxd_avail(ctx, rxq, *cidxp, 1));
 }
 
 #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags)
 #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG)
-#define TXQ_MAX_DB_DEFERRED(ctx) (ctx->ifc_sctx->isc_ntxd >> 5)
-#define TXQ_MAX_DB_CONSUMED(ctx) (ctx->ifc_sctx->isc_ntxd >> 4)
+#define TXQ_MAX_DB_DEFERRED(size) (size >> 5)
+#define TXQ_MAX_DB_CONSUMED(size) (size >> 4)
 
 static __inline void
 iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring)
 {
 	uint32_t dbval;
 
-	if (ring || txq->ift_db_pending >= TXQ_MAX_DB_DEFERRED(ctx)) {
+	if (ring || txq->ift_db_pending >=
+	    TXQ_MAX_DB_DEFERRED(txq->ift_size)) {
 
 		/* the lock will only ever be contended in the !min_latency case */
 		if (!TXDB_TRYLOCK(txq))
@@ -2233,9 +2254,9 @@ static int
 iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
 {
 	struct ether_vlan_header *eh;
-	struct mbuf *m;
+	struct mbuf *m, *n;
 
-	m = *mp;
+	n = m = *mp;
 	/*
 	 * Determine where frame payload starts.
 	 * Jump over vlan headers if already present,
@@ -2261,7 +2282,6 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
 	{
 		struct ip *ip = NULL;
 		struct tcphdr *th = NULL;
-		struct mbuf *n;
 		int minthlen;
 
 		minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th));
@@ -2403,37 +2423,31 @@ collapse_pkthdr(struct mbuf *m0)
 
 /*
  * If dodgy hardware rejects the scatter gather chain we've handed it
- * we'll need to rebuild the mbuf chain before we can call m_defrag
+ * we'll need to remove the mbuf chain from ifsd_m[] before we can add the
+ * m_defrag'd mbufs
  */
 static __noinline struct mbuf *
-iflib_rebuild_mbuf(iflib_txq_t txq)
+iflib_remove_mbuf(iflib_txq_t txq)
 {
-
-	int ntxd, mhlen, len, i, pidx;
+	int ntxd, i, pidx;
 	struct mbuf *m, *mh, **ifsd_m;
-	if_shared_ctx_t		sctx;
 
 	pidx = txq->ift_pidx;
 	ifsd_m = txq->ift_sds.ifsd_m;
-	sctx = txq->ift_ctx->ifc_sctx;
-	ntxd = sctx->isc_ntxd;
+	ntxd = txq->ift_size;
 	mh = m = ifsd_m[pidx];
 	ifsd_m[pidx] = NULL;
 #if MEMORY_LOGGING
 	txq->ift_dequeued++;
 #endif
-	len = m->m_len;
-	mhlen = m->m_pkthdr.len;
 	i = 1;
 
-	while (len < mhlen && (m->m_next == NULL)) {
-		m->m_next = ifsd_m[(pidx + i) & (ntxd-1)];
+	while (m) {
 		ifsd_m[(pidx + i) & (ntxd -1)] = NULL;
 #if MEMORY_LOGGING
 		txq->ift_dequeued++;
 #endif
 		m = m->m_next;
-		len += m->m_len;
 		i++;
 	}
 	return (mh);
@@ -2446,6 +2460,7 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map,
 {
 	if_ctx_t ctx;
 	if_shared_ctx_t		sctx;
+	if_softc_ctx_t		scctx;
 	int i, next, pidx, mask, err, maxsegsz, ntxd, count;
 	struct mbuf *m, *tmp, **ifsd_m, **mp;
 
@@ -2459,8 +2474,9 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map,
 
 	ctx = txq->ift_ctx;
 	sctx = ctx->ifc_sctx;
+	scctx = &ctx->ifc_softc_ctx;
 	ifsd_m = txq->ift_sds.ifsd_m;
-	ntxd = sctx->isc_ntxd;
+	ntxd = txq->ift_size;
 	pidx = txq->ift_pidx;
 	if (map != NULL) {
 		uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags;
@@ -2472,13 +2488,12 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map,
 		ifsd_flags[pidx] |= TX_SW_DESC_MAPPED;
 		i = 0;
 		next = pidx;
-		mask = (sctx->isc_ntxd-1);
+		mask = (txq->ift_size-1);
 		m = *m0;
 		do {
 			mp = &ifsd_m[next];
 			*mp = m;
 			m = m->m_next;
-			(*mp)->m_next = NULL;
 			if (__predict_false((*mp)->m_len == 0)) {
 				m_free(*mp);
 				*mp = NULL;
@@ -2529,13 +2544,12 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map,
 			count++;
 			tmp = m;
 			m = m->m_next;
-			tmp->m_next = NULL;
 		} while (m != NULL);
 		*nsegs = i;
 	}
 	return (0);
 err:
-	*m0 = iflib_rebuild_mbuf(txq);
+	*m0 = iflib_remove_mbuf(txq);
 	return (EFBIG);
 }
 
@@ -2558,7 +2572,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp)
 	sctx = ctx->ifc_sctx;
 	scctx = &ctx->ifc_softc_ctx;
 	segs = txq->ift_segs;
-	ntxd = sctx->isc_ntxd;
+	ntxd = txq->ift_size;
 	m_head = *m_headp;
 	map = NULL;
 
@@ -2645,14 +2659,14 @@ defrag:
 		if (map != NULL)
 			bus_dmamap_unload(desc_tag, map);
 		DBG_COUNTER_INC(encap_txq_avail_fail);
-		if (txq->ift_task.gt_task.ta_pending == 0)
+		if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0)
 			GROUPTASK_ENQUEUE(&txq->ift_task);
 		return (ENOBUFS);
 	}
 	pi.ipi_segs = segs;
 	pi.ipi_nsegs = nsegs;
 
-	MPASS(pidx >= 0 && pidx < sctx->isc_ntxd);
+	MPASS(pidx >= 0 && pidx < txq->ift_size);
 #ifdef PKT_DEBUG
 	print_pkt(&pi);
 #endif
@@ -2661,11 +2675,12 @@ defrag:
 						BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 		DBG_COUNTER_INC(tx_encap);
-		MPASS(pi.ipi_new_pidx >= 0 && pi.ipi_new_pidx < sctx->isc_ntxd);
+		MPASS(pi.ipi_new_pidx >= 0 &&
+		    pi.ipi_new_pidx < txq->ift_size);
 
 		ndesc = pi.ipi_new_pidx - pi.ipi_pidx;
 		if (pi.ipi_new_pidx < pi.ipi_pidx) {
-			ndesc += sctx->isc_ntxd;
+			ndesc += txq->ift_size;
 			txq->ift_gen = 1;
 		}
 		MPASS(pi.ipi_new_pidx != pidx);
@@ -2678,7 +2693,7 @@ defrag:
 		txq->ift_pidx = pi.ipi_new_pidx;
 		txq->ift_npending += pi.ipi_ndescs;
 	} else if (__predict_false(err == EFBIG && remap < 2)) {
-		*m_headp = m_head = iflib_rebuild_mbuf(txq);
+		*m_headp = m_head = iflib_remove_mbuf(txq);
 		remap = 1;
 		txq->ift_txd_encap_efbig++;
 		goto defrag;
@@ -2700,7 +2715,7 @@ defrag_failed:
 
 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets)
 #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets)
-#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NRXQSETS(ctx)) + FIRST_QSET(ctx))
+#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx))
 #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments))
 #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh)
 #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max)
@@ -2712,7 +2727,7 @@ defrag_failed:
  *
  * ORing with 2 assures that min occupancy is never less than 2 without any conditional logic
  */
-#define TXQ_MIN_OCCUPANCY(ctx) ((ctx->ifc_sctx->isc_ntxd >> 6)| 0x2)
+#define TXQ_MIN_OCCUPANCY(size) ((size >> 6)| 0x2)
 
 static inline int
 iflib_txq_min_occupancy(iflib_txq_t txq)
@@ -2720,7 +2735,9 @@ iflib_txq_min_occupancy(iflib_txq_t txq)
 	if_ctx_t ctx;
 
 	ctx = txq->ift_ctx;
-	return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen) < TXQ_MIN_OCCUPANCY(ctx) + MAX_TX_DESC(ctx));
+	return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx,
+	    txq->ift_gen) < TXQ_MIN_OCCUPANCY(txq->ift_size) +
+	    MAX_TX_DESC(ctx));
 }
 
 static void
@@ -2734,7 +2751,7 @@ iflib_tx_desc_free(iflib_txq_t txq, int n)
 
 	cidx = txq->ift_cidx;
 	gen = txq->ift_gen;
-	qsize = txq->ift_ctx->ifc_sctx->isc_ntxd;
+	qsize = txq->ift_size;
 	mask = qsize-1;
 	hasmap = txq->ift_sds.ifsd_map != NULL;
 	ifsd_flags = txq->ift_sds.ifsd_flags;
@@ -2760,7 +2777,7 @@ iflib_tx_desc_free(iflib_txq_t txq, int n)
 				/* XXX we don't support any drivers that batch packets yet */
 				MPASS(m->m_nextpkt == NULL);
 
-				m_freem(m);
+				m_free(m);
 				ifsd_m[cidx] = NULL;
 #if MEMORY_LOGGING
 				txq->ift_dequeued++;
@@ -2856,7 +2873,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
 	if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) {
 		DBG_COUNTER_INC(txq_drain_flushing);
 		for (i = 0; i < avail; i++) {
-			m_freem(r->items[(cidx + i) & (r->size-1)]);
+			m_free(r->items[(cidx + i) & (r->size-1)]);
 			r->items[(cidx + i) & (r->size-1)] = NULL;
 		}
 		return (avail);
@@ -2903,7 +2920,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
 		if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
 			break;
 
-		if (desc_used > TXQ_MAX_DB_CONSUMED(ctx))
+		if (desc_used > TXQ_MAX_DB_CONSUMED(txq->ift_size))
 			break;
 	}
 
@@ -2924,7 +2941,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
 }
 
 static void
-_task_fn_tx(void *context, int pending)
+_task_fn_tx(void *context)
 {
 	iflib_txq_t txq = context;
 	if_ctx_t ctx = txq->ift_ctx;
@@ -2935,11 +2952,12 @@ _task_fn_tx(void *context, int pending)
 }
 
 static void
-_task_fn_rx(void *context, int pending)
+_task_fn_rx(void *context)
 {
 	iflib_rxq_t rxq = context;
 	if_ctx_t ctx = rxq->ifr_ctx;
 	bool more;
+	int rc;
 
 	DBG_COUNTER_INC(task_fn_rxs);
 	if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
@@ -2950,7 +2968,8 @@ _task_fn_rx(void *context, int pending)
 			IFDI_INTR_ENABLE(ctx);
 		else {
 			DBG_COUNTER_INC(rx_intr_enables);
-			IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
+			rc = IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
+			KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver"));
 		}
 	}
 	if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
@@ -2960,7 +2979,7 @@ _task_fn_rx(void *context, int pending)
 }
 
 static void
-_task_fn_admin(void *context, int pending)
+_task_fn_admin(void *context)
 {
 	if_ctx_t ctx = context;
 	if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
@@ -2990,7 +3009,7 @@ _task_fn_admin(void *context, int pending)
 
 
 static void
-_task_fn_iov(void *context, int pending)
+_task_fn_iov(void *context)
 {
 	if_ctx_t ctx = context;
 
@@ -3049,8 +3068,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
 	if_ctx_t	ctx = if_getsoftc(ifp);
 
 	iflib_txq_t txq;
-	struct mbuf *marr[8], **mp, *next;
-	int err, i, count, qidx;
+	int err, qidx;
 
 	if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
 		DBG_COUNTER_INC(tx_frees);
@@ -3058,6 +3076,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
 		return (0);
 	}
 
+	MPASS(m->m_nextpkt == NULL);
 	qidx = 0;
 	if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m))
 		qidx = QIDX(ctx, m);
@@ -3077,6 +3096,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
 		return (ENOBUFS);
 	}
 #endif
+#ifdef notyet
 	qidx = count = 0;
 	mp = marr;
 	next = m;
@@ -3098,22 +3118,21 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
 		next = next->m_nextpkt;
 		mp[i]->m_nextpkt = NULL;
 	}
+#endif
 	DBG_COUNTER_INC(tx_seen);
-	err = ifmp_ring_enqueue(txq->ift_br[0], (void **)mp, count, TX_BATCH_SIZE);
+	err = ifmp_ring_enqueue(txq->ift_br[0], (void **)&m, 1, TX_BATCH_SIZE);
 
-	if (iflib_txq_can_drain(txq->ift_br[0]))
-		GROUPTASK_ENQUEUE(&txq->ift_task);
 	if (err) {
+		GROUPTASK_ENQUEUE(&txq->ift_task);
 		/* support forthcoming later */
 #ifdef DRIVER_BACKPRESSURE
 		txq->ift_closed = TRUE;
 #endif
-		for (i = 0; i < count; i++)
-			m_freem(mp[i]);
 		ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE);
+		m_freem(m);
+	} else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) {
+		GROUPTASK_ENQUEUE(&txq->ift_task);
 	}
-	if (count > nitems(marr))
-		free(mp, M_IFLIB);
 
 	return (err);
 }
@@ -3138,13 +3157,13 @@ iflib_if_qflush(if_t ifp)
 	if_qflush(ifp);
 }
 
-#define IFCAP_REINIT (IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_TSO6|IFCAP_VLAN_HWTAGGING|IFCAP_VLAN_MTU | \
-		      IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO)
 
-#define IFCAP_FLAGS (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
+#define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
 		     IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING |	\
 		     IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO)
 
+#define IFCAP_REINIT IFCAP_FLAGS
+
 static int
 iflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
 {
@@ -3428,6 +3447,9 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
 	if_ctx_t ctx;
 	if_t ifp;
 	if_softc_ctx_t scctx;
+	int i;
+	uint16_t main_txq;
+	uint16_t main_rxq;
 
 
 	ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO);
@@ -3435,6 +3457,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
 	if (sc == NULL) {
 		sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO);
 		device_set_softc(dev, ctx);
+		ctx->ifc_flags |= IFC_SC_ALLOCATED;
 	}
 
 	ctx->ifc_sctx = sctx;
@@ -3447,28 +3470,112 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
 		return (err);
 	}
 	iflib_add_device_sysctl_pre(ctx);
+
+	scctx = &ctx->ifc_softc_ctx;
+	/*
+	 * XXX sanity check that ntxd & nrxd are a power of 2
+	 */
+	if (ctx->ifc_sysctl_ntxqs != 0)
+		scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs;
+	if (ctx->ifc_sysctl_nrxqs != 0)
+		scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs;
+
+	for (i = 0; i < sctx->isc_ntxqs; i++) {
+		if (ctx->ifc_sysctl_ntxds[i] != 0)
+			scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i];
+		else
+			scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i];
+	}
+
+	for (i = 0; i < sctx->isc_nrxqs; i++) {
+		if (ctx->ifc_sysctl_nrxds[i] != 0)
+			scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i];
+		else
+			scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i];
+	}
+
+	for (i = 0; i < sctx->isc_nrxqs; i++) {
+		if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) {
+			device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n",
+				      i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]);
+			scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i];
+		}
+		if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) {
+			device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n",
+				      i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]);
+			scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i];
+		}
+	}
+
+	for (i = 0; i < sctx->isc_ntxqs; i++) {
+		if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) {
+			device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n",
+				      i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]);
+			scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i];
+		}
+		if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) {
+			device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n",
+				      i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]);
+			scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i];
+		}
+	}
+
 	if ((err = IFDI_ATTACH_PRE(ctx)) != 0) {
 		device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err);
 		return (err);
 	}
+	if (scctx->isc_ntxqsets_max)
+		scctx->isc_ntxqsets = min(scctx->isc_ntxqsets, scctx->isc_ntxqsets_max);
+	if (scctx->isc_nrxqsets_max)
+		scctx->isc_nrxqsets = min(scctx->isc_nrxqsets, scctx->isc_nrxqsets_max);
+
 #ifdef ACPI_DMAR
 	if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL)
 		ctx->ifc_flags |= IFC_DMAR;
 #endif
 
-	scctx = &ctx->ifc_softc_ctx;
 	msix_bar = scctx->isc_msix_bar;
 
-	if (scctx->isc_tx_nsegments > sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION)
-		scctx->isc_tx_nsegments = max(1, sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION);
-	if (scctx->isc_tx_tso_segments_max > sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION)
-		scctx->isc_tx_tso_segments_max = max(1, sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION);
-
 	ifp = ctx->ifc_ifp;
 
-	/*
-	 * XXX sanity check that ntxd & nrxd are a power of 2
-	 */
+	if(sctx->isc_flags & IFLIB_HAS_TXCQ)
+		main_txq = 1;
+	else
+		main_txq = 0;
+
+	if(sctx->isc_flags & IFLIB_HAS_RXCQ)
+		main_rxq = 1;
+	else
+		main_rxq = 0;
+
+	/* XXX change for per-queue sizes */
+	device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
+		      scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]);
+	for (i = 0; i < sctx->isc_nrxqs; i++) {
+		if (!powerof2(scctx->isc_nrxd[i])) {
+			/* round down instead? */
+			device_printf(dev, "# rx descriptors must be a power of 2\n");
+			err = EINVAL;
+			goto fail;
+		}
+	}
+	for (i = 0; i < sctx->isc_ntxqs; i++) {
+		if (!powerof2(scctx->isc_ntxd[i])) {
+			device_printf(dev,
+			    "# tx descriptors must be a power of 2\n");
+			err = EINVAL;
+			goto fail;
+		}
+	}
+
+	if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] /
+	    MAX_SINGLE_PACKET_FRACTION)
+		scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] /
+		    MAX_SINGLE_PACKET_FRACTION);
+	if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] /
+	    MAX_SINGLE_PACKET_FRACTION)
+		scctx->isc_tx_tso_segments_max = max(1,
+		    scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION);
 
 	/*
 	 * Protect the stack against modern hardware
@@ -3482,7 +3589,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
 	ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max;
 	if (scctx->isc_rss_table_size == 0)
 		scctx->isc_rss_table_size = 64;
-	scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;;
+	scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
 	/*
 	** Now setup MSI or MSI/X, should
 	** return us the number of supported
@@ -3520,7 +3627,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
 			MPASS(msix == 1);
 			rid = 1;
 		}
-		if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx, &rid, "irq0")) != 0) {
+		if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) {
 			device_printf(dev, "iflib_legacy_setup failed %d\n", err);
 			goto fail_intr_free;
 		}
@@ -3536,6 +3643,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
 	}
 	*ctxp = ctx;
 
+	if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
 	iflib_add_device_sysctl_post(ctx);
 	return (0);
 fail_detach:
@@ -3599,7 +3707,7 @@ iflib_device_deregister(if_ctx_t ctx)
 		led_destroy(ctx->ifc_led_dev);
 	/* XXX drain any dependent tasks */
 	tqg = qgroup_if_io_tqg;
-	for (txq = ctx->ifc_txqs, i = 0, rxq = ctx->ifc_rxqs; i < NTXQSETS(ctx); i++, txq++) {
+	for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) {
 		callout_drain(&txq->ift_timer);
 		callout_drain(&txq->ift_db_check);
 		if (txq->ift_task.gt_uniq != NULL)
@@ -3616,6 +3724,7 @@ iflib_device_deregister(if_ctx_t ctx)
 		taskqgroup_detach(tqg, &ctx->ifc_vflr_task);
 
 	IFDI_DETACH(ctx);
+	device_set_softc(ctx->ifc_dev, NULL);
 	if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) {
 		pci_release_msi(dev);
 	}
@@ -3633,6 +3742,9 @@ iflib_device_deregister(if_ctx_t ctx)
 
 	iflib_tx_structures_free(ctx);
 	iflib_rx_structures_free(ctx);
+	if (ctx->ifc_flags & IFC_SC_ALLOCATED)
+		free(ctx->ifc_softc, M_IFLIB);
+	free(ctx, M_IFLIB);
 	return (0);
 }
 
@@ -3782,7 +3894,13 @@ _iflib_assert(if_shared_ctx_t sctx)
 	MPASS(sctx->isc_txrx->ift_rxd_pkt_get);
 	MPASS(sctx->isc_txrx->ift_rxd_refill);
 	MPASS(sctx->isc_txrx->ift_rxd_flush);
-	MPASS(sctx->isc_nrxd);
+
+	MPASS(sctx->isc_nrxd_min[0]);
+	MPASS(sctx->isc_nrxd_max[0]);
+	MPASS(sctx->isc_nrxd_default[0]);
+	MPASS(sctx->isc_ntxd_min[0]);
+	MPASS(sctx->isc_ntxd_max[0]);
+	MPASS(sctx->isc_ntxd_default[0]);
 }
 
 static int
@@ -3796,7 +3914,6 @@ iflib_register(if_ctx_t ctx)
 	_iflib_assert(sctx);
 
 	CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev));
-	MPASS(ctx->ifc_flags == 0);
 
 	ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER);
 	if (ifp == NULL) {
@@ -3818,7 +3935,6 @@ iflib_register(if_ctx_t ctx)
 	if_setioctlfn(ifp, iflib_if_ioctl);
 	if_settransmitfn(ifp, iflib_if_transmit);
 	if_setqflushfn(ifp, iflib_if_qflush);
-	if_setgetcounterfn(ifp, iflib_if_get_counter);
 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
 
 	if_setcapabilities(ifp, 0);
@@ -3842,16 +3958,17 @@ static int
 iflib_queues_alloc(if_ctx_t ctx)
 {
 	if_shared_ctx_t sctx = ctx->ifc_sctx;
+	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
 	device_t dev = ctx->ifc_dev;
-	int nrxqsets = ctx->ifc_softc_ctx.isc_nrxqsets;
-	int ntxqsets = ctx->ifc_softc_ctx.isc_ntxqsets;
+	int nrxqsets = scctx->isc_nrxqsets;
+	int ntxqsets = scctx->isc_ntxqsets;
 	iflib_txq_t txq;
 	iflib_rxq_t rxq;
 	iflib_fl_t fl = NULL;
-	int i, j, cpu, err, txconf, rxconf, fl_ifdi_offset;
+	int i, j, cpu, err, txconf, rxconf;
 	iflib_dma_info_t ifdip;
-	uint32_t *rxqsizes = sctx->isc_rxqsizes;
-	uint32_t *txqsizes = sctx->isc_txqsizes;
+	uint32_t *rxqsizes = scctx->isc_rxqsizes;
+	uint32_t *txqsizes = scctx->isc_txqsizes;
 	uint8_t nrxqs = sctx->isc_nrxqs;
 	uint8_t ntxqs = sctx->isc_ntxqs;
 	int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1;
@@ -3860,10 +3977,11 @@ iflib_queues_alloc(if_ctx_t ctx)
 	struct ifmp_ring **brscp;
 	int nbuf_rings = 1; /* XXX determine dynamically */
 
-	KASSERT(ntxqs > 0, ("number of queues must be at least 1"));
-	KASSERT(nrxqs > 0, ("number of queues must be at least 1"));
+	KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1"));
+	KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1"));
 
 	brscp = NULL;
+	txq = NULL;
 	rxq = NULL;
 
 /* Allocate the TX ring struct memory */
@@ -3891,8 +4009,6 @@ iflib_queues_alloc(if_ctx_t ctx)
 
 	ctx->ifc_txqs = txq;
 	ctx->ifc_rxqs = rxq;
-	txq = NULL;
-	rxq = NULL;
 
 	/*
 	 * XXX handle allocation failure
@@ -3916,6 +4032,11 @@ iflib_queues_alloc(if_ctx_t ctx)
 		}
 		txq->ift_ctx = ctx;
 		txq->ift_id = i;
+		if (sctx->isc_flags & IFLIB_HAS_TXCQ) {
+			txq->ift_br_offset = 1;
+		} else {
+			txq->ift_br_offset = 0;
+		}
 		/* XXX fix this */
 		txq->ift_timer.c_cpu = cpu;
 		txq->ift_db_check.c_cpu = cpu;
@@ -3970,10 +4091,10 @@ iflib_queues_alloc(if_ctx_t ctx)
 		}
 		rxq->ifr_ctx = ctx;
 		rxq->ifr_id = i;
-		if (sctx->isc_flags & IFLIB_HAS_CQ) {
-			fl_ifdi_offset = 1;
+		if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
+			rxq->ifr_fl_offset = 1;
 		} else {
-			fl_ifdi_offset = 0;
+			rxq->ifr_fl_offset = 0;
 		}
 		rxq->ifr_nfl = nfree_lists;
 		if (!(fl =
@@ -3986,7 +4107,8 @@ iflib_queues_alloc(if_ctx_t ctx)
 		for (j = 0; j < nfree_lists; j++) {
 			rxq->ifr_fl[j].ifl_rxq = rxq;
 			rxq->ifr_fl[j].ifl_id = j;
-			rxq->ifr_fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + fl_ifdi_offset];
+			rxq->ifr_fl[j].ifl_ifdi =
+			    &rxq->ifr_ifdi[j + rxq->ifr_fl_offset];
 		}
         /* Allocate receive buffers for the ring*/
 		if (iflib_rxsd_alloc(rxq)) {
@@ -4106,12 +4228,13 @@ iflib_rx_structures_setup(if_ctx_t ctx)
 	for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) {
 #if defined(INET6) || defined(INET)
 		tcp_lro_free(&rxq->ifr_lc);
-		if ((err = tcp_lro_init(&rxq->ifr_lc)) != 0) {
+		if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp,
+		    TCP_LRO_ENTRIES, min(1024,
+		    ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) {
 			device_printf(ctx->ifc_dev, "LRO Initialization failed!\n");
 			goto fail;
 		}
 		rxq->ifr_lro_enabled = TRUE;
-		rxq->ifr_lc.ifp = ctx->ifc_ifp;
 #endif
 		IFDI_RXQ_SETUP(ctx, rxq->ifr_id);
 	}
@@ -4142,7 +4265,7 @@ iflib_rx_structures_free(if_ctx_t ctx)
 {
 	iflib_rxq_t rxq = ctx->ifc_rxqs;
 
-	for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, rxq++) {
+	for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) {
 		iflib_rx_sds_free(rxq);
 	}
 }
@@ -4193,7 +4316,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
 	struct taskqgroup *tqg;
 	iflib_filter_info_t info;
 	cpuset_t cpus;
-	task_fn_t *fn;
+	gtask_fn_t *fn;
 	int tqrid, err;
 	void *q;
 
@@ -4254,7 +4377,7 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type,  void
 {
 	struct grouptask *gtask;
 	struct taskqgroup *tqg;
-	task_fn_t *fn;
+	gtask_fn_t *fn;
 	void *q;
 
 	switch (type) {
@@ -4310,7 +4433,7 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *
 	iflib_filter_info_t info;
 	struct grouptask *gtask;
 	struct taskqgroup *tqg;
-	task_fn_t *fn;
+	gtask_fn_t *fn;
 	int tqrid;
 	void *q;
 	int err;
@@ -4385,7 +4508,7 @@ iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name)
 }
 
 void
-iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, task_fn_t *fn,
+iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn,
 	char *name)
 {
 
@@ -4394,14 +4517,21 @@ iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, task_fn_t *fn,
 }
 
 void
-iflib_link_state_change(if_ctx_t ctx, int link_state)
+iflib_config_gtask_deinit(struct grouptask *gtask)
+{
+
+	taskqgroup_detach(qgroup_if_config_tqg, gtask);	
+}
+
+void
+iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate)
 {
 	if_t ifp = ctx->ifc_ifp;
 	iflib_txq_t txq = ctx->ifc_txqs;
 
-#if 0
+
 	if_setbaudrate(ifp, baudrate);
-#endif
+
 	/* If link down, disable watchdog */
 	if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) {
 		for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++)
@@ -4431,10 +4561,11 @@ iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq)
 }
 
 static int
-iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx)
+iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget)
 {
 
-	return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx));
+	return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx,
+	    budget));
 }
 
 void
@@ -4468,8 +4599,9 @@ iflib_msix_init(if_ctx_t ctx)
 	int iflib_num_tx_queues, iflib_num_rx_queues;
 	int err, admincnt, bar;
 
-	iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs;
-	iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs;
+	iflib_num_tx_queues = scctx->isc_ntxqsets;
+	iflib_num_rx_queues = scctx->isc_nrxqsets;
+
 	bar = ctx->ifc_softc_ctx.isc_msix_bar;
 	admincnt = sctx->isc_admin_intrcnt;
 	/* Override by tuneable */
@@ -4549,18 +4681,31 @@ iflib_msix_init(if_ctx_t ctx)
 	if (queues > rss_getnumbuckets())
 		queues = rss_getnumbuckets();
 #endif
-	if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queues)
-		queues = rx_queues = iflib_num_rx_queues;
+	if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt)
+		rx_queues = iflib_num_rx_queues;
 	else
 		rx_queues = queues;
+	/*
+	 * We want this to be all logical CPUs by default
+	 */
 	if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues)
 		tx_queues = iflib_num_tx_queues;
 	else
-		tx_queues = queues;
+		tx_queues = mp_ncpus;
+
+	if (ctx->ifc_sysctl_qs_eq_override == 0) {
+#ifdef INVARIANTS
+		if (tx_queues != rx_queues)
+			device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n",
+				      min(rx_queues, tx_queues), min(rx_queues, tx_queues));
+#endif
+		tx_queues = min(rx_queues, tx_queues);
+		rx_queues = min(rx_queues, tx_queues);
+	}
 
 	device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues);
 
-	vectors = queues + admincnt;
+	vectors = rx_queues + admincnt;
 	if ((err = pci_alloc_msix(dev, &vectors)) == 0) {
 		device_printf(dev,
 					  "Using MSIX interrupts with %d vectors\n", vectors);
@@ -4568,6 +4713,7 @@ iflib_msix_init(if_ctx_t ctx)
 		scctx->isc_nrxqsets = rx_queues;
 		scctx->isc_ntxqsets = tx_queues;
 		scctx->isc_intr = IFLIB_INTR_MSIX;
+
 		return (vectors);
 	} else {
 		device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err);
@@ -4617,7 +4763,58 @@ mp_ring_state_handler(SYSCTL_HANDLER_ARGS)
         return(rc);
 }
 
+enum iflib_ndesc_handler {
+	IFLIB_NTXD_HANDLER,	/* mp_ndesc_handler uses ifc_sysctl_ntxds */
+	IFLIB_NRXD_HANDLER,	/* mp_ndesc_handler uses ifc_sysctl_nrxds */
+};
 
+/*
+ * Sysctl handler for override_ntxds/override_nrxds.  Reads report the
+ * overrides as a comma-separated list; writes parse up to 8 values.
+ */
+static int
+mp_ndesc_handler(SYSCTL_HANDLER_ARGS)
+{
+	if_ctx_t ctx = (void *)arg1;
+	enum iflib_ndesc_handler type = arg2;
+	char buf[256] = {0};
+	uint16_t *ndesc;
+	char *p, *next;
+	int nqs, rc, i;
+
+	MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER);
+	nqs = 8;
+	switch (type) {
+	case IFLIB_NTXD_HANDLER:
+		ndesc = ctx->ifc_sysctl_ntxds;
+		if (ctx->ifc_sctx)
+			nqs = ctx->ifc_sctx->isc_ntxqs;
+		break;
+	case IFLIB_NRXD_HANDLER:
+		ndesc = ctx->ifc_sysctl_nrxds;
+		if (ctx->ifc_sctx)
+			nqs = ctx->ifc_sctx->isc_nrxqs;
+		break;
+	default:	/* only reachable when MPASS is compiled out */
+		return (EINVAL);
+	}
+	if (nqs == 0)
+		nqs = 8;
+
+	for (i = 0, p = buf; i < 8 && i < nqs; i++, p = strchr(p, 0))
+		snprintf(p, sizeof(buf) - (p - buf), "%s%d", i ? "," : "",
+		    ndesc[i]);
+
+	rc = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+	if (rc || req->newptr == NULL)
+		return (rc);
+
+	for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p;
+	    i++, p = strsep(&next, " ,"))
+		ndesc[i] = strtoul(p, NULL, 10);
+
+	return (rc);
+}
 
 #define NAME_BUFLEN 32
 static void
@@ -4634,19 +4831,29 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx)
 						      CTLFLAG_RD, NULL, "IFLIB fields");
 	oid_list = SYSCTL_CHILDREN(node);
 
+	SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version",
+		       CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0,
+		       "driver version");
+
 	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs",
 		       CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0,
 			"# of txqs to use, 0 => use default #");
 	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs",
-		       CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0,
-			"# of txqs to use, 0 => use default #");
-	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxds",
-		       CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxds, 0,
-			"# of tx descriptors to use, 0 => use default #");
-	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxds",
-		       CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxds, 0,
-			"# of rx descriptors to use, 0 => use default #");
-
+		       CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0,
+			"# of rxqs to use, 0 => use default #");
+	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable",
+		       CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0,
+                       "permit #txq != #rxq");
+
+	/* XXX change for per-queue sizes */
+	SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
+		       CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER,
+                       mp_ndesc_handler, "A",
+                       "list of # of tx descriptors to use, 0 = use default #");
+	SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds",
+		       CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER,
+                       mp_ndesc_handler, "A",
+                       "list of # of rx descriptors to use, 0 = use default #");
 }
 
 static void
@@ -4700,7 +4907,7 @@ iflib_add_device_sysctl_post(if_ctx_t ctx)
 				   &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed");
 		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail",
 				   CTLFLAG_RD,
-				   &txq->ift_mbuf_defrag_failed, "# of times no descriptors were available");
+				   &txq->ift_no_desc_avail, "# of times no descriptors were available");
 		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed",
 				   CTLFLAG_RD,
 				   &txq->ift_map_failed, "# of times dma map failed");
@@ -4763,7 +4970,7 @@ iflib_add_device_sysctl_post(if_ctx_t ctx)
 		queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
 					     CTLFLAG_RD, NULL, "Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
-		if (sctx->isc_flags & IFLIB_HAS_CQ) {
+		if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
 			SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx",
 				       CTLFLAG_RD,
 				       &rxq->ifr_cq_pidx, 1, "Producer Index");
diff --git a/sys/net/iflib.h b/sys/net/iflib.h
index c301b91..cf4a786 100644
--- a/sys/net/iflib.h
+++ b/sys/net/iflib.h
@@ -35,6 +35,7 @@
 #include <machine/bus.h>
 #include <sys/bus_dma.h>
 #include <sys/nv.h>
+#include <sys/gtaskqueue.h>
 
 
 /*
@@ -63,12 +64,14 @@ typedef struct if_int_delay_info  *if_int_delay_info_t;
 typedef struct if_rxd_frag {
 	uint8_t irf_flid;
 	uint16_t irf_idx;
+	uint16_t irf_len;
 } *if_rxd_frag_t;
 
 typedef struct if_rxd_info {
 	/* set by iflib */
 	uint16_t iri_qsidx;		/* qset index */
 	uint16_t iri_vtag;		/* vlan tag - if flag set */
+	/* XXX redundant with the new irf_len field */
 	uint16_t iri_len;		/* packet length */
 	uint16_t iri_cidx;		/* consumer index of cq */
 	struct ifnet *iri_ifp;		/* some drivers >1 interface per softc */
@@ -156,10 +159,11 @@ typedef struct if_txrx {
 	void (*ift_txd_flush) (void *, uint16_t, uint32_t);
 	int (*ift_txd_credits_update) (void *, uint16_t, uint32_t, bool);
 
-	int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx);
+	int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx,
+	    int budget);
 	int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri);
 	void (*ift_rxd_refill) (void * , uint16_t qsidx, uint8_t flidx, uint32_t pidx,
-							uint64_t *paddrs, caddr_t *vaddrs, uint16_t count);
+							uint64_t *paddrs, caddr_t *vaddrs, uint16_t count, uint16_t buf_size);
 	void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, uint32_t pidx);
 	int (*ift_legacy_intr) (void *);
 } *if_txrx_t;
@@ -170,11 +174,20 @@ typedef struct if_softc_ctx {
 	int isc_ntxqsets;
 	int isc_msix_bar;		/* can be model specific - initialize in attach_pre */
 	int isc_tx_nsegments;		/* can be model specific - initialize in attach_pre */
+	int isc_ntxd[8];
+	int isc_nrxd[8];
+
+	uint32_t isc_txqsizes[8];
+	uint32_t isc_rxqsizes[8];
+	int isc_max_txqsets;
+	int isc_max_rxqsets;
 	int isc_tx_tso_segments_max;
 	int isc_tx_tso_size_max;
 	int isc_tx_tso_segsize_max;
 	int isc_rss_table_size;
 	int isc_rss_table_mask;
+	int isc_nrxqsets_max;
+	int isc_ntxqsets_max;
 
 	iflib_intr_mode_t isc_intr;
 	uint16_t isc_max_frame_size; /* set at init time by driver */
@@ -188,8 +201,6 @@ struct if_shared_ctx {
 	int isc_magic;
 	if_txrx_t isc_txrx;
 	driver_t *isc_driver;
-	int isc_ntxd;
-	int isc_nrxd;
 	int isc_nfl;
 	int isc_flags;
 	bus_size_t isc_q_align;
@@ -199,14 +210,11 @@ struct if_shared_ctx {
 	bus_size_t isc_rx_maxsegsize;
 	int isc_rx_nsegments;
 	int isc_rx_process_limit;
-
-
-	uint32_t isc_txqsizes[8];
 	int isc_ntxqs;			/* # of tx queues per tx qset - usually 1 */
-	uint32_t isc_rxqsizes[8];
 	int isc_nrxqs;			/* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */
 	int isc_admin_intrcnt;		/* # of admin/link interrupts */
 
+
 	int isc_tx_reclaim_thresh;
 
 	/* fields necessary for probe */
@@ -215,6 +223,12 @@ struct if_shared_ctx {
 /* optional function to transform the read values to match the table*/
 	void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id,
 				   uint16_t *subdevice_id, uint16_t *rev_id);
+	int isc_nrxd_min[8];
+	int isc_nrxd_default[8];
+	int isc_nrxd_max[8];
+	int isc_ntxd_min[8];
+	int isc_ntxd_default[8];
+	int isc_ntxd_max[8];
 };
 
 typedef struct iflib_dma_info {
@@ -240,9 +254,9 @@ typedef enum {
 
 
 /*
- * Interface has a separate command queue
+ * Interface has a separate command queue for RX
  */
-#define IFLIB_HAS_CQ		0x1
+#define IFLIB_HAS_RXCQ		0x1
 /*
  * Driver has already allocated vectors
  */
@@ -252,6 +266,10 @@ typedef enum {
  * Interface is a virtual function
  */
 #define IFLIB_IS_VF		0x4
+/*
+ * Interface has a separate command queue for TX
+ */
+#define IFLIB_HAS_TXCQ		0x8
 
 
 /*
@@ -308,7 +326,10 @@ void iflib_irq_free(if_ctx_t ctx, if_irq_t irq);
 void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name);
 
 void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask,
-			     task_fn_t *fn, char *name);
+			     gtask_fn_t *fn, char *name);
+
+void iflib_config_gtask_deinit(struct grouptask *gtask);
+
 
 
 void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid);
@@ -317,7 +338,7 @@ void iflib_admin_intr_deferred(if_ctx_t ctx);
 void iflib_iov_intr_deferred(if_ctx_t ctx);
 
 
-void iflib_link_state_change(if_ctx_t ctx, int linkstate);
+void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate);
 
 int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags);
 void iflib_dma_free(iflib_dma_info_t dma);
diff --git a/sys/net/netisr.c b/sys/net/netisr.c
index 318e1a3..931d86c 100644
--- a/sys/net/netisr.c
+++ b/sys/net/netisr.c
@@ -272,10 +272,7 @@ u_int
 netisr_get_cpuid(u_int cpunumber)
 {
 
-	KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber,
-	    nws_count));
-
-	return (nws_array[cpunumber]);
+	return (nws_array[cpunumber % nws_count]);
 }
 
 /*
@@ -810,10 +807,12 @@ netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
 		 * dispatch.  In the queued case, fall back on the SOURCE
 		 * policy.
 		 */
-		if (*cpuidp != NETISR_CPUID_NONE)
+		if (*cpuidp != NETISR_CPUID_NONE) {
+			*cpuidp = netisr_get_cpuid(*cpuidp);
 			return (m);
+		}
 		if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
-			*cpuidp = curcpu;
+			*cpuidp = netisr_get_cpuid(curcpu);
 			return (m);
 		}
 		policy = NETISR_POLICY_SOURCE;
diff --git a/sys/net80211/ieee80211_ioctl.c b/sys/net80211/ieee80211_ioctl.c
index b65f23a..5d323c0 100644
--- a/sys/net80211/ieee80211_ioctl.c
+++ b/sys/net80211/ieee80211_ioctl.c
@@ -3394,10 +3394,12 @@ ieee80211_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 			 * Check if the MAC address was changed
 			 * via SIOCSIFLLADDR ioctl.
 			 */
+			if_addr_rlock(ifp);
 			if ((ifp->if_flags & IFF_UP) == 0 &&
 			    !IEEE80211_ADDR_EQ(vap->iv_myaddr, IF_LLADDR(ifp)))
 				IEEE80211_ADDR_COPY(vap->iv_myaddr,
 				    IF_LLADDR(ifp));
+			if_addr_runlock(ifp);
 		}
 		break;
 	case SIOCADDMULTI:
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index 621b25e..88c0895 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -12639,7 +12639,10 @@ sctp_lower_sosend(struct socket *so,
 		}
 		SCTP_INP_RUNLOCK(inp);
 	} else if (sinfo_assoc_id) {
-		stcb = sctp_findassociation_ep_asocid(inp, sinfo_assoc_id, 0);
+		stcb = sctp_findassociation_ep_asocid(inp, sinfo_assoc_id, 1);
+		if (stcb != NULL) {
+			hold_tcblock = 1;
+		}
 	} else if (addr) {
 		/*-
 		 * Since we did not use findep we must
@@ -13404,6 +13407,10 @@ skip_preblock:
 			}
 		}
 		SCTP_TCB_SEND_LOCK(stcb);
+		if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+			SCTP_TCB_SEND_UNLOCK(stcb);
+			goto out_unlocked;
+		}
 		if (sp) {
 			if (sp->msg_is_complete == 0) {
 				strm->last_msg_incomplete = 1;
diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c
index fc2b9bb..77dd634 100644
--- a/sys/netinet/sctp_usrreq.c
+++ b/sys/netinet/sctp_usrreq.c
@@ -1506,11 +1506,6 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
 		sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
 	}
 	SCTP_TCB_UNLOCK(stcb);
-	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
-		stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
-		/* Set the connected flag so we can queue data */
-		soisconnecting(so);
-	}
 out_now:
 	if (creat_lock_on) {
 		SCTP_ASOC_CREATE_UNLOCK(inp);
diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c
index 148b047..4d5d497 100644
--- a/sys/netinet/sctputil.c
+++ b/sys/netinet/sctputil.c
@@ -1280,6 +1280,7 @@ sctp_iterator_work(struct sctp_iterator *it)
 
 	SCTP_INP_INFO_RLOCK();
 	SCTP_ITERATOR_LOCK();
+	sctp_it_ctl.cur_it = it;
 	if (it->inp) {
 		SCTP_INP_RLOCK(it->inp);
 		SCTP_INP_DECR_REF(it->inp);
@@ -1287,6 +1288,7 @@ sctp_iterator_work(struct sctp_iterator *it)
 	if (it->inp == NULL) {
 		/* iterator is complete */
 done_with_iterator:
+		sctp_it_ctl.cur_it = NULL;
 		SCTP_ITERATOR_UNLOCK();
 		SCTP_INP_INFO_RUNLOCK();
 		if (it->function_atend != NULL) {
@@ -1427,13 +1429,11 @@ sctp_iterator_worker(void)
 
 	sctp_it_ctl.iterator_running = 1;
 	TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) {
-		sctp_it_ctl.cur_it = it;
 		/* now lets work on this one */
 		TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
 		SCTP_IPI_ITERATOR_WQ_UNLOCK();
 		CURVNET_SET(it->vn);
 		sctp_iterator_work(it);
-		sctp_it_ctl.cur_it = NULL;
 		CURVNET_RESTORE();
 		SCTP_IPI_ITERATOR_WQ_LOCK();
 		/* sa_ignore FREED_MEMORY */
@@ -6356,7 +6356,7 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
 	struct sctp_tcb *stcb = NULL;
 	unsigned int incr, at, i;
 
-	at = incr = 0;
+	at = 0;
 	sa = addr;
 	*error = *num_v6 = *num_v4 = 0;
 	/* account and validate addresses */
@@ -6364,6 +6364,7 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
 		switch (sa->sa_family) {
 #ifdef INET
 		case AF_INET:
+			incr = (unsigned int)sizeof(struct sockaddr_in);
 			if (sa->sa_len != incr) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 				*error = EINVAL;
@@ -6371,7 +6372,6 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
 				return (NULL);
 			}
 			(*num_v4) += 1;
-			incr = (unsigned int)sizeof(struct sockaddr_in);
 			break;
 #endif
 #ifdef INET6
@@ -6387,6 +6387,7 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
 					*bad_addr = 1;
 					return (NULL);
 				}
+				incr = (unsigned int)sizeof(struct sockaddr_in6);
 				if (sa->sa_len != incr) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
 					*error = EINVAL;
@@ -6394,7 +6395,6 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
 					return (NULL);
 				}
 				(*num_v6) += 1;
-				incr = (unsigned int)sizeof(struct sockaddr_in6);
 				break;
 			}
 #endif
diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c
index b44fb9e..0904e86 100644
--- a/sys/netinet6/udp6_usrreq.c
+++ b/sys/netinet6/udp6_usrreq.c
@@ -898,7 +898,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
 
 		UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
 		UDPSTAT_INC(udps_opackets);
-		error = ip6_output(m, optp, &inp->inp_route6, flags,
+		error = ip6_output(m, optp, NULL, flags,
 		    inp->in6p_moptions, NULL, inp);
 		break;
 	case AF_INET:
diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c
index 52dfe7c..43ff56a 100644
--- a/sys/netpfil/ipfw/ip_fw_sockopt.c
+++ b/sys/netpfil/ipfw/ip_fw_sockopt.c
@@ -1414,8 +1414,10 @@ manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
 
 	if (rh->range.head.length != sizeof(ipfw_range_tlv))
 		return (1);
-	if (rh->range.set >= IPFW_MAX_SETS ||
-	    rh->range.new_set >= IPFW_MAX_SETS)
+	/* enable_sets() expects bitmasks. */
+	if (op3->opcode != IP_FW_SET_ENABLE &&
+	    (rh->range.set >= IPFW_MAX_SETS ||
+	    rh->range.new_set >= IPFW_MAX_SETS))
 		return (EINVAL);
 
 	ret = 0;
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index da7d2c1..17797fa 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -2764,8 +2764,8 @@ pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
 	switch (af) {
 #ifdef INET
 	case AF_INET:
-		if ((a->addr32[0] < b->addr32[0]) ||
-		    (a->addr32[0] > e->addr32[0]))
+		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
+		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
 			return (0);
 		break;
 #endif /* INET */
@@ -2775,15 +2775,15 @@ pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
 
 		/* check a >= b */
 		for (i = 0; i < 4; ++i)
-			if (a->addr32[i] > b->addr32[i])
+			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
 				break;
-			else if (a->addr32[i] < b->addr32[i])
+			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
 				return (0);
 		/* check a <= e */
 		for (i = 0; i < 4; ++i)
-			if (a->addr32[i] < e->addr32[i])
+			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
 				break;
-			else if (a->addr32[i] > e->addr32[i])
+			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
 				return (0);
 		break;
 	}
diff --git a/sys/sys/_task.h b/sys/sys/_task.h
index ce89781..d3be719 100644
--- a/sys/sys/_task.h
+++ b/sys/sys/_task.h
@@ -42,6 +42,7 @@
  * (q)	taskqueue lock
  */
 typedef void task_fn_t(void *context, int pending);
+typedef void gtask_fn_t(void *context);
 
 struct task {
 	STAILQ_ENTRY(task) ta_link;	/* (q) link for queue */
@@ -51,8 +52,16 @@ struct task {
 	void	*ta_context;		/* (c) argument for handler */
 };
 
+struct gtask {
+	STAILQ_ENTRY(gtask) ta_link;	/* (q) link for queue */
+	uint16_t ta_flags;		/* (q) state flags, e.g. TASK_SKIP_WAKEUP */
+	u_short	ta_priority;		/* (c) Priority */
+	gtask_fn_t *ta_func;		/* (c) task handler, context arg only */
+	void	*ta_context;		/* (c) argument for handler */
+};
+
 struct grouptask {
-	struct	task		gt_task;
+	struct	gtask		gt_task;
 	void			*gt_taskqueue;
 	LIST_ENTRY(grouptask)	gt_list;
 	void			*gt_uniq;
diff --git a/sys/sys/gtaskqueue.h b/sys/sys/gtaskqueue.h
new file mode 100644
index 0000000..88d4b54
--- /dev/null
+++ b/sys/sys/gtaskqueue.h
@@ -0,0 +1,125 @@
+/*-
+ * Copyright (c) 2014 Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2016 Matthew Macy <mmacy@nextbsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_GTASKQUEUE_H_
+#define _SYS_GTASKQUEUE_H_
+#include <sys/taskqueue.h>
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif /* !_KERNEL */
+
+struct gtaskqueue;
+typedef void (*gtaskqueue_enqueue_fn)(void *context);
+
+/*
+ * Taskqueue groups.  Manages dynamic thread groups and irq binding for
+ * device and other tasks.
+ */
+
+void	gtaskqueue_block(struct gtaskqueue *queue);
+void	gtaskqueue_unblock(struct gtaskqueue *queue);
+
+int	gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask);
+void	gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *task);
+void	gtaskqueue_drain_all(struct gtaskqueue *queue);
+
+int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *task);
+void	taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *grptask,
+	    void *uniq, int irq, char *name);
+int		taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask,
+		void *uniq, int cpu, int irq, char *name);
+void	taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
+struct taskqgroup *taskqgroup_create(char *name);
+void	taskqgroup_destroy(struct taskqgroup *qgroup);
+int	taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride);
+
+#define TASK_ENQUEUED			0x1
+#define TASK_SKIP_WAKEUP		0x2
+
+/* Fill in a gtask's flags, priority, handler and handler argument. */
+#define GTASK_INIT(task, flags, priority, func, context) do {	\
+	(task)->ta_flags = (flags);				\
+	(task)->ta_priority = (priority);			\
+	(task)->ta_func = (func);				\
+	(task)->ta_context = (context);				\
+} while (0)
+
+#define	GROUPTASK_INIT(gtask, priority, func, context)	\
+	GTASK_INIT(&(gtask)->gt_task, TASK_SKIP_WAKEUP, priority, func, context)
+
+#define	GROUPTASK_ENQUEUE(gtask)			\
+	grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task)
+
+#define TASKQGROUP_DECLARE(name)			\
+extern struct taskqgroup *qgroup_##name
+
+
+#ifdef EARLY_AP_STARTUP
+#define TASKQGROUP_DEFINE(name, cnt, stride)				\
+									\
+struct taskqgroup *qgroup_##name;					\
+									\
+static void								\
+taskqgroup_define_##name(void *arg)					\
+{									\
+	qgroup_##name = taskqgroup_create(#name);			\
+	taskqgroup_adjust(qgroup_##name, (cnt), (stride));		\
+}									\
+									\
+SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST,		\
+	taskqgroup_define_##name, NULL)
+#else
+#define TASKQGROUP_DEFINE(name, cnt, stride)				\
+									\
+struct taskqgroup *qgroup_##name;					\
+									\
+static void								\
+taskqgroup_define_##name(void *arg)					\
+{									\
+	qgroup_##name = taskqgroup_create(#name);			\
+}									\
+									\
+SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST,		\
+	taskqgroup_define_##name, NULL);				\
+									\
+static void								\
+taskqgroup_adjust_##name(void *arg)					\
+{									\
+	taskqgroup_adjust(qgroup_##name, (cnt), (stride));		\
+}									\
+									\
+SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY,		\
+	taskqgroup_adjust_##name, NULL);				\
+									\
+struct __hack
+#endif
+TASKQGROUP_DECLARE(net);
+
+#endif /* !_SYS_GTASKQUEUE_H_ */
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 0343508..ee25c7a 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -58,7 +58,7 @@
  *		in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1100500	/* Master, propagated to newvers */
+#define __FreeBSD_version 1100501	/* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index e25a64b..153b9f9 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -422,6 +422,7 @@ do {									\
 #define	TDB_CHILD	0x00000100 /* New child indicator for ptrace() */
 #define	TDB_BORN	0x00000200 /* New LWP indicator for ptrace() */
 #define	TDB_EXIT	0x00000400 /* Exiting LWP indicator for ptrace() */
+#define	TDB_VFORK	0x00000800 /* vfork indicator for ptrace() */
 #define	TDB_FSTP	0x00001000 /* The thread is PT_ATTACH leader */
 
 /*
@@ -1004,7 +1005,7 @@ void	proc_linkup(struct proc *p, struct thread *td);
 struct proc *proc_realparent(struct proc *child);
 void	proc_reap(struct thread *td, struct proc *p, int *status, int options);
 void	proc_reparent(struct proc *child, struct proc *newparent);
-void	proc_set_traced(struct proc *p);
+void	proc_set_traced(struct proc *p, bool stop);
 struct	pstats *pstats_alloc(void);
 void	pstats_fork(struct pstats *src, struct pstats *dst);
 void	pstats_free(struct pstats *ps);
diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h
index 0cf25e8..e2045c8 100644
--- a/sys/sys/ptrace.h
+++ b/sys/sys/ptrace.h
@@ -89,6 +89,7 @@
 #define	PTRACE_SYSCALL	(PTRACE_SCE | PTRACE_SCX)
 #define	PTRACE_FORK	0x0008
 #define	PTRACE_LWP	0x0010
+#define	PTRACE_VFORK	0x0020
 
 #define	PTRACE_DEFAULT	(PTRACE_EXEC)
 
@@ -124,6 +125,8 @@ struct ptrace_lwpinfo {
 #define	PL_FLAG_CHILD	0x80	/* I am from child */
 #define	PL_FLAG_BORN	0x100	/* new LWP */
 #define	PL_FLAG_EXITED	0x200	/* exiting LWP */
+#define	PL_FLAG_VFORKED	0x400	/* new child via vfork */
+#define	PL_FLAG_VFORK_DONE 0x800 /* vfork parent has resumed */
 	sigset_t	pl_sigmask;	/* LWP signal mask */
 	sigset_t	pl_siglist;	/* LWP pending signal */
 	struct __siginfo pl_siginfo;	/* siginfo for signal */
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index f47ba2d..caffd7b 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -74,7 +74,7 @@ extern int vm_guest;		/* Running as virtual machine guest? */
  * Keep in sync with vm_guest_sysctl_names[].
  */
 enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV,
-		VM_GUEST_VMWARE, VM_LAST };
+		VM_GUEST_VMWARE, VM_GUEST_KVM, VM_LAST };
 
 #if defined(WITNESS) || defined(INVARIANT_SUPPORT)
 void	kassert_panic(const char *fmt, ...)  __printflike(1, 2);
diff --git a/sys/sys/taskqueue.h b/sys/sys/taskqueue.h
index c986ffb..a6c6655 100644
--- a/sys/sys/taskqueue.h
+++ b/sys/sys/taskqueue.h
@@ -204,78 +204,4 @@ struct taskqueue *taskqueue_create_fast(const char *name, int mflags,
 				    taskqueue_enqueue_fn enqueue,
 				    void *context);
 
-/*
- * Taskqueue groups.  Manages dynamic thread groups and irq binding for
- * device and other tasks.
- */
-int grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task);
-void	taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
-	    void *uniq, int irq, char *name);
-int		taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
-		void *uniq, int cpu, int irq, char *name);
-void	taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
-struct taskqgroup *taskqgroup_create(char *name);
-void	taskqgroup_destroy(struct taskqgroup *qgroup);
-int	taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride);
-
-#define TASK_SKIP_WAKEUP		0x1
-
-#define GTASK_INIT(task, priority, func, context) do {	\
-	(task)->ta_pending = 0;				\
-	(task)->ta_priority = (priority);		\
-	(task)->ta_func = (func);			\
-	(task)->ta_context = (context);			\
-} while (0)
-
-#define	GROUPTASK_INIT(gtask, priority, func, context)	\
-	GTASK_INIT(&(gtask)->gt_task, priority, func, context)
-
-#define	GROUPTASK_ENQUEUE(gtask)			\
-	grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task)
-
-#define TASKQGROUP_DECLARE(name)			\
-extern struct taskqgroup *qgroup_##name
-
-#ifdef EARLY_AP_STARTUP
-#define TASKQGROUP_DEFINE(name, cnt, stride)				\
-									\
-struct taskqgroup *qgroup_##name;					\
-									\
-static void								\
-taskqgroup_define_##name(void *arg)					\
-{									\
-	qgroup_##name = taskqgroup_create(#name);			\
-	taskqgroup_adjust(qgroup_##name, (cnt), (stride));		\
-}									\
-									\
-SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST,		\
-	taskqgroup_define_##name, NULL)
-#else
-#define TASKQGROUP_DEFINE(name, cnt, stride)				\
-									\
-struct taskqgroup *qgroup_##name;					\
-									\
-static void								\
-taskqgroup_define_##name(void *arg)					\
-{									\
-	qgroup_##name = taskqgroup_create(#name);			\
-}									\
-									\
-SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST,		\
-	taskqgroup_define_##name, NULL);				\
-									\
-static void								\
-taskqgroup_adjust_##name(void *arg)					\
-{									\
-	taskqgroup_adjust(qgroup_##name, (cnt), (stride));		\
-}									\
-									\
-SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY,		\
-	taskqgroup_adjust_##name, NULL);				\
-									\
-struct __hack
-#endif
-
-TASKQGROUP_DECLARE(net);
-
 #endif /* !_SYS_TASKQUEUE_H_ */
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 8551085..04e0ae9 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -255,6 +255,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
 		}
 		pref = newb + fs->fs_frag;
 		nb = newb;
+		MPASS(allocblk < allociblk + nitems(allociblk));
+		MPASS(lbns_remfree < lbns + nitems(lbns));
 		*allocblk++ = nb;
 		*lbns_remfree++ = indirs[1].in_lbn;
 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
@@ -309,7 +311,7 @@ retry:
 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
 			brelse(bp);
-			if (++reclaimed == 1) {
+			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
 				UFS_LOCK(ump);
 				softdep_request_cleanup(fs, vp, cred,
 				    FLUSH_BLOCKS_WAIT);
@@ -325,6 +327,8 @@ retry:
 		}
 		pref = newb + fs->fs_frag;
 		nb = newb;
+		MPASS(allocblk < allociblk + nitems(allociblk));
+		MPASS(lbns_remfree < lbns + nitems(lbns));
 		*allocblk++ = nb;
 		*lbns_remfree++ = indirs[i].in_lbn;
 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
@@ -386,7 +390,7 @@ retry:
 		    flags | IO_BUFLOCKED, cred, &newb);
 		if (error) {
 			brelse(bp);
-			if (++reclaimed == 1) {
+			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
 				UFS_LOCK(ump);
 				softdep_request_cleanup(fs, vp, cred,
 				    FLUSH_BLOCKS_WAIT);
@@ -401,6 +405,8 @@ retry:
 			goto fail;
 		}
 		nb = newb;
+		MPASS(allocblk < allociblk + nitems(allociblk));
+		MPASS(lbns_remfree < lbns + nitems(lbns));
 		*allocblk++ = nb;
 		*lbns_remfree++ = lbn;
 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
@@ -478,10 +484,16 @@ fail:
 		 * We shall not leave the freed blocks on the vnode
 		 * buffer object lists.
 		 */
-		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
+		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+		    GB_NOCREAT | GB_UNMAPPED);
 		if (bp != NULL) {
-			bp->b_flags |= (B_INVAL | B_RELBUF);
-			bp->b_flags &= ~B_ASYNC;
+			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
+			    ("mismatch1 l %jd %jd b %ju %ju",
+			    (intmax_t)bp->b_lblkno, (intmax_t)*lbns_remfree,
+			    (uintmax_t)bp->b_blkno,
+			    (uintmax_t)fsbtodb(fs, *blkp)));
+			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
+			bp->b_flags &= ~(B_ASYNC | B_CACHE);
 			brelse(bp);
 		}
 		deallocated += fs->fs_bsize;
@@ -524,6 +536,18 @@ fail:
 	 * cleared, free the blocks.
 	 */
 	for (blkp = allociblk; blkp < allocblk; blkp++) {
+#ifdef INVARIANTS
+		if (blkp == allociblk)
+			lbns_remfree = lbns;
+		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+		    GB_NOCREAT | GB_UNMAPPED);
+		if (bp != NULL) {
+			panic("zombie1 %jd %ju %ju",
+			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
+			    (uintmax_t)fsbtodb(fs, *blkp));
+		}
+		lbns_remfree++;
+#endif
 		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
 		    ip->i_number, vp->v_type, NULL);
 	}
@@ -818,6 +842,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
 		}
 		pref = newb + fs->fs_frag;
 		nb = newb;
+		MPASS(allocblk < allociblk + nitems(allociblk));
+		MPASS(lbns_remfree < lbns + nitems(lbns));
 		*allocblk++ = nb;
 		*lbns_remfree++ = indirs[1].in_lbn;
 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
@@ -873,7 +899,7 @@ retry:
 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
 			brelse(bp);
-			if (++reclaimed == 1) {
+			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
 				UFS_LOCK(ump);
 				softdep_request_cleanup(fs, vp, cred,
 				    FLUSH_BLOCKS_WAIT);
@@ -889,6 +915,8 @@ retry:
 		}
 		pref = newb + fs->fs_frag;
 		nb = newb;
+		MPASS(allocblk < allociblk + nitems(allociblk));
+		MPASS(lbns_remfree < lbns + nitems(lbns));
 		*allocblk++ = nb;
 		*lbns_remfree++ = indirs[i].in_lbn;
 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
@@ -951,7 +979,7 @@ retry:
 		    flags | IO_BUFLOCKED, cred, &newb);
 		if (error) {
 			brelse(bp);
-			if (++reclaimed == 1) {
+			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
 				UFS_LOCK(ump);
 				softdep_request_cleanup(fs, vp, cred,
 				    FLUSH_BLOCKS_WAIT);
@@ -966,6 +994,8 @@ retry:
 			goto fail;
 		}
 		nb = newb;
+		MPASS(allocblk < allociblk + nitems(allociblk));
+		MPASS(lbns_remfree < lbns + nitems(lbns));
 		*allocblk++ = nb;
 		*lbns_remfree++ = lbn;
 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
@@ -1049,10 +1079,16 @@ fail:
 		 * We shall not leave the freed blocks on the vnode
 		 * buffer object lists.
 		 */
-		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
+		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+		    GB_NOCREAT | GB_UNMAPPED);
 		if (bp != NULL) {
-			bp->b_flags |= (B_INVAL | B_RELBUF);
-			bp->b_flags &= ~B_ASYNC;
+			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
+			    ("mismatch2 l %jd %jd b %ju %ju",
+			    (intmax_t)bp->b_lblkno, (intmax_t)*lbns_remfree,
+			    (uintmax_t)bp->b_blkno,
+			    (uintmax_t)fsbtodb(fs, *blkp)));
+			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
+			bp->b_flags &= ~(B_ASYNC | B_CACHE);
 			brelse(bp);
 		}
 		deallocated += fs->fs_bsize;
@@ -1095,6 +1131,18 @@ fail:
 	 * cleared, free the blocks.
 	 */
 	for (blkp = allociblk; blkp < allocblk; blkp++) {
+#ifdef INVARIANTS
+		if (blkp == allociblk)
+			lbns_remfree = lbns;
+		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+		    GB_NOCREAT | GB_UNMAPPED);
+		if (bp != NULL) {
+			panic("zombie2 %jd %ju %ju",
+			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
+			    (uintmax_t)fsbtodb(fs, *blkp));
+		}
+		lbns_remfree++;
+#endif
 		ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
 		    ip->i_number, vp->v_type, NULL);
 	}
diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c
index 48e07ea..152866f 100644
--- a/sys/x86/x86/identcpu.c
+++ b/sys/x86/x86/identcpu.c
@@ -1300,6 +1300,8 @@ identify_hypervisor(void)
 				vm_guest = VM_GUEST_VMWARE;
 			else if (strcmp(hv_vendor, "Microsoft Hv") == 0)
 				vm_guest = VM_GUEST_HV;
+			else if (strcmp(hv_vendor, "KVMKVMKVM") == 0)
+				vm_guest = VM_GUEST_KVM;
 		}
 		return;
 	}
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index 203e9d0..cd774df 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -499,8 +499,7 @@ native_lapic_init(vm_paddr_t addr)
 	ver = lapic_read32(LAPIC_VERSION);
 	if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
 		lapic_eoi_suppression = 1;
-		if (vm_guest == VM_GUEST_VM &&
-		    !strcmp(hv_vendor, "KVMKVMKVM")) {
+		if (vm_guest == VM_GUEST_KVM) {
 			if (bootverbose)
 				printf(
 		       "KVM -- disabling lapic eoi suppression\n");