Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/pmap.c | 2
-rw-r--r--  sys/arm/allwinner/a10_ehci.c | 2
-rw-r--r--  sys/boot/efi/loader/Makefile | 12
-rw-r--r--  sys/conf/files | 1
-rw-r--r--  sys/conf/files.amd64 | 5
-rw-r--r--  sys/conf/files.i386 | 5
-rw-r--r--  sys/conf/kern.post.mk | 11
-rw-r--r--  sys/dev/ahci/ahci.c | 4
-rw-r--r--  sys/dev/ahci/ahci_pci.c | 2
-rw-r--r--  sys/dev/e1000/e1000_api.c | 4
-rw-r--r--  sys/dev/e1000/e1000_hw.h | 8
-rw-r--r--  sys/dev/e1000/e1000_ich8lan.c | 17
-rw-r--r--  sys/dev/e1000/e1000_ich8lan.h | 2
-rw-r--r--  sys/dev/e1000/e1000_phy.c | 22
-rw-r--r--  sys/dev/e1000/if_em.c | 6
-rw-r--r--  sys/dev/filemon/filemon.c | 2
-rw-r--r--  sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c | 155
-rw-r--r--  sys/dev/hyperv/storvsc/hv_vstorage.h | 6
-rw-r--r--  sys/dev/ntb/if_ntb/if_ntb.c | 1822
-rw-r--r--  sys/dev/ntb/ntb.c | 462
-rw-r--r--  sys/dev/ntb/ntb.h | 409
-rw-r--r--  sys/dev/ntb/ntb_hw/ntb_hw.c | 1356
-rw-r--r--  sys/dev/ntb/ntb_hw/ntb_hw.h | 125
-rw-r--r--  sys/dev/ntb/ntb_hw/ntb_regs.h | 3
-rw-r--r--  sys/dev/ntb/ntb_if.m | 210
-rw-r--r--  sys/dev/ntb/ntb_transport.c | 1518
-rw-r--r--  sys/dev/ntb/ntb_transport.h | 61
-rw-r--r--  sys/dev/pci/pci_pci.c | 49
-rw-r--r--  sys/i386/i386/pmap.c | 2
-rw-r--r--  sys/kern/capabilities.conf | 2
-rw-r--r--  sys/kern/kern_fork.c | 3
-rw-r--r--  sys/kern/kern_sig.c | 2
-rw-r--r--  sys/kern/subr_gtaskqueue.c | 864
-rw-r--r--  sys/kern/subr_param.c | 1
-rw-r--r--  sys/kern/subr_syscall.c | 8
-rw-r--r--  sys/kern/subr_taskqueue.c | 360
-rw-r--r--  sys/kern/sys_process.c | 17
-rw-r--r--  sys/kern/vfs_aio.c | 7
-rw-r--r--  sys/kern/vfs_default.c | 1
-rw-r--r--  sys/modules/ntb/Makefile | 2
-rw-r--r--  sys/modules/ntb/ntb/Makefile | 9
-rw-r--r--  sys/modules/ntb/ntb_hw/Makefile | 2
-rw-r--r--  sys/modules/ntb/ntb_transport/Makefile | 9
-rw-r--r--  sys/net/if_bridge.c | 3
-rw-r--r--  sys/net/if_epair.c | 4
-rw-r--r--  sys/net/ifdi_if.m | 5
-rw-r--r--  sys/net/iflib.c | 541
-rw-r--r--  sys/net/iflib.h | 45
-rw-r--r--  sys/net/netisr.c | 11
-rw-r--r--  sys/net80211/ieee80211_ioctl.c | 2
-rw-r--r--  sys/netinet/sctp_output.c | 9
-rw-r--r--  sys/netinet/sctp_usrreq.c | 5
-rw-r--r--  sys/netinet/sctputil.c | 10
-rw-r--r--  sys/netinet6/udp6_usrreq.c | 2
-rw-r--r--  sys/netpfil/ipfw/ip_fw_sockopt.c | 6
-rw-r--r--  sys/netpfil/pf/pf.c | 12
-rw-r--r--  sys/sys/_task.h | 11
-rw-r--r--  sys/sys/gtaskqueue.h | 125
-rw-r--r--  sys/sys/param.h | 2
-rw-r--r--  sys/sys/proc.h | 3
-rw-r--r--  sys/sys/ptrace.h | 3
-rw-r--r--  sys/sys/systm.h | 2
-rw-r--r--  sys/sys/taskqueue.h | 74
-rw-r--r--  sys/ufs/ffs/ffs_balloc.c | 68
-rw-r--r--  sys/x86/x86/identcpu.c | 2
-rw-r--r--  sys/x86/x86/local_apic.c | 3
66 files changed, 5261 insertions(+), 3257 deletions(-)
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 6062bfd..f87d3b5 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1224,7 +1224,7 @@ pmap_init(void)
* include at least one feature that is only supported by older Intel
* or newer AMD processors.
*/
- if (vm_guest == VM_GUEST_VM && (cpu_feature & CPUID_SS) == 0 &&
+ if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 &&
(cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI |
CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP |
AMDID2_FMA4)) == 0)
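
This hunk broadens the CPUID sanity check from the generic VM_GUEST_VM case to any detected hypervisor (vm_guest != VM_GUEST_NO): a guest reporting no self-snoop and none of the listed feature bits is treated as having untrustworthy CPUID data. The condition, restated as a standalone predicate for readability (the helper name is illustrative, not from the tree):

    /*
     * Illustrative predicate: under any hypervisor, missing self-snoop
     * plus an implausibly empty feature mix marks CPUID as suspect.
     */
    static bool
    pmap_cpuid_suspect_under_hypervisor(void)
    {
            return (vm_guest != VM_GUEST_NO &&
                (cpu_feature & CPUID_SS) == 0 &&
                (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI |
                CPUID2_AVX | CPUID2_XSAVE)) == 0 &&
                (amd_feature2 & (AMDID2_XOP | AMDID2_FMA4)) == 0);
    }
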
diff --git a/sys/arm/allwinner/a10_ehci.c b/sys/arm/allwinner/a10_ehci.c
index 95e5990..3ad3d98 100644
--- a/sys/arm/allwinner/a10_ehci.c
+++ b/sys/arm/allwinner/a10_ehci.c
@@ -360,7 +360,7 @@ static device_method_t ehci_methods[] = {
static driver_t ehci_driver = {
.name = "ehci",
.methods = ehci_methods,
- .size = sizeof(ehci_softc_t),
+ .size = sizeof(struct aw_ehci_softc),
};
static devclass_t ehci_devclass;
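
The softc-size fix matters because driver_t.size is what newbus uses to allocate the per-device softc: the Allwinner driver wraps the generic EHCI state in a larger structure, so sizing by ehci_softc_t under-allocated the softc and let the driver write past the end. A sketch of the assumed layout (the second member is illustrative):

    struct aw_ehci_softc {
            ehci_softc_t    sc_ehci; /* generic EHCI state; kept first so
                                      * shared ehci(4) code can use the softc */
            void            *sc_clk; /* illustrative platform-specific state */
    };
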
diff --git a/sys/boot/efi/loader/Makefile b/sys/boot/efi/loader/Makefile
index 6b1b4ac..2f54032 100644
--- a/sys/boot/efi/loader/Makefile
+++ b/sys/boot/efi/loader/Makefile
@@ -63,6 +63,18 @@ CFLAGS+= -DNO_PCI -DEFI
LIBSTAND= ${.OBJDIR}/../../../../lib/libstand/libstand.a
.endif
+.if !defined(BOOT_HIDE_SERIAL_NUMBERS)
+# Export serial numbers, UUID, and asset tag from loader.
+CFLAGS+= -DSMBIOS_SERIAL_NUMBERS
+.if defined(BOOT_LITTLE_ENDIAN_UUID)
+# Use little-endian UUID format as defined in SMBIOS 2.6.
+CFLAGS+= -DSMBIOS_LITTLE_ENDIAN_UUID
+.elif defined(BOOT_NETWORK_ENDIAN_UUID)
+# Use network-endian UUID format for backward compatibility.
+CFLAGS+= -DSMBIOS_NETWORK_ENDIAN_UUID
+.endif
+.endif
+
.if ${MK_FORTH} != "no"
BOOT_FORTH= yes
CFLAGS+= -DBOOT_FORTH
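
Serial-number export is now the default, with BOOT_HIDE_SERIAL_NUMBERS as the opt-out and the two UUID knobs selecting the byte order. A hedged usage example (make.conf or the command line both work for these variables):

    # e.g. in /etc/make.conf, or: make BOOT_LITTLE_ENDIAN_UUID=yes
    BOOT_LITTLE_ENDIAN_UUID=yes     # SMBIOS 2.6 little-endian UUID format
    #BOOT_HIDE_SERIAL_NUMBERS=yes   # alternatively, export no identifiers
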
diff --git a/sys/conf/files b/sys/conf/files
index 58bc490..448060c 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3345,6 +3345,7 @@ kern/subr_disk.c standard
kern/subr_eventhandler.c standard
kern/subr_fattime.c standard
kern/subr_firmware.c optional firmware
+kern/subr_gtaskqueue.c standard
kern/subr_hash.c standard
kern/subr_hints.c standard
kern/subr_kdb.c standard
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index defd5ac..186ea7d 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -287,7 +287,10 @@ dev/hyperv/vmbus/amd64/hyperv_machdep.c optional hyperv
dev/hyperv/vmbus/amd64/vmbus_vector.S optional hyperv
dev/nfe/if_nfe.c optional nfe pci
dev/ntb/if_ntb/if_ntb.c optional if_ntb
-dev/ntb/ntb_hw/ntb_hw.c optional if_ntb | ntb_hw
+dev/ntb/ntb_transport.c optional if_ntb
+dev/ntb/ntb.c optional if_ntb | ntb_hw
+dev/ntb/ntb_if.m optional if_ntb | ntb_hw
+dev/ntb/ntb_hw/ntb_hw.c optional ntb_hw
dev/nvd/nvd.c optional nvd nvme
dev/nvme/nvme.c optional nvme
dev/nvme/nvme_ctrlr.c optional nvme
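
The build glue now mirrors the new driver split: ntb.c plus ntb_if.m form the core newbus interface, ntb_hw.c is the Intel hardware driver, ntb_transport.c the queue-pair transport, and if_ntb.c the Ethernet emulation on top. A hypothetical kernel-config fragment enabling the whole stack under these clauses:

    device          ntb_hw  # Intel NTB hardware driver (pulls in the core)
    device          if_ntb  # Ethernet over NTB (pulls in ntb_transport)
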
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index bf6f1f0..85df7d0 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -277,7 +277,10 @@ dev/mse/mse.c optional mse
dev/mse/mse_isa.c optional mse isa
dev/nfe/if_nfe.c optional nfe pci
dev/ntb/if_ntb/if_ntb.c optional if_ntb
-dev/ntb/ntb_hw/ntb_hw.c optional if_ntb | ntb_hw
+dev/ntb/ntb_transport.c optional if_ntb
+dev/ntb/ntb.c optional if_ntb | ntb_hw
+dev/ntb/ntb_if.m optional if_ntb | ntb_hw
+dev/ntb/ntb_hw/ntb_hw.c optional ntb_hw
dev/nvd/nvd.c optional nvd nvme
dev/nvme/nvme.c optional nvme
dev/nvme/nvme_ctrlr.c optional nvme
diff --git a/sys/conf/kern.post.mk b/sys/conf/kern.post.mk
index 4cb60c0..f44645c 100644
--- a/sys/conf/kern.post.mk
+++ b/sys/conf/kern.post.mk
@@ -65,6 +65,10 @@ OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \
# Keep the related ports builds in the obj directory so that they are only rebuilt once per kernel build
WRKDIRPREFIX?= ${MAKEOBJDIRPREFIX}${SRC_BASE}/sys/${KERNCONF}
PORTSMODULESENV=\
+ env \
+ -u CC \
+ -u CXX \
+ -u CPP \
PATH=${PATH}:${LOCALBASE}/bin:${LOCALBASE}/sbin \
SRC_BASE=${SRC_BASE} \
OSVERSION=${OSRELDATE} \
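
Unsetting CC, CXX, and CPP keeps the kernel build's compiler choices from leaking into PORTS_MODULES sub-builds, which select their own toolchain. env -u removes a variable from the child environment entirely, for example:

    $ env CC=my-cc sh -c 'echo ${CC-unset}'
    my-cc
    $ env CC=my-cc env -u CC sh -c 'echo ${CC-unset}'
    unset
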
@@ -192,12 +196,13 @@ ${SYSTEM_OBJS} genassym.o vers.o: opt_global.h
.if !empty(.MAKE.MODE:Unormal:Mmeta) && empty(.MAKE.MODE:Unormal:Mnofilemon)
_meta_filemon= 1
.endif
-# Skip reading .depend when not needed to speed up tree-walks
-# and simple lookups.
+# Skip reading .depend when not needed to speed up tree-walks and simple
+# lookups. For install, only do this if no other targets are specified.
# Also skip generating or including .depend.* files if in meta+filemon mode
# since it will track dependencies itself. OBJS_DEPEND_GUESS is still used.
.if !empty(.MAKEFLAGS:M-V${_V_READ_DEPEND}) || make(obj) || make(clean*) || \
- make(install*) || make(kernel-obj) || make(kernel-clean*) || \
+ ${.TARGETS:M*install*} == ${.TARGETS} || \
+ make(kernel-obj) || make(kernel-clean*) || \
make(kernel-install*) || defined(_meta_filemon)
_SKIP_READ_DEPEND= 1
.MAKE.DEPENDFILE= /dev/null
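
The replacement expression is stricter than make(install*): it skips .depend only when every requested target matches *install*, so a mixed invocation still reads dependency files. Illustrated:

    # .TARGETS = "install"      -> ${.TARGETS:M*install*} = "install" -> equal: skip .depend
    # .TARGETS = "all install"  -> ${.TARGETS:M*install*} = "install" -> not equal: read .depend
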
diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c
index ec42b3c..ee0e957 100644
--- a/sys/dev/ahci/ahci.c
+++ b/sys/dev/ahci/ahci.c
@@ -416,7 +416,8 @@ ahci_setup_interrupt(device_t dev)
else if (ctlr->numirqs == 1 || i >= ctlr->channels ||
(ctlr->ccc && i == ctlr->cccv))
ctlr->irqs[i].mode = AHCI_IRQ_MODE_ALL;
- else if (i == ctlr->numirqs - 1)
+ else if (ctlr->channels > ctlr->numirqs &&
+ i == ctlr->numirqs - 1)
ctlr->irqs[i].mode = AHCI_IRQ_MODE_AFTER;
else
ctlr->irqs[i].mode = AHCI_IRQ_MODE_ONE;
@@ -465,6 +466,7 @@ ahci_intr(void *data)
} else { /* AHCI_IRQ_MODE_AFTER */
unit = irq->r_irq_rid - 1;
is = ATA_INL(ctlr->r_mem, AHCI_IS);
+ is &= (0xffffffff << unit);
}
/* CCC interrupt is edge triggered. */
if (ctlr->ccc)
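
In AHCI_IRQ_MODE_AFTER the last vector services its own channel plus every channel beyond the vector count, and the new mask keeps it from also consuming status bits that belong to the dedicated per-channel vectors. A worked example, assuming 16 channels served by 4 MSI vectors:

    /* Vector 3 (unit = 3) owns channels 3..15 in AFTER mode. */
    uint32_t is = 0x0000ffff;       /* illustrative: all 16 channels pending */
    is &= (0xffffffff << 3);        /* 0x0000fff8: channels 0-2 are left to
                                     * their dedicated vectors */
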
diff --git a/sys/dev/ahci/ahci_pci.c b/sys/dev/ahci/ahci_pci.c
index 6b6a1a6..4a44484 100644
--- a/sys/dev/ahci/ahci_pci.c
+++ b/sys/dev/ahci/ahci_pci.c
@@ -187,7 +187,7 @@ static const struct {
{0xa10f8086, 0x00, "Intel Sunrise Point (RAID)", 0},
{0x23238086, 0x00, "Intel DH89xxCC", 0},
{0x2360197b, 0x00, "JMicron JMB360", 0},
- {0x2361197b, 0x00, "JMicron JMB361", AHCI_Q_NOFORCE},
+ {0x2361197b, 0x00, "JMicron JMB361", AHCI_Q_NOFORCE | AHCI_Q_1CH},
{0x2362197b, 0x00, "JMicron JMB362", 0},
{0x2363197b, 0x00, "JMicron JMB363", AHCI_Q_NOFORCE},
{0x2365197b, 0x00, "JMicron JMB365", AHCI_Q_NOFORCE},
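
The JMB361 provides a single SATA port, so it gains AHCI_Q_1CH alongside the existing no-force quirk. A minimal sketch of how a channel-count quirk of this kind can be honored at attach time (the exact attach-side code is not part of this diff):

    /* Sketch: cap the reported channel count when the quirk is set. */
    if ((ctlr->quirks & AHCI_Q_1CH) && ctlr->channels > 1)
            ctlr->channels = 1;
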
diff --git a/sys/dev/e1000/e1000_api.c b/sys/dev/e1000/e1000_api.c
index 28379cc..52e2609 100644
--- a/sys/dev/e1000/e1000_api.c
+++ b/sys/dev/e1000/e1000_api.c
@@ -304,6 +304,10 @@ s32 e1000_set_mac_type(struct e1000_hw *hw)
case E1000_DEV_ID_PCH_SPT_I219_LM2:
case E1000_DEV_ID_PCH_SPT_I219_V2:
case E1000_DEV_ID_PCH_LBG_I219_LM3:
+ case E1000_DEV_ID_PCH_SPT_I219_LM4:
+ case E1000_DEV_ID_PCH_SPT_I219_V4:
+ case E1000_DEV_ID_PCH_SPT_I219_LM5:
+ case E1000_DEV_ID_PCH_SPT_I219_V5:
mac->type = e1000_pch_spt;
break;
case E1000_DEV_ID_82575EB_COPPER:
diff --git a/sys/dev/e1000/e1000_hw.h b/sys/dev/e1000/e1000_hw.h
index 1792e14..e1464a7 100644
--- a/sys/dev/e1000/e1000_hw.h
+++ b/sys/dev/e1000/e1000_hw.h
@@ -142,6 +142,10 @@ struct e1000_hw;
#define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* Sunrise Point-H PCH */
#define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* Sunrise Point-H PCH */
#define E1000_DEV_ID_PCH_LBG_I219_LM3 0x15B9 /* LEWISBURG PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM4 0x15D7
+#define E1000_DEV_ID_PCH_SPT_I219_V4 0x15D8
+#define E1000_DEV_ID_PCH_SPT_I219_LM5 0x15E3
+#define E1000_DEV_ID_PCH_SPT_I219_V5 0x15D6
#define E1000_DEV_ID_82576 0x10C9
#define E1000_DEV_ID_82576_FIBER 0x10E6
#define E1000_DEV_ID_82576_SERDES 0x10E7
@@ -957,9 +961,13 @@ struct e1000_dev_spec_ich8lan {
E1000_MUTEX nvm_mutex;
E1000_MUTEX swflag_mutex;
bool nvm_k1_enabled;
+ bool disable_k1_off;
bool eee_disable;
u16 eee_lp_ability;
enum e1000_ulp_state ulp_state;
+ bool ulp_capability_disabled;
+ bool during_suspend_flow;
+ bool during_dpg_exit;
};
struct e1000_dev_spec_82575 {
diff --git a/sys/dev/e1000/e1000_ich8lan.c b/sys/dev/e1000/e1000_ich8lan.c
index ae97a8c..4c50ce2 100644
--- a/sys/dev/e1000/e1000_ich8lan.c
+++ b/sys/dev/e1000/e1000_ich8lan.c
@@ -288,7 +288,7 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw)
mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE;
E1000_WRITE_REG(hw, E1000_CTRL, mac_reg);
E1000_WRITE_FLUSH(hw);
- usec_delay(10);
+ msec_delay(1);
mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE;
E1000_WRITE_REG(hw, E1000_CTRL, mac_reg);
E1000_WRITE_FLUSH(hw);
@@ -1625,7 +1625,17 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
hw->phy.ops.write_reg_locked(hw,
I217_PLL_CLOCK_GATE_REG,
phy_reg);
- }
+
+ if (speed == SPEED_1000) {
+ hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL,
+ &phy_reg);
+
+ phy_reg |= HV_PM_CTRL_K1_CLK_REQ;
+
+ hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL,
+ phy_reg);
+ }
+ }
hw->phy.ops.release(hw);
if (ret_val)
@@ -1718,7 +1728,8 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG);
u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6);
- if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE)
+ if ((pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) &&
+ (hw->dev_spec.ich8lan.disable_k1_off == FALSE))
fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE;
else
fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE;
diff --git a/sys/dev/e1000/e1000_ich8lan.h b/sys/dev/e1000/e1000_ich8lan.h
index edc1dd1..6d81291 100644
--- a/sys/dev/e1000/e1000_ich8lan.h
+++ b/sys/dev/e1000/e1000_ich8lan.h
@@ -239,7 +239,7 @@
/* PHY Power Management Control */
#define HV_PM_CTRL PHY_REG(770, 17)
-#define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100
+#define HV_PM_CTRL_K1_CLK_REQ 0x200
#define HV_PM_CTRL_K1_ENABLE 0x4000
#define I217_PLL_CLOCK_GATE_REG PHY_REG(772, 28)
diff --git a/sys/dev/e1000/e1000_phy.c b/sys/dev/e1000/e1000_phy.c
index 847d315..9684b43 100644
--- a/sys/dev/e1000/e1000_phy.c
+++ b/sys/dev/e1000/e1000_phy.c
@@ -4146,12 +4146,13 @@ s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data)
*data = E1000_READ_REG(hw, E1000_MPHY_DATA);
/* Disable access to mPHY if it was originally disabled */
- if (locked) {
+ if (locked)
ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- }
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+ E1000_MPHY_DIS_ACCESS);
+
return E1000_SUCCESS;
}
@@ -4210,12 +4211,13 @@ s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data,
E1000_WRITE_REG(hw, E1000_MPHY_DATA, data);
/* Disable access to mPHY if it was originally disabled */
- if (locked) {
+ if (locked)
ready = e1000_is_mphy_ready(hw);
- if (!ready)
- return -E1000_ERR_PHY;
- }
- E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, E1000_MPHY_DIS_ACCESS);
+ if (!ready)
+ return -E1000_ERR_PHY;
+ E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL,
+ E1000_MPHY_DIS_ACCESS);
+
return E1000_SUCCESS;
}
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index b8e9750..7e2690e 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -193,6 +193,12 @@ static em_vendor_info_t em_vendor_info_array[] =
{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
+ PCI_ANY_ID, PCI_ANY_ID, 0},
+ { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
/* required last entry */
{ 0, 0, 0, 0, 0}
};
diff --git a/sys/dev/filemon/filemon.c b/sys/dev/filemon/filemon.c
index 919af9d..26e1bc3 100644
--- a/sys/dev/filemon/filemon.c
+++ b/sys/dev/filemon/filemon.c
@@ -137,6 +137,8 @@ filemon_proc_get(struct proc *p)
{
struct filemon *filemon;
+ if (p->p_filemon == NULL)
+ return (NULL);
PROC_LOCK(p);
filemon = filemon_acquire(p->p_filemon);
PROC_UNLOCK(p);
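
The added test is the usual unlocked fast path: most processes carry no filemon reference, so checking p_filemon before taking the process lock avoids lock traffic, while the authoritative read still happens under PROC_LOCK. Annotated:

    /* Unlocked pre-check: a racy read is fine because the value is
     * re-read under PROC_LOCK below; a stale answer only costs (or
     * saves) one lock round trip. */
    if (p->p_filemon == NULL)
            return (NULL);
    PROC_LOCK(p);
    filemon = filemon_acquire(p->p_filemon);
    PROC_UNLOCK(p);
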
diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
index 8f3efef..b79e10c 100644
--- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
@@ -810,6 +810,7 @@ hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
* because the fields will be used later in storvsc_io_done().
*/
request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
+ request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status;
request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
@@ -1945,28 +1946,6 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
return(0);
}
-/*
- * SCSI Inquiry checks qualifier and type.
- * If qualifier is 011b, means the device server is not capable
- * of supporting a peripheral device on this logical unit, and
- * the type should be set to 1Fh.
- *
- * Return 1 if it is valid, 0 otherwise.
- */
-static inline int
-is_inquiry_valid(const struct scsi_inquiry_data *inq_data)
-{
- uint8_t type;
- if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED) {
- return (0);
- }
- type = SID_TYPE(inq_data);
- if (type == T_NODEVICE) {
- return (0);
- }
- return (1);
-}
-
/**
* @brief completion function before returning to CAM
*
@@ -1985,7 +1964,6 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
bus_dma_segment_t *ori_sglist = NULL;
int ori_sg_count = 0;
-
/* destroy bounce buffer if it is used */
if (reqp->bounce_sgl_count) {
ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
@@ -2040,88 +2018,71 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
ccb->ccb_h.status &= ~CAM_STATUS_MASK;
if (vm_srb->scsi_status == SCSI_STATUS_OK) {
const struct scsi_generic *cmd;
- /*
- * Check whether the data for INQUIRY cmd is valid or
- * not. Windows 10 and Windows 2016 send all zero
- * inquiry data to VM even for unpopulated slots.
- */
+
+ if (vm_srb->srb_status != SRB_STATUS_SUCCESS) {
+ if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) {
+ xpt_print(ccb->ccb_h.path, "invalid LUN %d\n",
+ vm_srb->lun);
+ } else {
+ xpt_print(ccb->ccb_h.path, "Unknown SRB flag: %d\n",
+ vm_srb->srb_status);
+ }
+ /*
+ * If there are errors, for example, invalid LUN,
+ * host will inform VM through SRB status.
+ */
+ ccb->ccb_h.status |= CAM_SEL_TIMEOUT;
+ } else {
+ ccb->ccb_h.status |= CAM_REQ_CMP;
+ }
+
cmd = (const struct scsi_generic *)
((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
if (cmd->opcode == INQUIRY) {
- /*
- * The host of Windows 10 or 2016 server will response
- * the inquiry request with invalid data for unexisted device:
- [0x7f 0x0 0x5 0x2 0x1f ... ]
- * But on windows 2012 R2, the response is:
- [0x7f 0x0 0x0 0x0 0x0 ]
- * That is why here wants to validate the inquiry response.
- * The validation will skip the INQUIRY whose response is short,
- * which is less than SHORT_INQUIRY_LENGTH (36).
- *
- * For more information about INQUIRY, please refer to:
- * ftp://ftp.avc-pioneer.com/Mtfuji_7/Proposal/Jun09/INQUIRY.pdf
- */
- struct scsi_inquiry_data *inq_data =
- (struct scsi_inquiry_data *)csio->data_ptr;
- uint8_t* resp_buf = (uint8_t*)csio->data_ptr;
- /* Get the buffer length reported by host */
- int resp_xfer_len = vm_srb->transfer_len;
- /* Get the available buffer length */
- int resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
- int data_len = (resp_buf_len < resp_xfer_len) ? resp_buf_len : resp_xfer_len;
- if (data_len < SHORT_INQUIRY_LENGTH) {
- ccb->ccb_h.status |= CAM_REQ_CMP;
- if (bootverbose && data_len >= 5) {
- mtx_lock(&sc->hs_lock);
- xpt_print(ccb->ccb_h.path,
- "storvsc skips the validation for short inquiry (%d)"
- " [%x %x %x %x %x]\n",
- data_len,resp_buf[0],resp_buf[1],resp_buf[2],
- resp_buf[3],resp_buf[4]);
- mtx_unlock(&sc->hs_lock);
- }
- } else if (is_inquiry_valid(inq_data) == 0) {
- ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
+ struct scsi_inquiry_data *inq_data =
+ (struct scsi_inquiry_data *)csio->data_ptr;
+ uint8_t *resp_buf = (uint8_t *)csio->data_ptr;
+ int resp_xfer_len, resp_buf_len, data_len;
+
+ /* Get the buffer length reported by host */
+ resp_xfer_len = vm_srb->transfer_len;
+ /* Get the available buffer length */
+ resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
+ data_len = (resp_buf_len < resp_xfer_len) ?
+ resp_buf_len : resp_xfer_len;
+
if (bootverbose && data_len >= 5) {
- mtx_lock(&sc->hs_lock);
- xpt_print(ccb->ccb_h.path,
- "storvsc uninstalled invalid device"
- " [%x %x %x %x %x]\n",
- resp_buf[0],resp_buf[1],resp_buf[2],resp_buf[3],resp_buf[4]);
- mtx_unlock(&sc->hs_lock);
+ xpt_print(ccb->ccb_h.path, "storvsc inquiry "
+ "(%d) [%x %x %x %x %x ... ]\n", data_len,
+ resp_buf[0], resp_buf[1], resp_buf[2],
+ resp_buf[3], resp_buf[4]);
}
- } else {
- char vendor[16];
- cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor),
- sizeof(vendor));
- /**
- * XXX: upgrade SPC2 to SPC3 if host is WIN8 or WIN2012 R2
- * in order to support UNMAP feature
- */
- if (!strncmp(vendor,"Msft",4) &&
- SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
- (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
- vmstor_proto_version== VMSTOR_PROTOCOL_VERSION_WIN8)) {
- inq_data->version = SCSI_REV_SPC3;
- if (bootverbose) {
- mtx_lock(&sc->hs_lock);
- xpt_print(ccb->ccb_h.path,
- "storvsc upgrades SPC2 to SPC3\n");
- mtx_unlock(&sc->hs_lock);
+ if (vm_srb->srb_status == SRB_STATUS_SUCCESS &&
+ data_len > SHORT_INQUIRY_LENGTH) {
+ char vendor[16];
+
+ cam_strvis(vendor, inq_data->vendor,
+ sizeof(inq_data->vendor), sizeof(vendor));
+
+ /*
+ * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or
+ * WIN2012 R2 in order to support UNMAP feature.
+ */
+ if (!strncmp(vendor, "Msft", 4) &&
+ SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 &&
+ (vmstor_proto_version ==
+ VMSTOR_PROTOCOL_VERSION_WIN8_1 ||
+ vmstor_proto_version ==
+ VMSTOR_PROTOCOL_VERSION_WIN8)) {
+ inq_data->version = SCSI_REV_SPC3;
+ if (bootverbose) {
+ xpt_print(ccb->ccb_h.path,
+ "storvsc upgrades "
+ "SPC2 to SPC3\n");
+ }
}
}
- ccb->ccb_h.status |= CAM_REQ_CMP;
- if (bootverbose) {
- mtx_lock(&sc->hs_lock);
- xpt_print(ccb->ccb_h.path,
- "storvsc has passed inquiry response (%d) validation\n",
- data_len);
- mtx_unlock(&sc->hs_lock);
- }
- }
- } else {
- ccb->ccb_h.status |= CAM_REQ_CMP;
}
} else {
mtx_lock(&sc->hs_lock);
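
The thread through this hunk: hosts report command-independent failures such as an invalid LUN via srb_status while scsi_status can still read OK, so the driver now propagates and checks both fields, mapping host-level failures to CAM_SEL_TIMEOUT so CAM treats the LUN as absent rather than retrying. Condensed:

    /* Condensed sketch of the new status handling. */
    if (vm_srb->scsi_status == SCSI_STATUS_OK) {
            if (vm_srb->srb_status != SRB_STATUS_SUCCESS)
                    ccb->ccb_h.status |= CAM_SEL_TIMEOUT; /* host-side error */
            else
                    ccb->ccb_h.status |= CAM_REQ_CMP;
    }
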
diff --git a/sys/dev/hyperv/storvsc/hv_vstorage.h b/sys/dev/hyperv/storvsc/hv_vstorage.h
index f2b9480..9205e35 100644
--- a/sys/dev/hyperv/storvsc/hv_vstorage.h
+++ b/sys/dev/hyperv/storvsc/hv_vstorage.h
@@ -249,9 +249,9 @@ struct vstor_packet {
/**
* SRB Status Masks (can be combined with above status codes)
*/
-#define SRB_STATUS_QUEUE_FROZEN 0x40
-#define SRB_STATUS_AUTOSENSE_VALID 0x80
-
+#define SRB_STATUS_QUEUE_FROZEN 0x40
+#define SRB_STATUS_AUTOSENSE_VALID 0x80
+#define SRB_STATUS_INVALID_LUN 0x20
/**
* Packet flags
diff --git a/sys/dev/ntb/if_ntb/if_ntb.c b/sys/dev/ntb/if_ntb/if_ntb.c
index 7b659f7..c67ae0d 100644
--- a/sys/dev/ntb/if_ntb/if_ntb.c
+++ b/sys/dev/ntb/if_ntb/if_ntb.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
* Copyright (C) 2013 Intel Corporation
* Copyright (C) 2015 EMC Corporation
* All rights reserved.
@@ -25,21 +26,27 @@
* SUCH DAMAGE.
*/
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using PCI-e links, providing remote memory access.
+ *
+ * This module contains a driver for a simulated Ethernet device, using
+ * the underlying NTB Transport device.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
-#include <sys/bitset.h>
+#include <sys/buf_ring.h>
#include <sys/bus.h>
-#include <sys/ktr.h>
#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
#include <sys/module.h>
-#include <sys/mutex.h>
-#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
@@ -48,421 +55,164 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>
#include <net/ethernet.h>
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
#include <machine/bus.h>
-#include <machine/cpufunc.h>
-
-#include <netinet/in.h>
-#include <netinet/ip.h>
-
-#include "../ntb_hw/ntb_hw.h"
-
-/*
- * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
- * allows you to connect two systems using a PCI-e link.
- *
- * This module contains a protocol for sending and receiving messages, and
- * exposes that protocol through a simulated ethernet device called ntb.
- *
- * NOTE: Much of the code in this module is shared with Linux. Any patches may
- * be picked up and redistributed in Linux with a dual GPL/BSD license.
- */
-#define QP_SETSIZE 64
-BITSET_DEFINE(_qpset, QP_SETSIZE);
-#define test_bit(pos, addr) BIT_ISSET(QP_SETSIZE, (pos), (addr))
-#define set_bit(pos, addr) BIT_SET(QP_SETSIZE, (pos), (addr))
-#define clear_bit(pos, addr) BIT_CLR(QP_SETSIZE, (pos), (addr))
-#define ffs_bit(addr) BIT_FFS(QP_SETSIZE, (addr))
+#include "../ntb_transport.h"
#define KTR_NTB KTR_SPARE3
+#define NTB_MEDIATYPE (IFM_ETHER | IFM_AUTO | IFM_FDX)
-#define NTB_TRANSPORT_VERSION 4
-#define NTB_RX_MAX_PKTS 64
-#define NTB_RXQ_SIZE 300
-
-enum ntb_link_event {
- NTB_LINK_DOWN = 0,
- NTB_LINK_UP,
-};
+#define NTB_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
+#define NTB_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
+#define NTB_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
+ CSUM_PSEUDO_HDR | \
+ CSUM_IP_CHECKED | CSUM_IP_VALID | \
+ CSUM_SCTP_VALID)
static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb");
-static unsigned g_if_ntb_debug_level;
-SYSCTL_UINT(_hw_if_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
- &g_if_ntb_debug_level, 0, "if_ntb log level -- higher is more verbose");
-#define ntb_printf(lvl, ...) do { \
- if ((lvl) <= g_if_ntb_debug_level) { \
- if_printf(nt->ifp, __VA_ARGS__); \
- } \
-} while (0)
-
-static unsigned transport_mtu = IP_MAXPACKET + ETHER_HDR_LEN + ETHER_CRC_LEN;
-
-static uint64_t max_mw_size;
-SYSCTL_UQUAD(_hw_if_ntb, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0,
- "If enabled (non-zero), limit the size of large memory windows. "
- "Both sides of the NTB MUST set the same value here.");
-
-static unsigned max_num_clients;
-SYSCTL_UINT(_hw_if_ntb, OID_AUTO, max_num_clients, CTLFLAG_RDTUN,
- &max_num_clients, 0, "Maximum number of NTB transport clients. "
- "0 (default) - use all available NTB memory windows; "
- "positive integer N - Limit to N memory windows.");
-
-static unsigned enable_xeon_watchdog;
-SYSCTL_UINT(_hw_if_ntb, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN,
- &enable_xeon_watchdog, 0, "If non-zero, write a register every second to "
- "keep a watchdog from tearing down the NTB link");
-
-STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);
-
-typedef uint32_t ntb_q_idx_t;
-
-struct ntb_queue_entry {
- /* ntb_queue list reference */
- STAILQ_ENTRY(ntb_queue_entry) entry;
-
- /* info on data to be transferred */
- void *cb_data;
- void *buf;
- uint32_t len;
- uint32_t flags;
-
- struct ntb_transport_qp *qp;
- struct ntb_payload_header *x_hdr;
- ntb_q_idx_t index;
-};
-
-struct ntb_rx_info {
- ntb_q_idx_t entry;
-};
-
-struct ntb_transport_qp {
- struct ntb_transport_ctx *transport;
- struct ntb_softc *ntb;
-
- void *cb_data;
-
- bool client_ready;
- volatile bool link_is_up;
- uint8_t qp_num; /* Only 64 QPs are allowed. 0-63 */
-
- struct ntb_rx_info *rx_info;
- struct ntb_rx_info *remote_rx_info;
-
- void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
- void *data, int len);
- struct ntb_queue_list tx_free_q;
- struct mtx ntb_tx_free_q_lock;
- caddr_t tx_mw;
- bus_addr_t tx_mw_phys;
- ntb_q_idx_t tx_index;
- ntb_q_idx_t tx_max_entry;
- uint64_t tx_max_frame;
-
- void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
- void *data, int len);
- struct ntb_queue_list rx_post_q;
- struct ntb_queue_list rx_pend_q;
- /* ntb_rx_q_lock: synchronize access to rx_XXXX_q */
- struct mtx ntb_rx_q_lock;
- struct task rx_completion_task;
- struct task rxc_db_work;
- caddr_t rx_buff;
- ntb_q_idx_t rx_index;
- ntb_q_idx_t rx_max_entry;
- uint64_t rx_max_frame;
-
- void (*event_handler)(void *data, enum ntb_link_event status);
- struct callout link_work;
- struct callout queue_full;
- struct callout rx_full;
-
- uint64_t last_rx_no_buf;
+static unsigned g_if_ntb_num_queues = UINT_MAX;
+SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN,
+ &g_if_ntb_num_queues, 0, "Number of queues per interface");
- /* Stats */
- uint64_t rx_bytes;
- uint64_t rx_pkts;
- uint64_t rx_ring_empty;
- uint64_t rx_err_no_buf;
- uint64_t rx_err_oflow;
- uint64_t rx_err_ver;
- uint64_t tx_bytes;
- uint64_t tx_pkts;
- uint64_t tx_ring_full;
- uint64_t tx_err_no_buf;
-};
-
-struct ntb_queue_handlers {
- void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
- void *data, int len);
- void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
- void *data, int len);
- void (*event_handler)(void *data, enum ntb_link_event status);
-};
-
-struct ntb_transport_mw {
- vm_paddr_t phys_addr;
- size_t phys_size;
- size_t xlat_align;
- size_t xlat_align_size;
- bus_addr_t addr_limit;
- /* Tx buff is off vbase / phys_addr */
- caddr_t vbase;
- size_t xlat_size;
- size_t buff_size;
- /* Rx buff is off virt_addr / dma_addr */
- caddr_t virt_addr;
- bus_addr_t dma_addr;
-};
-
-struct ntb_transport_ctx {
- struct ntb_softc *ntb;
- struct ifnet *ifp;
- struct ntb_transport_mw mw_vec[NTB_MAX_NUM_MW];
- struct ntb_transport_qp *qp_vec;
- struct _qpset qp_bitmap;
- struct _qpset qp_bitmap_free;
- unsigned mw_count;
- unsigned qp_count;
- volatile bool link_is_up;
- struct callout link_work;
- struct callout link_watchdog;
- struct task link_cleanup;
- uint64_t bufsize;
- u_char eaddr[ETHER_ADDR_LEN];
- struct mtx tx_lock;
- struct mtx rx_lock;
-
- /* The hardcoded single queuepair in ntb_setup_interface() */
+struct ntb_net_queue {
+ struct ntb_net_ctx *sc;
+ if_t ifp;
struct ntb_transport_qp *qp;
+ struct buf_ring *br;
+ struct task tx_task;
+ struct taskqueue *tx_tq;
+ struct mtx tx_lock;
+ struct callout queue_full;
};
-static struct ntb_transport_ctx net_softc;
-
-enum {
- IF_NTB_DESC_DONE_FLAG = 1 << 0,
- IF_NTB_LINK_DOWN_FLAG = 1 << 1,
-};
-
-struct ntb_payload_header {
- ntb_q_idx_t ver;
- uint32_t len;
- uint32_t flags;
-};
-
-enum {
- /*
- * The order of this enum is part of the if_ntb remote protocol. Do
- * not reorder without bumping protocol version (and it's probably best
- * to keep the protocol in lock-step with the Linux NTB driver.
- */
- IF_NTB_VERSION = 0,
- IF_NTB_QP_LINKS,
- IF_NTB_NUM_QPS,
- IF_NTB_NUM_MWS,
- /*
- * N.B.: transport_link_work assumes MW1 enums = MW0 + 2.
- */
- IF_NTB_MW0_SZ_HIGH,
- IF_NTB_MW0_SZ_LOW,
- IF_NTB_MW1_SZ_HIGH,
- IF_NTB_MW1_SZ_LOW,
- IF_NTB_MAX_SPAD,
-
- /*
- * Some NTB-using hardware have a watchdog to work around NTB hangs; if
- * a register or doorbell isn't written every few seconds, the link is
- * torn down. Write an otherwise unused register every few seconds to
- * work around this watchdog.
- */
- IF_NTB_WATCHDOG_SPAD = 15
+struct ntb_net_ctx {
+ if_t ifp;
+ struct ifmedia media;
+ u_char eaddr[ETHER_ADDR_LEN];
+ int num_queues;
+ struct ntb_net_queue *queues;
+ int mtu;
};
-CTASSERT(IF_NTB_WATCHDOG_SPAD < XEON_SPAD_COUNT &&
- IF_NTB_WATCHDOG_SPAD < ATOM_SPAD_COUNT);
-
-#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count)
-#define NTB_QP_DEF_NUM_ENTRIES 100
-#define NTB_LINK_DOWN_TIMEOUT 10
-static int ntb_handle_module_events(struct module *m, int what, void *arg);
-static int ntb_setup_interface(void);
-static int ntb_teardown_interface(void);
+static int ntb_net_probe(device_t dev);
+static int ntb_net_attach(device_t dev);
+static int ntb_net_detach(device_t dev);
static void ntb_net_init(void *arg);
-static int ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
-static void ntb_start(struct ifnet *ifp);
+static int ntb_ifmedia_upd(struct ifnet *);
+static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *);
+static int ntb_ioctl(if_t ifp, u_long command, caddr_t data);
+static int ntb_transmit(if_t ifp, struct mbuf *m);
static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len);
static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len);
static void ntb_net_event_handler(void *data, enum ntb_link_event status);
-static int ntb_transport_probe(struct ntb_softc *ntb);
-static void ntb_transport_free(struct ntb_transport_ctx *);
-static void ntb_transport_init_queue(struct ntb_transport_ctx *nt,
- unsigned int qp_num);
-static void ntb_transport_free_queue(struct ntb_transport_qp *qp);
-static struct ntb_transport_qp *ntb_transport_create_queue(void *data,
- struct ntb_softc *pdev, const struct ntb_queue_handlers *handlers);
-static void ntb_transport_link_up(struct ntb_transport_qp *qp);
-static int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb,
- void *data, unsigned int len);
-static int ntb_process_tx(struct ntb_transport_qp *qp,
- struct ntb_queue_entry *entry);
-static void ntb_memcpy_tx(struct ntb_transport_qp *qp,
- struct ntb_queue_entry *entry, void *offset);
+static void ntb_handle_tx(void *arg, int pending);
static void ntb_qp_full(void *arg);
-static void ntb_transport_rxc_db(void *arg, int pending);
-static int ntb_process_rxc(struct ntb_transport_qp *qp);
-static void ntb_memcpy_rx(struct ntb_transport_qp *qp,
- struct ntb_queue_entry *entry, void *offset);
-static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp,
- void *data);
-static void ntb_complete_rxc(void *arg, int pending);
-static void ntb_transport_doorbell_callback(void *data, uint32_t vector);
-static void ntb_transport_event_callback(void *data);
-static void ntb_transport_link_work(void *arg);
-static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size);
-static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw);
-static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
- unsigned int qp_num);
-static void ntb_qp_link_work(void *arg);
-static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt);
-static void ntb_transport_link_cleanup_work(void *, int);
-static void ntb_qp_link_down(struct ntb_transport_qp *qp);
-static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp);
-static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp);
-static void ntb_transport_link_down(struct ntb_transport_qp *qp);
-static void ntb_send_link_down(struct ntb_transport_qp *qp);
-static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
- struct ntb_queue_list *list);
-static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock,
- struct ntb_queue_list *list);
-static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock,
- struct ntb_queue_list *from, struct ntb_queue_list *to);
+static void ntb_qflush(if_t ifp);
static void create_random_local_eui48(u_char *eaddr);
-static unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp);
-static void xeon_link_watchdog_hb(void *);
-
-static const struct ntb_ctx_ops ntb_transport_ops = {
- .link_event = ntb_transport_event_callback,
- .db_event = ntb_transport_doorbell_callback,
-};
-MALLOC_DEFINE(M_NTB_IF, "if_ntb", "ntb network driver");
-
-static inline void
-iowrite32(uint32_t val, void *addr)
-{
-
- bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr,
- val);
-}
-
-/* Module setup and teardown */
static int
-ntb_handle_module_events(struct module *m, int what, void *arg)
+ntb_net_probe(device_t dev)
{
- int err = 0;
- switch (what) {
- case MOD_LOAD:
- err = ntb_setup_interface();
- break;
- case MOD_UNLOAD:
- err = ntb_teardown_interface();
- break;
- default:
- err = EOPNOTSUPP;
- break;
- }
- return (err);
+ device_set_desc(dev, "NTB Network Interface");
+ return (0);
}
-static moduledata_t if_ntb_mod = {
- "if_ntb",
- ntb_handle_module_events,
- NULL
-};
-
-DECLARE_MODULE(if_ntb, if_ntb_mod, SI_SUB_KLD, SI_ORDER_ANY);
-MODULE_DEPEND(if_ntb, ntb_hw, 1, 1, 1);
-
static int
-ntb_setup_interface(void)
+ntb_net_attach(device_t dev)
{
- struct ifnet *ifp;
+ struct ntb_net_ctx *sc = device_get_softc(dev);
+ struct ntb_net_queue *q;
+ if_t ifp;
struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
ntb_net_tx_handler, ntb_net_event_handler };
- int rc;
-
- net_softc.ntb = devclass_get_softc(devclass_find("ntb_hw"), 0);
- if (net_softc.ntb == NULL) {
- printf("ntb: Cannot find devclass\n");
- return (ENXIO);
- }
+ int i;
- ifp = net_softc.ifp = if_alloc(IFT_ETHER);
+ ifp = sc->ifp = if_gethandle(IFT_ETHER);
if (ifp == NULL) {
- ntb_transport_free(&net_softc);
printf("ntb: Cannot allocate ifnet structure\n");
return (ENOMEM);
}
- if_initname(ifp, "ntb", 0);
-
- rc = ntb_transport_probe(net_softc.ntb);
- if (rc != 0) {
- printf("ntb: Cannot init transport: %d\n", rc);
- if_free(net_softc.ifp);
- return (rc);
- }
-
- net_softc.qp = ntb_transport_create_queue(ifp, net_softc.ntb,
- &handlers);
- ifp->if_init = ntb_net_init;
- ifp->if_softc = &net_softc;
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_ioctl = ntb_ioctl;
- ifp->if_start = ntb_start;
- IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
- ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN;
- IFQ_SET_READY(&ifp->if_snd);
- create_random_local_eui48(net_softc.eaddr);
- ether_ifattach(ifp, net_softc.eaddr);
- ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_JUMBO_MTU;
- ifp->if_capenable = ifp->if_capabilities;
- ifp->if_mtu = ntb_transport_max_size(net_softc.qp) - ETHER_HDR_LEN -
- ETHER_CRC_LEN;
-
- ntb_transport_link_up(net_softc.qp);
- net_softc.bufsize = ntb_transport_max_size(net_softc.qp) +
- sizeof(struct ether_header);
+ if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+ if_setdev(ifp, dev);
+
+ sc->num_queues = min(g_if_ntb_num_queues,
+ ntb_transport_queue_count(dev));
+ sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue),
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ sc->mtu = INT_MAX;
+ for (i = 0; i < sc->num_queues; i++) {
+ q = &sc->queues[i];
+ q->sc = sc;
+ q->ifp = ifp;
+ q->qp = ntb_transport_create_queue(dev, i, &handlers, q);
+ if (q->qp == NULL)
+ break;
+ sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp));
+ mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF);
+ q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock);
+ TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q);
+ q->tx_tq = taskqueue_create_fast("ntb_txq", M_NOWAIT,
+ taskqueue_thread_enqueue, &q->tx_tq);
+ taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d",
+ device_get_nameunit(dev), i);
+ callout_init(&q->queue_full, 1);
+ }
+ sc->num_queues = i;
+ device_printf(dev, "%d queue(s)\n", sc->num_queues);
+
+ if_setinitfn(ifp, ntb_net_init);
+ if_setsoftc(ifp, sc);
+ if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
+ if_setioctlfn(ifp, ntb_ioctl);
+ if_settransmitfn(ifp, ntb_transmit);
+ if_setqflushfn(ifp, ntb_qflush);
+ create_random_local_eui48(sc->eaddr);
+ ether_ifattach(ifp, sc->eaddr);
+ if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 |
+ IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
+ if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE);
+ if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN);
+
+ ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd,
+ ntb_ifmedia_sts);
+ ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL);
+ ifmedia_set(&sc->media, NTB_MEDIATYPE);
+
+ for (i = 0; i < sc->num_queues; i++)
+ ntb_transport_link_up(sc->queues[i].qp);
return (0);
}
static int
-ntb_teardown_interface(void)
+ntb_net_detach(device_t dev)
{
+ struct ntb_net_ctx *sc = device_get_softc(dev);
+ struct ntb_net_queue *q;
+ int i;
- if (net_softc.qp != NULL) {
- ntb_transport_link_down(net_softc.qp);
-
- ntb_transport_free_queue(net_softc.qp);
- ntb_transport_free(&net_softc);
- }
-
- if (net_softc.ifp != NULL) {
- ether_ifdetach(net_softc.ifp);
- if_free(net_softc.ifp);
- net_softc.ifp = NULL;
- }
-
+ for (i = 0; i < sc->num_queues; i++)
+ ntb_transport_link_down(sc->queues[i].qp);
+ ether_ifdetach(sc->ifp);
+ if_free(sc->ifp);
+ ifmedia_removeall(&sc->media);
+ for (i = 0; i < sc->num_queues; i++) {
+ q = &sc->queues[i];
+ ntb_transport_free_queue(q->qp);
+ buf_ring_free(q->br, M_DEVBUF);
+ callout_drain(&q->queue_full);
+ taskqueue_drain_all(q->tx_tq);
+ mtx_destroy(&q->tx_lock);
+ }
+ free(sc->queues, M_DEVBUF);
return (0);
}
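
The attach/detach pair above sets up the rewritten driver's per-queue machinery: each queue owns a buf_ring, a tx task on a fast taskqueue, and a lock, which together implement the standard multiqueue if_transmit pattern used further down in this diff. Sketched:

    /* if_transmit pattern: enqueue, then drain inline if the lock is
     * free, else hand off to the queue's dedicated task. */
    error = drbr_enqueue(ifp, q->br, m);
    if (error)
            return (error);
    if (mtx_trylock(&q->tx_lock)) {
            ntb_transmit_locked(q);
            mtx_unlock(&q->tx_lock);
    } else
            taskqueue_enqueue(q->tx_tq, &q->tx_task);
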
@@ -471,1213 +221,268 @@ ntb_teardown_interface(void)
static void
ntb_net_init(void *arg)
{
- struct ntb_transport_ctx *ntb_softc = arg;
- struct ifnet *ifp = ntb_softc->ifp;
+ struct ntb_net_ctx *sc = arg;
+ if_t ifp = sc->ifp;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- ifp->if_flags |= IFF_UP;
- if_link_state_change(ifp, LINK_STATE_UP);
+ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
+ if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ?
+ LINK_STATE_UP : LINK_STATE_DOWN);
}
static int
-ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+ntb_ioctl(if_t ifp, u_long command, caddr_t data)
{
- struct ntb_transport_ctx *nt = ifp->if_softc;
+ struct ntb_net_ctx *sc = if_getsoftc(ifp);
struct ifreq *ifr = (struct ifreq *)data;
int error = 0;
switch (command) {
case SIOCSIFMTU:
{
- if (ifr->ifr_mtu > ntb_transport_max_size(nt->qp) -
- ETHER_HDR_LEN - ETHER_CRC_LEN) {
+ if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) {
error = EINVAL;
break;
}
- ifp->if_mtu = ifr->ifr_mtu;
+ if_setmtu(ifp, ifr->ifr_mtu);
break;
}
- default:
- error = ether_ioctl(ifp, command, data);
- break;
- }
- return (error);
-}
-
-
-static void
-ntb_start(struct ifnet *ifp)
-{
- struct mbuf *m_head;
- struct ntb_transport_ctx *nt = ifp->if_softc;
- int rc;
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
+ break;
- mtx_lock(&nt->tx_lock);
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- CTR0(KTR_NTB, "TX: ntb_start");
- while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
- CTR1(KTR_NTB, "TX: start mbuf %p", m_head);
- rc = ntb_transport_tx_enqueue(nt->qp, m_head, m_head,
- m_length(m_head, NULL));
- if (rc != 0) {
- CTR1(KTR_NTB,
- "TX: could not tx mbuf %p. Returning to snd q",
- m_head);
- if (rc == EAGAIN) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
- callout_reset(&nt->qp->queue_full, hz / 1000,
- ntb_qp_full, ifp);
- }
- break;
+ case SIOCSIFCAP:
+ if (ifr->ifr_reqcap & IFCAP_RXCSUM)
+ if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
+ else
+ if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
+ if (ifr->ifr_reqcap & IFCAP_TXCSUM) {
+ if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
+ if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0);
+ } else {
+ if_setcapenablebit(ifp, 0, IFCAP_TXCSUM);
+ if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES);
+ }
+ if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6)
+ if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
+ else
+ if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
+ if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) {
+ if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
+ if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0);
+ } else {
+ if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6);
+ if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6);
}
-
- }
- mtx_unlock(&nt->tx_lock);
-}
-
-/* Network Device Callbacks */
-static void
-ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
- int len)
-{
-
- m_freem(data);
- CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
-}
-
-static void
-ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
- int len)
-{
- struct mbuf *m = data;
- struct ifnet *ifp = qp_data;
-
- CTR0(KTR_NTB, "RX: rx handler");
- (*ifp->if_input)(ifp, m);
-}
-
-static void
-ntb_net_event_handler(void *data, enum ntb_link_event status)
-{
- struct ifnet *ifp;
-
- ifp = data;
- (void)ifp;
-
- /* XXX The Linux driver munges with the carrier status here. */
-
- switch (status) {
- case NTB_LINK_DOWN:
- break;
- case NTB_LINK_UP:
break;
- default:
- panic("Bogus ntb_link_event %u\n", status);
- }
-}
-
-/* Transport Init and teardown */
-
-static void
-xeon_link_watchdog_hb(void *arg)
-{
- struct ntb_transport_ctx *nt;
-
- nt = arg;
- ntb_spad_write(nt->ntb, IF_NTB_WATCHDOG_SPAD, 0);
- callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt);
-}
-
-static int
-ntb_transport_probe(struct ntb_softc *ntb)
-{
- struct ntb_transport_ctx *nt = &net_softc;
- struct ntb_transport_mw *mw;
- uint64_t qp_bitmap;
- int rc;
- unsigned i;
-
- nt->mw_count = ntb_mw_count(ntb);
- for (i = 0; i < nt->mw_count; i++) {
- mw = &nt->mw_vec[i];
-
- rc = ntb_mw_get_range(ntb, i, &mw->phys_addr, &mw->vbase,
- &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size,
- &mw->addr_limit);
- if (rc != 0)
- goto err;
-
- mw->buff_size = 0;
- mw->xlat_size = 0;
- mw->virt_addr = NULL;
- mw->dma_addr = 0;
-
- rc = ntb_mw_set_wc(nt->ntb, i, VM_MEMATTR_WRITE_COMBINING);
- if (rc)
- ntb_printf(0, "Unable to set mw%d caching\n", i);
- }
-
- qp_bitmap = ntb_db_valid_mask(ntb);
- nt->qp_count = flsll(qp_bitmap);
- KASSERT(nt->qp_count != 0, ("bogus db bitmap"));
- nt->qp_count -= 1;
-
- if (max_num_clients != 0 && max_num_clients < nt->qp_count)
- nt->qp_count = max_num_clients;
- else if (nt->mw_count < nt->qp_count)
- nt->qp_count = nt->mw_count;
- KASSERT(nt->qp_count <= QP_SETSIZE, ("invalid qp_count"));
-
- mtx_init(&nt->tx_lock, "ntb transport tx", NULL, MTX_DEF);
- mtx_init(&nt->rx_lock, "ntb transport rx", NULL, MTX_DEF);
-
- nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_IF,
- M_WAITOK | M_ZERO);
-
- for (i = 0; i < nt->qp_count; i++) {
- set_bit(i, &nt->qp_bitmap);
- set_bit(i, &nt->qp_bitmap_free);
- ntb_transport_init_queue(nt, i);
- }
-
- callout_init(&nt->link_work, 0);
- callout_init(&nt->link_watchdog, 0);
- TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt);
-
- rc = ntb_set_ctx(ntb, nt, &ntb_transport_ops);
- if (rc != 0)
- goto err;
-
- nt->link_is_up = false;
- ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
- ntb_link_event(ntb);
-
- callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
- if (enable_xeon_watchdog != 0)
- callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt);
- return (0);
-
-err:
- free(nt->qp_vec, M_NTB_IF);
- nt->qp_vec = NULL;
- return (rc);
-}
-
-static void
-ntb_transport_free(struct ntb_transport_ctx *nt)
-{
- struct ntb_softc *ntb = nt->ntb;
- struct _qpset qp_bitmap_alloc;
- uint8_t i;
-
- ntb_transport_link_cleanup(nt);
- taskqueue_drain(taskqueue_swi, &nt->link_cleanup);
- callout_drain(&nt->link_work);
- callout_drain(&nt->link_watchdog);
-
- BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
- BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);
-
- /* Verify that all the QPs are freed */
- for (i = 0; i < nt->qp_count; i++)
- if (test_bit(i, &qp_bitmap_alloc))
- ntb_transport_free_queue(&nt->qp_vec[i]);
-
- ntb_link_disable(ntb);
- ntb_clear_ctx(ntb);
-
- for (i = 0; i < nt->mw_count; i++)
- ntb_free_mw(nt, i);
-
- free(nt->qp_vec, M_NTB_IF);
-}
-
-static void
-ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num)
-{
- struct ntb_transport_mw *mw;
- struct ntb_transport_qp *qp;
- vm_paddr_t mw_base;
- uint64_t mw_size, qp_offset;
- size_t tx_size;
- unsigned num_qps_mw, mw_num, mw_count;
-
- mw_count = nt->mw_count;
- mw_num = QP_TO_MW(nt, qp_num);
- mw = &nt->mw_vec[mw_num];
-
- qp = &nt->qp_vec[qp_num];
- qp->qp_num = qp_num;
- qp->transport = nt;
- qp->ntb = nt->ntb;
- qp->client_ready = false;
- qp->event_handler = NULL;
- ntb_qp_link_down_reset(qp);
-
- if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count)
- num_qps_mw = nt->qp_count / mw_count + 1;
- else
- num_qps_mw = nt->qp_count / mw_count;
-
- mw_base = mw->phys_addr;
- mw_size = mw->phys_size;
-
- tx_size = mw_size / num_qps_mw;
- qp_offset = tx_size * (qp_num / mw_count);
-
- qp->tx_mw = mw->vbase + qp_offset;
- KASSERT(qp->tx_mw != NULL, ("uh oh?"));
-
- /* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */
- qp->tx_mw_phys = mw_base + qp_offset;
- KASSERT(qp->tx_mw_phys != 0, ("uh oh?"));
-
- tx_size -= sizeof(struct ntb_rx_info);
- qp->rx_info = (void *)(qp->tx_mw + tx_size);
-
- /* Due to house-keeping, there must be at least 2 buffs */
- qp->tx_max_frame = qmin(tx_size / 2,
- transport_mtu + sizeof(struct ntb_payload_header));
- qp->tx_max_entry = tx_size / qp->tx_max_frame;
-
- callout_init(&qp->link_work, 0);
- callout_init(&qp->queue_full, 1);
- callout_init(&qp->rx_full, 1);
-
- mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN);
- mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN);
- TASK_INIT(&qp->rx_completion_task, 0, ntb_complete_rxc, qp);
- TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp);
-
- STAILQ_INIT(&qp->rx_post_q);
- STAILQ_INIT(&qp->rx_pend_q);
- STAILQ_INIT(&qp->tx_free_q);
- callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
-}
-
-static void
-ntb_transport_free_queue(struct ntb_transport_qp *qp)
-{
- struct ntb_queue_entry *entry;
-
- if (qp == NULL)
- return;
-
- callout_drain(&qp->link_work);
-
- ntb_db_set_mask(qp->ntb, 1ull << qp->qp_num);
- taskqueue_drain(taskqueue_swi, &qp->rxc_db_work);
- taskqueue_drain(taskqueue_swi, &qp->rx_completion_task);
-
- qp->cb_data = NULL;
- qp->rx_handler = NULL;
- qp->tx_handler = NULL;
- qp->event_handler = NULL;
-
- while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q)))
- free(entry, M_NTB_IF);
-
- while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q)))
- free(entry, M_NTB_IF);
-
- while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
- free(entry, M_NTB_IF);
-
- set_bit(qp->qp_num, &qp->transport->qp_bitmap_free);
-}
-
-/**
- * ntb_transport_create_queue - Create a new NTB transport layer queue
- * @rx_handler: receive callback function
- * @tx_handler: transmit callback function
- * @event_handler: event callback function
- *
- * Create a new NTB transport layer queue and provide the queue with a callback
- * routine for both transmit and receive. The receive callback routine will be
- * used to pass up data when the transport has received it on the queue. The
- * transmit callback routine will be called when the transport has completed the
- * transmission of the data on the queue and the data is ready to be freed.
- *
- * RETURNS: pointer to newly created ntb_queue, NULL on error.
- */
-static struct ntb_transport_qp *
-ntb_transport_create_queue(void *data, struct ntb_softc *ntb,
- const struct ntb_queue_handlers *handlers)
-{
- struct ntb_queue_entry *entry;
- struct ntb_transport_qp *qp;
- struct ntb_transport_ctx *nt;
- unsigned int free_queue;
- int i;
-
- nt = ntb_get_ctx(ntb, NULL);
- KASSERT(nt != NULL, ("bogus"));
-
- free_queue = ffs_bit(&nt->qp_bitmap);
- if (free_queue == 0)
- return (NULL);
-
- /* decrement free_queue to make it zero based */
- free_queue--;
-
- qp = &nt->qp_vec[free_queue];
- clear_bit(qp->qp_num, &nt->qp_bitmap_free);
- qp->cb_data = data;
- qp->rx_handler = handlers->rx_handler;
- qp->tx_handler = handlers->tx_handler;
- qp->event_handler = handlers->event_handler;
-
- for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
- entry = malloc(sizeof(*entry), M_NTB_IF, M_WAITOK | M_ZERO);
- entry->cb_data = nt->ifp;
- entry->buf = NULL;
- entry->len = transport_mtu;
- ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q);
- }
-
- for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
- entry = malloc(sizeof(*entry), M_NTB_IF, M_WAITOK | M_ZERO);
- ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+ default:
+ error = ether_ioctl(ifp, command, data);
+ break;
}
- ntb_db_clear(ntb, 1ull << qp->qp_num);
- ntb_db_clear_mask(ntb, 1ull << qp->qp_num);
- return (qp);
-}
-
-/**
- * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
- * @qp: NTB transport layer queue to be enabled
- *
- * Notify NTB transport layer of client readiness to use queue
- */
-static void
-ntb_transport_link_up(struct ntb_transport_qp *qp)
-{
- struct ntb_transport_ctx *nt;
-
- if (qp == NULL)
- return;
-
- qp->client_ready = true;
-
- nt = qp->transport;
- ntb_printf(2, "qp client ready\n");
-
- if (qp->transport->link_is_up)
- callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+ return (error);
}
-
-
-/* Transport Tx */
-
-/**
- * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
- * @qp: NTB transport layer queue the entry is to be enqueued on
- * @cb: per buffer pointer for callback function to use
- * @data: pointer to data buffer that will be sent
- * @len: length of the data buffer
- *
- * Enqueue a new transmit buffer onto the transport queue from which a NTB
- * payload will be transmitted. This assumes that a lock is being held to
- * serialize access to the qp.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
static int
-ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
- unsigned int len)
+ntb_ifmedia_upd(struct ifnet *ifp)
{
- struct ntb_queue_entry *entry;
- int rc;
+ struct ntb_net_ctx *sc = if_getsoftc(ifp);
+ struct ifmedia *ifm = &sc->media;
- if (qp == NULL || !qp->link_is_up || len == 0) {
- CTR0(KTR_NTB, "TX: link not up");
+ if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
return (EINVAL);
- }
-
- entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
- if (entry == NULL) {
- CTR0(KTR_NTB, "TX: could not get entry from tx_free_q");
- qp->tx_err_no_buf++;
- return (EBUSY);
- }
- CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry);
-
- entry->cb_data = cb;
- entry->buf = data;
- entry->len = len;
- entry->flags = 0;
-
- rc = ntb_process_tx(qp, entry);
- if (rc != 0) {
- ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
- CTR1(KTR_NTB,
- "TX: process_tx failed. Returning entry %p to tx_free_q",
- entry);
- }
- return (rc);
-}
-
-static int
-ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
-{
- void *offset;
-
- offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
- CTR3(KTR_NTB,
- "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u",
- qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry);
- if (qp->tx_index == qp->remote_rx_info->entry) {
- CTR0(KTR_NTB, "TX: ring full");
- qp->tx_ring_full++;
- return (EAGAIN);
- }
-
- if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
- if (qp->tx_handler != NULL)
- qp->tx_handler(qp, qp->cb_data, entry->buf,
- EIO);
- else
- m_freem(entry->buf);
-
- entry->buf = NULL;
- ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
- CTR1(KTR_NTB,
- "TX: frame too big. returning entry %p to tx_free_q",
- entry);
- return (0);
- }
- CTR2(KTR_NTB, "TX: copying entry %p to offset %p", entry, offset);
- ntb_memcpy_tx(qp, entry, offset);
-
- qp->tx_index++;
- qp->tx_index %= qp->tx_max_entry;
-
- qp->tx_pkts++;
return (0);
}
static void
-ntb_memcpy_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
- void *offset)
-{
- struct ntb_payload_header *hdr;
-
- /* This piece is from Linux' ntb_async_tx() */
- hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame -
- sizeof(struct ntb_payload_header));
- entry->x_hdr = hdr;
- iowrite32(entry->len, &hdr->len);
- iowrite32(qp->tx_pkts, &hdr->ver);
-
- /* This piece is ntb_memcpy_tx() */
- CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset);
- if (entry->buf != NULL) {
- m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset);
-
- /*
- * Ensure that the data is fully copied before setting the
- * flags
- */
- wmb();
- }
-
- /* The rest is ntb_tx_copy_callback() */
- iowrite32(entry->flags | IF_NTB_DESC_DONE_FLAG, &hdr->flags);
- CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr);
-
- ntb_peer_db_set(qp->ntb, 1ull << qp->qp_num);
-
- /*
- * The entry length can only be zero if the packet is intended to be a
- * "link down" or similar. Since no payload is being sent in these
- * cases, there is nothing to add to the completion queue.
- */
- if (entry->len > 0) {
- qp->tx_bytes += entry->len;
-
- if (qp->tx_handler)
- qp->tx_handler(qp, qp->cb_data, entry->buf,
- entry->len);
- else
- m_freem(entry->buf);
- entry->buf = NULL;
- }
-
- CTR3(KTR_NTB,
- "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning "
- "to tx_free_q", entry, hdr->ver, hdr->flags);
- ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
-}
-
-static void
-ntb_qp_full(void *arg)
+ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
+ struct ntb_net_ctx *sc = if_getsoftc(ifp);
- CTR0(KTR_NTB, "TX: qp_full callout");
- ntb_start(arg);
+ ifmr->ifm_status = IFM_AVALID;
+ ifmr->ifm_active = NTB_MEDIATYPE;
+ if (ntb_transport_link_query(sc->queues[0].qp))
+ ifmr->ifm_status |= IFM_ACTIVE;
}
-/* Transport Rx */
static void
-ntb_transport_rxc_db(void *arg, int pending __unused)
+ntb_transmit_locked(struct ntb_net_queue *q)
{
- struct ntb_transport_qp *qp = arg;
- ntb_q_idx_t i;
- int rc;
-
- /*
- * Limit the number of packets processed in a single interrupt to
- * provide fairness to others
- */
- CTR0(KTR_NTB, "RX: transport_rx");
- mtx_lock(&qp->transport->rx_lock);
- for (i = 0; i < qp->rx_max_entry; i++) {
- rc = ntb_process_rxc(qp);
+ if_t ifp = q->ifp;
+ struct mbuf *m;
+ int rc, len;
+ short mflags;
+
+ CTR0(KTR_NTB, "TX: ntb_transmit_locked");
+ while ((m = drbr_peek(ifp, q->br)) != NULL) {
+ CTR1(KTR_NTB, "TX: start mbuf %p", m);
+ if_etherbpfmtap(ifp, m);
+ len = m->m_pkthdr.len;
+ mflags = m->m_flags;
+ rc = ntb_transport_tx_enqueue(q->qp, m, m, len);
if (rc != 0) {
- CTR0(KTR_NTB, "RX: process_rxc failed");
+ CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc);
+ if (rc == EAGAIN) {
+ drbr_putback(ifp, q->br, m);
+ callout_reset_sbt(&q->queue_full,
+ SBT_1MS / 4, SBT_1MS / 4,
+ ntb_qp_full, q, 0);
+ } else {
+ m_freem(m);
+ drbr_advance(ifp, q->br);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ }
break;
}
- }
- mtx_unlock(&qp->transport->rx_lock);
-
- if (i == qp->rx_max_entry)
- taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
- else if ((ntb_db_read(qp->ntb) & (1ull << qp->qp_num)) != 0) {
- /* If db is set, clear it and read it back to commit clear. */
- ntb_db_clear(qp->ntb, 1ull << qp->qp_num);
- (void)ntb_db_read(qp->ntb);
-
- /*
- * An interrupt may have arrived between finishing
- * ntb_process_rxc and clearing the doorbell bit: there might
- * be some more work to do.
- */
- taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
+ drbr_advance(ifp, q->br);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
+ if (mflags & M_MCAST)
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
}
}
static int
-ntb_process_rxc(struct ntb_transport_qp *qp)
+ntb_transmit(if_t ifp, struct mbuf *m)
{
- struct ntb_payload_header *hdr;
- struct ntb_queue_entry *entry;
- caddr_t offset;
-
- offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
- hdr = (void *)(offset + qp->rx_max_frame -
- sizeof(struct ntb_payload_header));
-
- CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index);
- if ((hdr->flags & IF_NTB_DESC_DONE_FLAG) == 0) {
- CTR0(KTR_NTB, "RX: hdr not done");
- qp->rx_ring_empty++;
- return (EAGAIN);
- }
-
- if ((hdr->flags & IF_NTB_LINK_DOWN_FLAG) != 0) {
- CTR0(KTR_NTB, "RX: link down");
- ntb_qp_link_down(qp);
- hdr->flags = 0;
- return (EAGAIN);
- }
-
- if (hdr->ver != (uint32_t)qp->rx_pkts) {
- CTR2(KTR_NTB,"RX: ver != rx_pkts (%x != %lx). "
- "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts);
- qp->rx_err_ver++;
- return (EIO);
- }
-
- entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
- if (entry == NULL) {
- qp->rx_err_no_buf++;
- CTR0(KTR_NTB, "RX: No entries in rx_pend_q");
- return (EAGAIN);
- }
- callout_stop(&qp->rx_full);
- CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry);
-
- entry->x_hdr = hdr;
- entry->index = qp->rx_index;
-
- if (hdr->len > entry->len) {
- CTR2(KTR_NTB, "RX: len too long. Wanted %ju got %ju",
- (uintmax_t)hdr->len, (uintmax_t)entry->len);
- qp->rx_err_oflow++;
-
- entry->len = -EIO;
- entry->flags |= IF_NTB_DESC_DONE_FLAG;
+ struct ntb_net_ctx *sc = if_getsoftc(ifp);
+ struct ntb_net_queue *q;
+ int error, i;
- taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task);
- } else {
- qp->rx_bytes += hdr->len;
- qp->rx_pkts++;
-
- CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts);
-
- entry->len = hdr->len;
-
- ntb_memcpy_rx(qp, entry, offset);
- }
-
- qp->rx_index++;
- qp->rx_index %= qp->rx_max_entry;
+ CTR0(KTR_NTB, "TX: ntb_transmit");
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ i = m->m_pkthdr.flowid % sc->num_queues;
+ else
+ i = curcpu % sc->num_queues;
+ q = &sc->queues[i];
+
+ error = drbr_enqueue(ifp, q->br, m);
+ if (error)
+ return (error);
+
+ if (mtx_trylock(&q->tx_lock)) {
+ ntb_transmit_locked(q);
+ mtx_unlock(&q->tx_lock);
+ } else
+ taskqueue_enqueue(q->tx_tq, &q->tx_task);
return (0);
}
static void
-ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
- void *offset)
+ntb_handle_tx(void *arg, int pending)
{
- struct ifnet *ifp = entry->cb_data;
- unsigned int len = entry->len;
- struct mbuf *m;
-
- CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset);
- m = m_devget(offset, len, 0, ifp, NULL);
- m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID;
+ struct ntb_net_queue *q = arg;
- entry->buf = (void *)m;
-
- /* Ensure that the data is globally visible before clearing the flag */
- wmb();
-
- CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, m);
- ntb_rx_copy_callback(qp, entry);
+ mtx_lock(&q->tx_lock);
+ ntb_transmit_locked(q);
+ mtx_unlock(&q->tx_lock);
}
-static inline void
-ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data)
+static void
+ntb_qp_full(void *arg)
{
- struct ntb_queue_entry *entry;
+ struct ntb_net_queue *q = arg;
- entry = data;
- entry->flags |= IF_NTB_DESC_DONE_FLAG;
- taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task);
+ CTR0(KTR_NTB, "TX: qp_full callout");
+ if (ntb_transport_tx_free_entry(q->qp) > 0)
+ taskqueue_enqueue(q->tx_tq, &q->tx_task);
+ else
+ callout_schedule_sbt(&q->queue_full,
+ SBT_1MS / 4, SBT_1MS / 4, 0);
}
static void
-ntb_complete_rxc(void *arg, int pending)
+ntb_qflush(if_t ifp)
{
- struct ntb_transport_qp *qp = arg;
- struct ntb_queue_entry *entry;
+ struct ntb_net_ctx *sc = if_getsoftc(ifp);
+ struct ntb_net_queue *q;
struct mbuf *m;
- unsigned len;
-
- CTR0(KTR_NTB, "RX: rx_completion_task");
-
- mtx_lock_spin(&qp->ntb_rx_q_lock);
-
- while (!STAILQ_EMPTY(&qp->rx_post_q)) {
- entry = STAILQ_FIRST(&qp->rx_post_q);
- if ((entry->flags & IF_NTB_DESC_DONE_FLAG) == 0)
- break;
-
- entry->x_hdr->flags = 0;
- iowrite32(entry->index, &qp->rx_info->entry);
-
- STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry);
-
- len = entry->len;
- m = entry->buf;
-
- /*
- * Re-initialize queue_entry for reuse; rx_handler takes
- * ownership of the mbuf.
- */
- entry->buf = NULL;
- entry->len = transport_mtu;
- entry->cb_data = qp->transport->ifp;
-
- STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry);
-
- mtx_unlock_spin(&qp->ntb_rx_q_lock);
+ int i;
- CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m);
- if (qp->rx_handler != NULL && qp->client_ready)
- qp->rx_handler(qp, qp->cb_data, m, len);
- else
+ for (i = 0; i < sc->num_queues; i++) {
+ q = &sc->queues[i];
+ mtx_lock(&q->tx_lock);
+ while ((m = buf_ring_dequeue_sc(q->br)) != NULL)
m_freem(m);
-
- mtx_lock_spin(&qp->ntb_rx_q_lock);
- }
-
- mtx_unlock_spin(&qp->ntb_rx_q_lock);
-}
-
-static void
-ntb_transport_doorbell_callback(void *data, uint32_t vector)
-{
- struct ntb_transport_ctx *nt = data;
- struct ntb_transport_qp *qp;
- struct _qpset db_bits;
- uint64_t vec_mask;
- unsigned qp_num;
-
- BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &db_bits);
- BIT_NAND(QP_SETSIZE, &db_bits, &nt->qp_bitmap_free);
-
- vec_mask = ntb_db_vector_mask(nt->ntb, vector);
- while (vec_mask != 0) {
- qp_num = ffsll(vec_mask) - 1;
-
- if (test_bit(qp_num, &db_bits)) {
- qp = &nt->qp_vec[qp_num];
- taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
- }
-
- vec_mask &= ~(1ull << qp_num);
- }
-}
-
-/* Link Event handler */
-static void
-ntb_transport_event_callback(void *data)
-{
- struct ntb_transport_ctx *nt = data;
-
- if (ntb_link_is_up(nt->ntb, NULL, NULL)) {
- ntb_printf(1, "HW link up\n");
- callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
- } else {
- ntb_printf(1, "HW link down\n");
- taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup);
+ mtx_unlock(&q->tx_lock);
}
+ if_qflush(ifp);
}
-/* Link bring up */
+/* Network Device Callbacks */
static void
-ntb_transport_link_work(void *arg)
-{
- struct ntb_transport_ctx *nt = arg;
- struct ntb_softc *ntb = nt->ntb;
- struct ntb_transport_qp *qp;
- uint64_t val64, size;
- uint32_t val;
- unsigned i;
- int rc;
-
- /* send the local info, in the opposite order of the way we read it */
- for (i = 0; i < nt->mw_count; i++) {
- size = nt->mw_vec[i].phys_size;
-
- if (max_mw_size != 0 && size > max_mw_size)
- size = max_mw_size;
-
- ntb_peer_spad_write(ntb, IF_NTB_MW0_SZ_HIGH + (i * 2),
- size >> 32);
- ntb_peer_spad_write(ntb, IF_NTB_MW0_SZ_LOW + (i * 2), size);
- }
-
- ntb_peer_spad_write(ntb, IF_NTB_NUM_MWS, nt->mw_count);
-
- ntb_peer_spad_write(ntb, IF_NTB_NUM_QPS, nt->qp_count);
-
- ntb_peer_spad_write(ntb, IF_NTB_VERSION, NTB_TRANSPORT_VERSION);
-
- /* Query the remote side for its info */
- val = 0;
- ntb_spad_read(ntb, IF_NTB_VERSION, &val);
- if (val != NTB_TRANSPORT_VERSION)
- goto out;
-
- ntb_spad_read(ntb, IF_NTB_NUM_QPS, &val);
- if (val != nt->qp_count)
- goto out;
-
- ntb_spad_read(ntb, IF_NTB_NUM_MWS, &val);
- if (val != nt->mw_count)
- goto out;
-
- for (i = 0; i < nt->mw_count; i++) {
- ntb_spad_read(ntb, IF_NTB_MW0_SZ_HIGH + (i * 2), &val);
- val64 = (uint64_t)val << 32;
-
- ntb_spad_read(ntb, IF_NTB_MW0_SZ_LOW + (i * 2), &val);
- val64 |= val;
-
- rc = ntb_set_mw(nt, i, val64);
- if (rc != 0)
- goto free_mws;
- }
-
- nt->link_is_up = true;
- ntb_printf(1, "transport link up\n");
-
- for (i = 0; i < nt->qp_count; i++) {
- qp = &nt->qp_vec[i];
-
- ntb_transport_setup_qp_mw(nt, i);
-
- if (qp->client_ready)
- callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
- }
-
- return;
-
-free_mws:
- for (i = 0; i < nt->mw_count; i++)
- ntb_free_mw(nt, i);
-out:
- if (ntb_link_is_up(ntb, NULL, NULL))
- callout_reset(&nt->link_work,
- NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt);
-}
-
-static int
-ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size)
+ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
+ int len)
{
- struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
- size_t xlat_size, buff_size;
- int rc;
-
- if (size == 0)
- return (EINVAL);
-
- xlat_size = roundup(size, mw->xlat_align_size);
- buff_size = xlat_size;
-
- /* No need to re-setup */
- if (mw->xlat_size == xlat_size)
- return (0);
-
- if (mw->buff_size != 0)
- ntb_free_mw(nt, num_mw);
-
- /* Alloc memory for receiving data. Must be aligned */
- mw->xlat_size = xlat_size;
- mw->buff_size = buff_size;
-
- mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_IF, M_ZERO, 0,
- mw->addr_limit, mw->xlat_align, 0);
- if (mw->virt_addr == NULL) {
- ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n",
- mw->buff_size, mw->xlat_size);
- mw->xlat_size = 0;
- mw->buff_size = 0;
- return (ENOMEM);
- }
- /* TODO: replace with bus_space_* functions */
- mw->dma_addr = vtophys(mw->virt_addr);
-
- /*
- * Ensure that the allocation from contigmalloc is aligned as
- * requested. XXX: This may not be needed -- brought in for parity
- * with the Linux driver.
- */
- if (mw->dma_addr % mw->xlat_align != 0) {
- ntb_printf(0,
- "DMA memory 0x%jx not aligned to BAR size 0x%zx\n",
- (uintmax_t)mw->dma_addr, size);
- ntb_free_mw(nt, num_mw);
- return (ENOMEM);
- }
-
- /* Notify HW the memory location of the receive buffer */
- rc = ntb_mw_set_trans(nt->ntb, num_mw, mw->dma_addr, mw->xlat_size);
- if (rc) {
- ntb_printf(0, "Unable to set mw%d translation\n", num_mw);
- ntb_free_mw(nt, num_mw);
- return (rc);
- }
- return (0);
+ m_freem(data);
+ CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
}
static void
-ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
+ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
+ int len)
{
- struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+ struct ntb_net_queue *q = qp_data;
+ struct ntb_net_ctx *sc = q->sc;
+ struct mbuf *m = data;
+ if_t ifp = q->ifp;
+ uint16_t proto;
- if (mw->virt_addr == NULL)
+ CTR1(KTR_NTB, "RX: rx handler (%d)", len);
+ if (len < 0) {
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
return;
-
- ntb_mw_clear_trans(nt->ntb, num_mw);
- contigfree(mw->virt_addr, mw->xlat_size, M_NTB_IF);
- mw->xlat_size = 0;
- mw->buff_size = 0;
- mw->virt_addr = NULL;
-}
-
-static int
-ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num)
-{
- struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
- struct ntb_transport_mw *mw;
- void *offset;
- ntb_q_idx_t i;
- size_t rx_size;
- unsigned num_qps_mw, mw_num, mw_count;
-
- mw_count = nt->mw_count;
- mw_num = QP_TO_MW(nt, qp_num);
- mw = &nt->mw_vec[mw_num];
-
- if (mw->virt_addr == NULL)
- return (ENOMEM);
-
- if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count)
- num_qps_mw = nt->qp_count / mw_count + 1;
- else
- num_qps_mw = nt->qp_count / mw_count;
-
- rx_size = mw->xlat_size / num_qps_mw;
- qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count);
- rx_size -= sizeof(struct ntb_rx_info);
-
- qp->remote_rx_info = (void*)(qp->rx_buff + rx_size);
-
- /* Due to house-keeping, there must be at least 2 buffs */
- qp->rx_max_frame = qmin(rx_size / 2,
- transport_mtu + sizeof(struct ntb_payload_header));
- qp->rx_max_entry = rx_size / qp->rx_max_frame;
- qp->rx_index = 0;
-
- qp->remote_rx_info->entry = qp->rx_max_entry - 1;
-
- /* Set up the hdr offsets with 0s */
- for (i = 0; i < qp->rx_max_entry; i++) {
- offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) -
- sizeof(struct ntb_payload_header));
- memset(offset, 0, sizeof(struct ntb_payload_header));
}
- qp->rx_pkts = 0;
- qp->tx_pkts = 0;
- qp->tx_index = 0;
-
- return (0);
-}
-
-static void
-ntb_qp_link_work(void *arg)
-{
- struct ntb_transport_qp *qp = arg;
- struct ntb_softc *ntb = qp->ntb;
- struct ntb_transport_ctx *nt = qp->transport;
- uint32_t val, dummy;
-
- ntb_spad_read(ntb, IF_NTB_QP_LINKS, &val);
-
- ntb_peer_spad_write(ntb, IF_NTB_QP_LINKS, val | (1ull << qp->qp_num));
-
- /* query remote spad for qp ready bits */
- ntb_peer_spad_read(ntb, IF_NTB_QP_LINKS, &dummy);
-
- /* See if the remote side is up */
- if ((val & (1ull << qp->qp_num)) != 0) {
- ntb_printf(2, "qp link up\n");
- qp->link_is_up = true;
-
- if (qp->event_handler != NULL)
- qp->event_handler(qp->cb_data, NTB_LINK_UP);
-
- taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
- } else if (nt->link_is_up)
- callout_reset(&qp->link_work,
- NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
-}
-
-/* Link down event*/
-static void
-ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
-{
- struct ntb_transport_qp *qp;
- struct _qpset qp_bitmap_alloc;
- unsigned i;
-
- BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
- BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);
-
- /* Pass along the info to any clients */
- for (i = 0; i < nt->qp_count; i++)
- if (test_bit(i, &qp_bitmap_alloc)) {
- qp = &nt->qp_vec[i];
- ntb_qp_link_cleanup(qp);
- callout_drain(&qp->link_work);
- }
-
- if (!nt->link_is_up)
- callout_drain(&nt->link_work);
-
- /*
- * The scratchpad registers keep the values if the remote side
- * goes down, blast them now to give them a sane value the next
- * time they are accessed
- */
- for (i = 0; i < IF_NTB_MAX_SPAD; i++)
- ntb_spad_write(nt->ntb, i, 0);
-}
-
-static void
-ntb_transport_link_cleanup_work(void *arg, int pending __unused)
-{
-
- ntb_transport_link_cleanup(arg);
-}
-
-static void
-ntb_qp_link_down(struct ntb_transport_qp *qp)
-{
-
- ntb_qp_link_cleanup(qp);
-}
-
-static void
-ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
-{
-
- qp->link_is_up = false;
-
- qp->tx_index = qp->rx_index = 0;
- qp->tx_bytes = qp->rx_bytes = 0;
- qp->tx_pkts = qp->rx_pkts = 0;
-
- qp->rx_ring_empty = 0;
- qp->tx_ring_full = 0;
-
- qp->rx_err_no_buf = qp->tx_err_no_buf = 0;
- qp->rx_err_oflow = qp->rx_err_ver = 0;
-}
-
-static void
-ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
-{
- struct ntb_transport_ctx *nt = qp->transport;
-
- callout_drain(&qp->link_work);
- ntb_qp_link_down_reset(qp);
-
- if (qp->event_handler != NULL)
- qp->event_handler(qp->cb_data, NTB_LINK_DOWN);
-
- if (nt->link_is_up)
- callout_reset(&qp->link_work,
- NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
-}
-
-/* Link commanded down */
-/**
- * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
- * @qp: NTB transport layer queue to be disabled
- *
- * Notify NTB transport layer of client's desire to no longer receive data on
- * transport queue specified. It is the client's responsibility to ensure all
- * entries on queue are purged or otherwise handled appropriately.
- */
-static void
-ntb_transport_link_down(struct ntb_transport_qp *qp)
-{
- uint32_t val;
-
- if (qp == NULL)
- return;
-
- qp->client_ready = false;
-
- ntb_spad_read(qp->ntb, IF_NTB_QP_LINKS, &val);
-
- ntb_peer_spad_write(qp->ntb, IF_NTB_QP_LINKS,
- val & ~(1 << qp->qp_num));
-
- if (qp->link_is_up)
- ntb_send_link_down(qp);
- else
- callout_drain(&qp->link_work);
-}
-
-static void
-ntb_send_link_down(struct ntb_transport_qp *qp)
-{
- struct ntb_queue_entry *entry;
- int i, rc;
-
- if (!qp->link_is_up)
- return;
-
- for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
- entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
- if (entry != NULL)
+ m->m_pkthdr.rcvif = ifp;
+ if (sc->num_queues > 1) {
+ m->m_pkthdr.flowid = q - sc->queues;
+ M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
+ }
+ if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
+ m_copydata(m, 12, 2, (void *)&proto);
+ switch (ntohs(proto)) {
+ case ETHERTYPE_IP:
+ if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
+ m->m_pkthdr.csum_data = 0xffff;
+ m->m_pkthdr.csum_flags = NTB_CSUM_SET;
+ }
+ break;
+ case ETHERTYPE_IPV6:
+ if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) {
+ m->m_pkthdr.csum_data = 0xffff;
+ m->m_pkthdr.csum_flags = NTB_CSUM_SET;
+ }
break;
- pause("NTB Wait for link down", hz / 10);
+ }
}
-
- if (entry == NULL)
- return;
-
- entry->cb_data = NULL;
- entry->buf = NULL;
- entry->len = 0;
- entry->flags = IF_NTB_LINK_DOWN_FLAG;
-
- mtx_lock(&qp->transport->tx_lock);
- rc = ntb_process_tx(qp, entry);
- if (rc != 0)
- printf("ntb: Failed to send link down\n");
- mtx_unlock(&qp->transport->tx_lock);
-
- ntb_qp_link_down_reset(qp);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ if_input(ifp, m);
}
-
-/* List Management */
-
static void
-ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
- struct ntb_queue_list *list)
-{
-
- mtx_lock_spin(lock);
- STAILQ_INSERT_TAIL(list, entry, entry);
- mtx_unlock_spin(lock);
-}
-
-static struct ntb_queue_entry *
-ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list)
-{
- struct ntb_queue_entry *entry;
-
- mtx_lock_spin(lock);
- if (STAILQ_EMPTY(list)) {
- entry = NULL;
- goto out;
- }
- entry = STAILQ_FIRST(list);
- STAILQ_REMOVE_HEAD(list, entry);
-out:
- mtx_unlock_spin(lock);
-
- return (entry);
-}
-
-static struct ntb_queue_entry *
-ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from,
- struct ntb_queue_list *to)
+ntb_net_event_handler(void *data, enum ntb_link_event status)
{
- struct ntb_queue_entry *entry;
+ struct ntb_net_queue *q = data;
+ int new_state;
- mtx_lock_spin(lock);
- if (STAILQ_EMPTY(from)) {
- entry = NULL;
- goto out;
+ switch (status) {
+ case NTB_LINK_DOWN:
+ new_state = LINK_STATE_DOWN;
+ break;
+ case NTB_LINK_UP:
+ new_state = LINK_STATE_UP;
+ break;
+ default:
+ new_state = LINK_STATE_UNKNOWN;
+ break;
}
- entry = STAILQ_FIRST(from);
- STAILQ_REMOVE_HEAD(from, entry);
- STAILQ_INSERT_TAIL(to, entry, entry);
-
-out:
- mtx_unlock_spin(lock);
- return (entry);
+ if_link_state_change(q->ifp, new_state);
}
/* Helper functions */
@@ -1688,27 +493,24 @@ static void
create_random_local_eui48(u_char *eaddr)
{
static uint8_t counter = 0;
- uint32_t seed = ticks;
eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
- memcpy(&eaddr[1], &seed, sizeof(uint32_t));
+ arc4rand(&eaddr[1], 4, 0);
eaddr[5] = counter++;
}
-/**
- * ntb_transport_max_size - Query the max payload size of a qp
- * @qp: NTB transport layer queue to be queried
- *
- * Query the maximum payload size permissible on the given qp
- *
- * RETURNS: the max payload size of a qp
- */
-static unsigned int
-ntb_transport_max_size(struct ntb_transport_qp *qp)
-{
-
- if (qp == NULL)
- return (0);
+static device_method_t ntb_net_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, ntb_net_probe),
+ DEVMETHOD(device_attach, ntb_net_attach),
+ DEVMETHOD(device_detach, ntb_net_detach),
+ DEVMETHOD_END
+};
- return (qp->tx_max_frame - sizeof(struct ntb_payload_header));
-}
+devclass_t ntb_net_devclass;
+static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods,
+ sizeof(struct ntb_net_ctx));
+DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass,
+ NULL, NULL);
+MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1);
+MODULE_VERSION(if_ntb, 1);
diff --git a/sys/dev/ntb/ntb.c b/sys/dev/ntb/ntb.c
new file mode 100644
index 0000000..44c0c61
--- /dev/null
+++ b/sys/dev/ntb/ntb.c
@@ -0,0 +1,462 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/rmlock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+
+#include "ntb.h"
+
+devclass_t ntb_hw_devclass;
+SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
+
+struct ntb_child {
+ device_t dev;
+ int enabled;
+ int mwoff;
+ int mwcnt;
+ int spadoff;
+ int spadcnt;
+ int dboff;
+ int dbmask;
+ void *ctx;
+ const struct ntb_ctx_ops *ctx_ops;
+ struct rmlock ctx_lock;
+ struct ntb_child *next;
+};
+
+int
+ntb_register_device(device_t dev)
+{
+ struct ntb_child **cpp = device_get_softc(dev);
+ struct ntb_child *nc;
+ int i, mw, mwu, mwt, spad, spadu, spadt, db, dbu, dbt;
+ char cfg[128] = "";
+ char buf[32];
+ char *n, *np, *c, *p, *name;
+
+ mwu = 0;
+ mwt = NTB_MW_COUNT(dev);
+ spadu = 0;
+ spadt = NTB_SPAD_COUNT(dev);
+ dbu = 0;
+ dbt = flsll(NTB_DB_VALID_MASK(dev));
+
+ device_printf(dev, "%d memory windows, %d scratchpads, "
+ "%d doorbells\n", mwt, spadt, dbt);
+
+ snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev),
+ device_get_unit(dev));
+ TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg));
+ n = cfg;
+ i = 0;
+ while ((c = strsep(&n, ",")) != NULL) {
+ np = c;
+ name = strsep(&np, ":");
+ if (name != NULL && name[0] == 0)
+ name = NULL;
+ p = strsep(&np, ":");
+ mw = (p && p[0] != 0) ? strtol(p, NULL, 10) : mwt - mwu;
+ p = strsep(&np, ":");
+ spad = (p && p[0] != 0) ? strtol(p, NULL, 10) : spadt - spadu;
+ db = (np && np[0] != 0) ? strtol(np, NULL, 10) : dbt - dbu;
+
+ if (mw > mwt - mwu || spad > spadt - spadu || db > dbt - dbu) {
+ device_printf(dev, "Not enough resources for config\n");
+ break;
+ }
+
+ nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO);
+ nc->mwoff = mwu;
+ nc->mwcnt = mw;
+ nc->spadoff = spadu;
+ nc->spadcnt = spad;
+ nc->dboff = dbu;
+ nc->dbmask = (db == 0) ? 0 : (0xffffffffffffffff >> (64 - db));
+ rm_init(&nc->ctx_lock, "ntb ctx");
+ nc->dev = device_add_child(dev, name, -1);
+ if (nc->dev == NULL) {
+ ntb_unregister_device(dev);
+ return (ENOMEM);
+ }
+ device_set_ivars(nc->dev, nc);
+ *cpp = nc;
+ cpp = &nc->next;
+
+ if (bootverbose) {
+ device_printf(dev, "%d \"%s\":", i, name);
+ if (mw > 0) {
+ printf(" memory windows %d", mwu);
+ if (mw > 1)
+ printf("-%d", mwu + mw - 1);
+ }
+ if (spad > 0) {
+ printf(" scratchpads %d", spadu);
+ if (spad > 1)
+ printf("-%d", spadu + spad - 1);
+ }
+ if (db > 0) {
+ printf(" doorbells %d", dbu);
+ if (db > 1)
+ printf("-%d", dbu + db - 1);
+ }
+ printf("\n");
+ }
+
+ mwu += mw;
+ spadu += spad;
+ dbu += db;
+ i++;
+ }
+
+ bus_generic_attach(dev);
+ return (0);
+}
+
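+/*
+ * A hypothetical loader.conf example for the "config" hint parsed above:
+ * a comma-separated list of children, each described as "name:mw:spad:db".
+ * Empty counts default to all remaining resources, and an empty name lets
+ * any matching driver attach.  Splitting one NTB into two equal transport
+ * children (assuming the hardware has enough resources) might look like:
+ *
+ *	hint.ntb_hw.0.config="ntb_transport:1:4:16,ntb_transport:1:4:16"
+ */
+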
+int
+ntb_unregister_device(device_t dev)
+{
+ struct ntb_child **cpp = device_get_softc(dev);
+ struct ntb_child *nc;
+ int error = 0;
+
+ while ((nc = *cpp) != NULL) {
+ *cpp = (*cpp)->next;
+ error = device_delete_child(dev, nc->dev);
+ if (error)
+ break;
+ rm_destroy(&nc->ctx_lock);
+ free(nc, M_DEVBUF);
+ }
+ return (error);
+}
+
+void
+ntb_link_event(device_t dev)
+{
+ struct ntb_child **cpp = device_get_softc(dev);
+ struct ntb_child *nc;
+ struct rm_priotracker ctx_tracker;
+
+ for (nc = *cpp; nc != NULL; nc = nc->next) {
+ rm_rlock(&nc->ctx_lock, &ctx_tracker);
+ if (nc->ctx_ops != NULL && nc->ctx_ops->link_event != NULL)
+ nc->ctx_ops->link_event(nc->ctx);
+ rm_runlock(&nc->ctx_lock, &ctx_tracker);
+ }
+}
+
+void
+ntb_db_event(device_t dev, uint32_t vec)
+{
+ struct ntb_child **cpp = device_get_softc(dev);
+ struct ntb_child *nc;
+ struct rm_priotracker ctx_tracker;
+
+ for (nc = *cpp; nc != NULL; nc = nc->next) {
+ rm_rlock(&nc->ctx_lock, &ctx_tracker);
+ if (nc->ctx_ops != NULL && nc->ctx_ops->db_event != NULL)
+ nc->ctx_ops->db_event(nc->ctx, vec);
+ rm_runlock(&nc->ctx_lock, &ctx_tracker);
+ }
+}
+
+bool
+ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width)
+{
+
+ return (NTB_LINK_IS_UP(device_get_parent(ntb), speed, width));
+}
+
+int
+ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+ struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev));
+ struct ntb_child *nc1;
+
+ for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) {
+ if (nc1->enabled) {
+ nc->enabled = 1;
+ return (0);
+ }
+ }
+ nc->enabled = 1;
+ return (NTB_LINK_ENABLE(device_get_parent(ntb), speed, width));
+}
+
+int
+ntb_link_disable(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+ struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev));
+ struct ntb_child *nc1;
+
+ if (!nc->enabled)
+ return (0);
+ nc->enabled = 0;
+ for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) {
+ if (nc1->enabled)
+ return (0);
+ }
+ return (NTB_LINK_DISABLE(device_get_parent(ntb)));
+}
+
+bool
+ntb_link_enabled(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (nc->enabled && NTB_LINK_ENABLED(device_get_parent(ntb)));
+}
+
+int
+ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ if (ctx == NULL || ctx_ops == NULL)
+ return (EINVAL);
+
+ rm_wlock(&nc->ctx_lock);
+ if (nc->ctx_ops != NULL) {
+ rm_wunlock(&nc->ctx_lock);
+ return (EINVAL);
+ }
+ nc->ctx = ctx;
+ nc->ctx_ops = ctx_ops;
+ rm_wunlock(&nc->ctx_lock);
+
+ return (0);
+}
+
+void *
+ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ KASSERT(nc->ctx != NULL && nc->ctx_ops != NULL, ("bogus"));
+ if (ctx_ops != NULL)
+ *ctx_ops = nc->ctx_ops;
+ return (nc->ctx);
+}
+
+void
+ntb_clear_ctx(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ rm_wlock(&nc->ctx_lock);
+ nc->ctx = NULL;
+ nc->ctx_ops = NULL;
+ rm_wunlock(&nc->ctx_lock);
+}
+
+uint8_t
+ntb_mw_count(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (nc->mwcnt);
+}
+
+int
+ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base,
+ caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
+ bus_addr_t *plimit)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_MW_GET_RANGE(device_get_parent(ntb), mw_idx + nc->mwoff,
+ base, vbase, size, align, align_size, plimit));
+}
+
+int
+ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr, size_t size)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_MW_SET_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff,
+ addr, size));
+}
+
+int
+ntb_mw_clear_trans(device_t ntb, unsigned mw_idx)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_MW_CLEAR_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff));
+}
+
+int
+ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_MW_GET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode));
+}
+
+int
+ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_MW_SET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode));
+}
+
+uint8_t
+ntb_spad_count(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (nc->spadcnt);
+}
+
+void
+ntb_spad_clear(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+ unsigned i;
+
+ for (i = 0; i < nc->spadcnt; i++)
+ NTB_SPAD_WRITE(device_get_parent(ntb), i + nc->spadoff, 0);
+}
+
+int
+ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff, val));
+}
+
+int
+ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff, val));
+}
+
+int
+ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_PEER_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff,
+ val));
+}
+
+int
+ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_PEER_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff,
+ val));
+}
+
+uint64_t
+ntb_db_valid_mask(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (nc->dbmask);
+}
+
+int
+ntb_db_vector_count(device_t ntb)
+{
+
+ return (NTB_DB_VECTOR_COUNT(device_get_parent(ntb)));
+}
+
+uint64_t
+ntb_db_vector_mask(device_t ntb, uint32_t vector)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return ((NTB_DB_VECTOR_MASK(device_get_parent(ntb), vector)
+ >> nc->dboff) & nc->dbmask);
+}
+
+int
+ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size)
+{
+
+ return (NTB_PEER_DB_ADDR(device_get_parent(ntb), db_addr, db_size));
+}
+
+void
+ntb_db_clear(device_t ntb, uint64_t bits)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_DB_CLEAR(device_get_parent(ntb), bits << nc->dboff));
+}
+
+void
+ntb_db_clear_mask(device_t ntb, uint64_t bits)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_DB_CLEAR_MASK(device_get_parent(ntb), bits << nc->dboff));
+}
+
+uint64_t
+ntb_db_read(device_t ntb)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return ((NTB_DB_READ(device_get_parent(ntb)) >> nc->dboff)
+ & nc->dbmask);
+}
+
+void
+ntb_db_set_mask(device_t ntb, uint64_t bits)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_DB_SET_MASK(device_get_parent(ntb), bits << nc->dboff));
+}
+
+void
+ntb_peer_db_set(device_t ntb, uint64_t bits)
+{
+ struct ntb_child *nc = device_get_ivars(ntb);
+
+ return (NTB_PEER_DB_SET(device_get_parent(ntb), bits << nc->dboff));
+}
+
+MODULE_VERSION(ntb, 1);
diff --git a/sys/dev/ntb/ntb.h b/sys/dev/ntb/ntb.h
new file mode 100644
index 0000000..8593c65
--- /dev/null
+++ b/sys/dev/ntb/ntb.h
@@ -0,0 +1,409 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NTB_H_
+#define _NTB_H_
+
+#include "ntb_if.h"
+
+extern devclass_t ntb_hw_devclass;
+SYSCTL_DECL(_hw_ntb);
+
+int ntb_register_device(device_t ntb);
+int ntb_unregister_device(device_t ntb);
+
+/*
+ * ntb_link_event() - notify driver context of a change in link status
+ * @ntb: NTB device context
+ *
+ * Notify the driver context that the link status may have changed. The driver
+ * should call ntb_link_is_up() to get the current status.
+ */
+void ntb_link_event(device_t ntb);
+
+/*
+ * ntb_db_event() - notify driver context of a doorbell event
+ * @ntb: NTB device context
+ * @vector: Interrupt vector number
+ *
+ * Notify the driver context of a doorbell event. If hardware supports
+ * multiple interrupt vectors for doorbells, the vector number indicates which
+ * vector received the interrupt. The vector number is relative to the first
+ * vector used for doorbells, starting at zero, and must be less than
+ * ntb_db_vector_count(). The driver may call ntb_db_read() to check which
+ * doorbell bits need service, and ntb_db_vector_mask() to determine which of
+ * those bits are associated with the vector number.
+ */
+void ntb_db_event(device_t ntb, uint32_t vec);
+
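+/*
+ * A minimal sketch of servicing a doorbell event from a client driver's
+ * db_event callback; the "mydrv" names and the "dev" softc field are
+ * illustrative assumptions, not part of this KPI:
+ *
+ *	static void
+ *	mydrv_db_event(void *ctx, uint32_t vec)
+ *	{
+ *		struct mydrv_softc *sc = ctx;
+ *		uint64_t db;
+ *
+ *		db = ntb_db_read(sc->dev) & ntb_db_vector_mask(sc->dev, vec);
+ *		ntb_db_clear(sc->dev, db);
+ *		// service whatever work the bits in "db" signal
+ *	}
+ */
+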
+/*
+ * ntb_link_is_up() - get the current ntb link state
+ * @ntb: NTB device context
+ * @speed: OUT - The link speed expressed as PCIe generation number
+ * @width: OUT - The link width expressed as the number of PCIe lanes
+ *
+ * RETURNS: true or false based on the hardware link state
+ */
+bool ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width);
+
+/*
+ * ntb_link_enable() - enable the link on the secondary side of the ntb
+ * @ntb: NTB device context
+ * @speed: The maximum link speed expressed as PCIe generation number[0]
+ * @width: The maximum link width expressed as the number of PCIe lanes[0]
+ *
+ * Enable the link on the secondary side of the ntb. This can only be done
+ * from the primary side of the ntb in primary or b2b topology. The ntb device
+ * should train the link to its maximum speed and width, or the requested speed
+ * and width, whichever is smaller, if supported.
+ *
+ * Return: Zero on success, otherwise an error number.
+ *
+ * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
+ * and width inputs will be ignored.
+ */
+int ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width);
+
+/*
+ * ntb_link_disable() - disable the link on the secondary side of the ntb
+ * @ntb: NTB device context
+ *
+ * Disable the link on the secondary side of the ntb. This can only be done
+ * from the primary side of the ntb in primary or b2b topology. The ntb device
+ * should disable the link. Returning from this call must indicate that a
+ * barrier has passed: no more writes may pass in either direction across the
+ * link, unless this call returns an error number.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_link_disable(device_t ntb);
+
+/*
+ * ntb_link_enabled() - get the enable status of the link on the secondary
+ * side of the ntb
+ */
+bool ntb_link_enabled(device_t ntb);
+
+/*
+ * ntb_set_ctx() - associate a driver context with an ntb device
+ * @ntb: NTB device context
+ * @ctx: Driver context
+ * @ctx_ops: Driver context operations
+ *
+ * Associate a driver context and operations with an ntb device. The context is
+ * provided by the client driver, and the driver may associate a different
+ * context with each ntb device.
+ *
+ * Return: Zero if the context is associated, otherwise an error number.
+ */
+int ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops);
+
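+/*
+ * Registering a context might look like the following sketch, where the
+ * "mydrv" callbacks and softc are illustrative assumptions:
+ *
+ *	static const struct ntb_ctx_ops mydrv_ops = {
+ *		.link_event = mydrv_link_event,
+ *		.db_event = mydrv_db_event,
+ *	};
+ *
+ *	error = ntb_set_ctx(sc->dev, sc, &mydrv_ops);
+ */
+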
+/*
+ * ntb_get_ctx() - get a driver context associated with an ntb device
+ * @ntb: NTB device context
+ * @ctx_ops: Driver context operations
+ *
+ * Get a driver context and operations associated with an ntb device.
+ */
+void * ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops);
+
+/*
+ * ntb_clear_ctx() - disassociate any driver context from an ntb device
+ * @ntb: NTB device context
+ *
+ * Clear any association that may exist between a driver context and the ntb
+ * device.
+ */
+void ntb_clear_ctx(device_t ntb);
+
+/*
+ * ntb_mw_count() - Get the number of memory windows available for KPI
+ * consumers.
+ *
+ * (Excludes any MW wholly reserved for register access.)
+ */
+uint8_t ntb_mw_count(device_t ntb);
+
+/*
+ * ntb_mw_get_range() - get the range of a memory window
+ * @ntb: NTB device context
+ * @idx: Memory window number
+ * @base: OUT - the base address for mapping the memory window
+ * @vbase: OUT - the kernel virtual address of the memory window mapping
+ * @size: OUT - the size for mapping the memory window
+ * @align: OUT - the base alignment for translating the memory window
+ * @align_size: OUT - the size alignment for translating the memory window
+ * @plimit: OUT - the upper limit on addresses the window can translate to
+ *
+ * Get the range of a memory window. NULL may be given for any output
+ * parameter if the value is not needed. The base and size may be used for
+ * mapping the memory window, to access the peer memory. The alignment and
+ * size may be used for translating the memory window, for the peer to access
+ * memory on the local system.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base,
+ caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
+ bus_addr_t *plimit);
+
+/*
+ * ntb_mw_set_trans() - set the translation of a memory window
+ * @ntb: NTB device context
+ * @idx: Memory window number
+ * @addr: The dma address of local memory to expose to the peer
+ * @size: The size of the local memory to expose to the peer
+ *
+ * Set the translation of a memory window. The peer may access local memory
+ * through the window starting at the address, up to the size. The address
+ * must be aligned to the alignment specified by ntb_mw_get_range(). The size
+ * must be aligned to the size alignment specified by ntb_mw_get_range(). The
+ * address must be below the plimit specified by ntb_mw_get_range() (e.g. for
+ * 32-bit BARs).
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr,
+ size_t size);
+
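+/*
+ * A sketched sequence for exposing a local buffer through memory window 0,
+ * mirroring what the transport driver does (error handling omitted; "wanted"
+ * is an assumed buffer size no larger than the window):
+ *
+ *	size_t size, align, align_size, xsize;
+ *	bus_addr_t limit;
+ *	void *buf;
+ *
+ *	ntb_mw_get_range(dev, 0, NULL, NULL, &size, &align, &align_size,
+ *	    &limit);
+ *	xsize = roundup(wanted, align_size);
+ *	buf = contigmalloc(xsize, M_DEVBUF, M_ZERO, 0, limit, align, 0);
+ *	ntb_mw_set_trans(dev, 0, vtophys(buf), xsize);
+ */
+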
+/*
+ * ntb_mw_clear_trans() - clear the translation of a memory window
+ * @ntb: NTB device context
+ * @idx: Memory window number
+ *
+ * Clear the translation of a memory window. The peer may no longer access
+ * local memory through the window.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+int ntb_mw_clear_trans(device_t ntb, unsigned mw_idx);
+
+/*
+ * ntb_mw_get_wc - Get the write-combine status of a memory window
+ *
+ * Returns: Zero on success, setting *mode; otherwise an error number (e.g. if
+ * idx is an invalid memory window).
+ *
+ * Mode is a VM_MEMATTR_* type.
+ */
+int ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode);
+
+/*
+ * ntb_mw_set_wc - Set the write-combine status of a memory window
+ *
+ * If 'mode' matches the current status, this does nothing and succeeds. Mode
+ * is a VM_MEMATTR_* type.
+ *
+ * Returns: Zero on success, setting the caching attribute on the virtual
+ * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid
+ * memory window, or if changing the caching attribute fails).
+ */
+int ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode);
+
+/*
+ * ntb_spad_count() - get the total scratch regs usable
+ * @ntb: pointer to ntb_softc instance
+ *
+ * This function returns the number of 32-bit scratchpad registers usable by
+ * the upper layer.
+ *
+ * RETURNS: total number of scratch pad registers available
+ */
+uint8_t ntb_spad_count(device_t ntb);
+
+/*
+ * ntb_spad_clear() - zero local scratch registers
+ * @ntb: pointer to ntb_softc instance
+ *
+ * This function overwrites all local scratchpad registers with zeroes.
+ */
+void ntb_spad_clear(device_t ntb);
+
+/*
+ * ntb_spad_write() - write to the local scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register. The register resides on the primary (internal) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val);
+
+/*
+ * ntb_spad_read() - read from the primary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to scratchpad register, 0 based
+ * @val: pointer to 32bit integer for storing the register value
+ *
+ * This function allows reading of the 32bit scratchpad register on
+ * the primary (internal) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val);
+
+/*
+ * ntb_peer_spad_write() - write to the secondary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register. The register resides on the secondary (external) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val);
+
+/*
+ * ntb_peer_spad_read() - read from the secondary scratchpad register
+ * @ntb: pointer to ntb_softc instance
+ * @idx: index to scratchpad register, 0 based
+ * @val: pointer to 32bit integer for storing the register value
+ *
+ * This function allows reading of the 32bit scratchpad register on
+ * the secondary (external) side.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val);
+
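+/*
+ * Scratchpads are typically used for small handshakes before bulk data
+ * flows.  A sketch, with MY_VERSION_SPAD and MY_PROTO_VERSION as made-up
+ * constants: each side posts its version to the peer, then checks what
+ * the peer posted locally.
+ *
+ *	uint32_t val;
+ *
+ *	ntb_peer_spad_write(dev, MY_VERSION_SPAD, MY_PROTO_VERSION);
+ *	ntb_spad_read(dev, MY_VERSION_SPAD, &val);
+ *	if (val != MY_PROTO_VERSION)
+ *		return (EAGAIN);
+ */
+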
+/*
+ * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
+ * @ntb: NTB device context
+ *
+ * Hardware may support a different number or arrangement of doorbell bits.
+ *
+ * Return: A mask of doorbell bits supported by the ntb.
+ */
+uint64_t ntb_db_valid_mask(device_t ntb);
+
+/*
+ * ntb_db_vector_count() - get the number of doorbell interrupt vectors
+ * @ntb: NTB device context.
+ *
+ * Hardware may support a different number of interrupt vectors.
+ *
+ * Return: The number of doorbell interrupt vectors.
+ */
+int ntb_db_vector_count(device_t ntb);
+
+/*
+ * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
+ * @ntb: NTB device context
+ * @vector: Doorbell vector number
+ *
+ * Each interrupt vector may have a different number or arrangement of bits.
+ *
+ * Return: A mask of doorbell bits serviced by a vector.
+ */
+uint64_t ntb_db_vector_mask(device_t ntb, uint32_t vector);
+
+/*
+ * ntb_peer_db_addr() - address and size of the peer doorbell register
+ * @ntb: NTB device context.
+ * @db_addr: OUT - The address of the peer doorbell register.
+ * @db_size: OUT - The number of bytes to write the peer doorbell register.
+ *
+ * Return the address of the peer doorbell register. This may be used, for
+ * example, by drivers that offload memory copy operations to a dma engine.
+ * The drivers may wish to ring the peer doorbell at the completion of memory
+ * copy operations. For efficiency, and to simplify ordering of operations
+ * between the dma memory copies and the ringing of the doorbell, the driver may
+ * append one additional dma memory copy with the doorbell register as the
+ * destination, after the memory copy operations.
+ *
+ * Return: Zero on success, otherwise an error number.
+ *
+ * Note that writing the peer doorbell via a memory window will *not* generate
+ * an interrupt on the remote host; that must be done separately.
+ */
+int ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size);
+
+/*
+ * ntb_db_clear() - clear bits in the local doorbell register
+ * @ntb: NTB device context.
+ * @db_bits: Doorbell bits to clear.
+ *
+ * Clear bits in the local doorbell register, arming the bits for the next
+ * doorbell.
+ */
+void ntb_db_clear(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_db_clear_mask() - clear bits in the local doorbell mask
+ * @ntb: NTB device context.
+ * @db_bits: Doorbell bits to clear.
+ *
+ * Clear bits in the local doorbell mask register, allowing doorbell interrupts
+ * to be generated for those doorbell bits. If a doorbell bit is already
+ * set at the time the mask is cleared, and the corresponding mask bit is
+ * changed from set to clear, then the ntb driver must ensure that
+ * ntb_db_event() is called. If the hardware does not generate the interrupt
+ * on clearing the mask bit, then the driver must call ntb_db_event() anyway.
+ */
+void ntb_db_clear_mask(device_t ntb, uint64_t bits);
+
+/*
+ * ntb_db_read() - read the local doorbell register
+ * @ntb: NTB device context.
+ *
+ * Read the local doorbell register, and return the bits that are set.
+ *
+ * Return: The bits currently set in the local doorbell register.
+ */
+uint64_t ntb_db_read(device_t ntb);
+
+/*
+ * ntb_db_set_mask() - set bits in the local doorbell mask
+ * @ntb: NTB device context.
+ * @db_bits: Doorbell mask bits to set.
+ *
+ * Set bits in the local doorbell mask register, preventing doorbell interrupts
+ * from being generated for those doorbell bits. Bits that were already set
+ * must remain set.
+ */
+void ntb_db_set_mask(device_t ntb, uint64_t bits);
+
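+/*
+ * A common pattern (sketched) is to mask a doorbell while its work is in
+ * progress and re-arm it afterwards:
+ *
+ *	ntb_db_set_mask(dev, db);	(hold off further interrupts)
+ *	(process the work signalled by "db")
+ *	ntb_db_clear(dev, db);		(arm the bits for the next ring)
+ *	ntb_db_clear_mask(dev, db);	(allow interrupts again)
+ */
+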
+/*
+ * ntb_peer_db_set() - Set the doorbell on the secondary/external side
+ * @ntb: pointer to ntb_softc instance
+ * @bits: doorbell bits to ring
+ *
+ * This function allows triggering of a doorbell on the secondary/external
+ * side that will initiate an interrupt on the remote host.
+ */
+void ntb_peer_db_set(device_t ntb, uint64_t bits);
+
+#endif /* _NTB_H_ */
diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.c b/sys/dev/ntb/ntb_hw/ntb_hw.c
index c1381f0..dac3699 100644
--- a/sys/dev/ntb/ntb_hw/ntb_hw.c
+++ b/sys/dev/ntb/ntb_hw/ntb_hw.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
* Copyright (C) 2013 Intel Corporation
* Copyright (C) 2015 EMC Corporation
* All rights reserved.
@@ -25,6 +26,16 @@
* SUCH DAMAGE.
*/
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using PCI-e links, providing remote memory access.
+ *
+ * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -33,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
+#include <sys/interrupt.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
@@ -50,19 +62,7 @@ __FBSDID("$FreeBSD$");
#include <dev/pci/pcivar.h>
#include "ntb_regs.h"
-#include "ntb_hw.h"
-
-/*
- * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
- * allows you to connect two systems using a PCI-e link.
- *
- * This module contains the hardware abstraction layer for the NTB. It allows
- * you to send and receive interrupts, map the memory windows and send and
- * receive messages in the scratch-pad registers.
- *
- * NOTE: Much of the code in this module is shared with Linux. Any patches may
- * be picked up and redistributed in Linux with a dual GPL/BSD license.
- */
+#include "../ntb.h"
#define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT)
@@ -70,8 +70,6 @@ __FBSDID("$FreeBSD$");
#define ATOM_LINK_RECOVERY_TIME 500 /* ms */
#define BAR_HIGH_MASK (~((1ull << 12) - 1))
-#define DEVICE2SOFTC(dev) ((struct ntb_softc *) device_get_softc(dev))
-
#define NTB_MSIX_VER_GUARD 0xaabbccdd
#define NTB_MSIX_RECEIVED 0xe0f0e0f0
@@ -122,8 +120,8 @@ enum {
};
/* Device features and workarounds */
-#define HAS_FEATURE(feature) \
- ((ntb->features & (feature)) != 0)
+#define HAS_FEATURE(ntb, feature) \
+ (((ntb)->features & (feature)) != 0)
struct ntb_hw_info {
uint32_t device_id;
@@ -202,6 +200,9 @@ struct ntb_msix_data {
};
struct ntb_softc {
+ /* ntb.c context. Do not move! Must go first! */
+ void *ntb_store;
+
device_t device;
enum ntb_device_type type;
uint32_t features;
@@ -220,13 +221,7 @@ struct ntb_softc {
struct callout heartbeat_timer;
struct callout lr_timer;
- void *ntb_ctx;
- const struct ntb_ctx_ops *ctx_ops;
struct ntb_vec *msix_vec;
-#define CTX_LOCK(sc) mtx_lock(&(sc)->ctx_lock)
-#define CTX_UNLOCK(sc) mtx_unlock(&(sc)->ctx_lock)
-#define CTX_ASSERT(sc,f) mtx_assert(&(sc)->ctx_lock, (f))
- struct mtx ctx_lock;
uint32_t ppd;
enum ntb_conn_type conn_type;
@@ -258,6 +253,7 @@ struct ntb_softc {
uint64_t db_valid_mask;
uint64_t db_link_mask;
uint64_t db_mask;
+ uint64_t fake_db_bell; /* NTB_SB01BASE_LOCKUP */
int last_ts; /* ticks @ last irq */
@@ -287,61 +283,74 @@ bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle,
}
#endif
-#define ntb_bar_read(SIZE, bar, offset) \
+#define intel_ntb_bar_read(SIZE, bar, offset) \
bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
ntb->bar_info[(bar)].pci_bus_handle, (offset))
-#define ntb_bar_write(SIZE, bar, offset, val) \
+#define intel_ntb_bar_write(SIZE, bar, offset, val) \
bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \
ntb->bar_info[(bar)].pci_bus_handle, (offset), (val))
-#define ntb_reg_read(SIZE, offset) ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
-#define ntb_reg_write(SIZE, offset, val) \
- ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
-#define ntb_mw_read(SIZE, offset) \
- ntb_bar_read(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), offset)
-#define ntb_mw_write(SIZE, offset, val) \
- ntb_bar_write(SIZE, ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
+#define intel_ntb_reg_read(SIZE, offset) \
+ intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset)
+#define intel_ntb_reg_write(SIZE, offset, val) \
+ intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val)
+#define intel_ntb_mw_read(SIZE, offset) \
+ intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
+ offset)
+#define intel_ntb_mw_write(SIZE, offset, val) \
+ intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \
offset, val)
-static int ntb_probe(device_t device);
-static int ntb_attach(device_t device);
-static int ntb_detach(device_t device);
-static unsigned ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
-static inline enum ntb_bar ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
+static int intel_ntb_probe(device_t device);
+static int intel_ntb_attach(device_t device);
+static int intel_ntb_detach(device_t device);
+static uint64_t intel_ntb_db_valid_mask(device_t dev);
+static void intel_ntb_spad_clear(device_t dev);
+static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector);
+static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
+ enum ntb_width *width);
+static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed,
+ enum ntb_width width);
+static int intel_ntb_link_disable(device_t dev);
+static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val);
+static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val);
+
+static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx);
+static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw);
static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar);
static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar,
uint32_t *base, uint32_t *xlat, uint32_t *lmt);
-static int ntb_map_pci_bars(struct ntb_softc *ntb);
-static int ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
+static int intel_ntb_map_pci_bars(struct ntb_softc *ntb);
+static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx,
vm_memattr_t);
static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *,
const char *);
static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar);
static int map_memory_window_bar(struct ntb_softc *ntb,
struct ntb_pci_bar_info *bar);
-static void ntb_unmap_pci_bar(struct ntb_softc *ntb);
-static int ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
-static int ntb_init_isr(struct ntb_softc *ntb);
-static int ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
-static int ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
-static void ntb_teardown_interrupts(struct ntb_softc *ntb);
-static inline uint64_t ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
-static void ntb_interrupt(struct ntb_softc *, uint32_t vec);
+static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb);
+static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail);
+static int intel_ntb_init_isr(struct ntb_softc *ntb);
+static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb);
+static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors);
+static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb);
+static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector);
+static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec);
static void ndev_vec_isr(void *arg);
static void ndev_irq_isr(void *arg);
static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff);
static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t);
static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t);
-static int ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
-static void ntb_free_msix_vec(struct ntb_softc *ntb);
-static void ntb_get_msix_info(struct ntb_softc *ntb);
-static void ntb_exchange_msix(void *);
-static struct ntb_hw_info *ntb_get_device_info(uint32_t device_id);
-static void ntb_detect_max_mw(struct ntb_softc *ntb);
-static int ntb_detect_xeon(struct ntb_softc *ntb);
-static int ntb_detect_atom(struct ntb_softc *ntb);
-static int ntb_xeon_init_dev(struct ntb_softc *ntb);
-static int ntb_atom_init_dev(struct ntb_softc *ntb);
-static void ntb_teardown_xeon(struct ntb_softc *ntb);
+static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors);
+static void intel_ntb_free_msix_vec(struct ntb_softc *ntb);
+static void intel_ntb_get_msix_info(struct ntb_softc *ntb);
+static void intel_ntb_exchange_msix(void *);
+static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id);
+static void intel_ntb_detect_max_mw(struct ntb_softc *ntb);
+static int intel_ntb_detect_xeon(struct ntb_softc *ntb);
+static int intel_ntb_detect_atom(struct ntb_softc *ntb);
+static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb);
+static int intel_ntb_atom_init_dev(struct ntb_softc *ntb);
+static void intel_ntb_teardown_xeon(struct ntb_softc *ntb);
static void configure_atom_secondary_side_bars(struct ntb_softc *ntb);
static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx,
enum ntb_bar regbar);
@@ -351,18 +360,16 @@ static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr,
enum ntb_bar idx);
static int xeon_setup_b2b_mw(struct ntb_softc *,
const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr);
-static int xeon_setup_msix_bar(struct ntb_softc *);
static inline bool link_is_up(struct ntb_softc *ntb);
static inline bool _xeon_link_is_up(struct ntb_softc *ntb);
static inline bool atom_link_is_err(struct ntb_softc *ntb);
-static inline enum ntb_speed ntb_link_sta_speed(struct ntb_softc *);
-static inline enum ntb_width ntb_link_sta_width(struct ntb_softc *);
+static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *);
+static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *);
static void atom_link_hb(void *arg);
-static void ntb_db_event(struct ntb_softc *ntb, uint32_t vec);
static void recover_atom_link(void *arg);
-static bool ntb_poll_link(struct ntb_softc *ntb);
+static bool intel_ntb_poll_link(struct ntb_softc *ntb);
static void save_bar_parameters(struct ntb_pci_bar_info *bar);
-static void ntb_sysctl_init(struct ntb_softc *);
+static void intel_ntb_sysctl_init(struct ntb_softc *);
static int sysctl_handle_features(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS);
@@ -372,7 +379,7 @@ static int sysctl_handle_register(SYSCTL_HANDLER_ARGS);
static unsigned g_ntb_hw_debug_level;
SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
&g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose");
-#define ntb_printf(lvl, ...) do { \
+#define intel_ntb_printf(lvl, ...) do { \
if ((lvl) <= g_ntb_hw_debug_level) { \
device_printf(ntb->device, __VA_ARGS__); \
} \
@@ -395,7 +402,7 @@ SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN,
"UC-: " __XSTRING(_NTB_PAT_UCM));
static inline vm_memattr_t
-ntb_pat_flags(void)
+intel_ntb_pat_flags(void)
{
switch (g_ntb_mw_pat) {
@@ -421,7 +428,7 @@ ntb_pat_flags(void)
* anywhere better yet.
*/
static inline const char *
-ntb_vm_memattr_to_str(vm_memattr_t pat)
+intel_ntb_vm_memattr_to_str(vm_memattr_t pat)
{
switch (pat) {
@@ -442,7 +449,7 @@ ntb_vm_memattr_to_str(vm_memattr_t pat)
}
}
-static int g_ntb_msix_idx = 0;
+static int g_ntb_msix_idx = 1;
SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx,
0, "Use this memory window to access the peer MSIX message complex on "
"certain Xeon-based NTB systems, as a workaround for a hardware errata. "
@@ -457,6 +464,18 @@ SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx,
"available memory window. Both sides of the NTB MUST set the same "
"value here! (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)");
+/* Hardware owns the low 16 bits of features. */
+#define NTB_BAR_SIZE_4K (1 << 0)
+#define NTB_SDOORBELL_LOCKUP (1 << 1)
+#define NTB_SB01BASE_LOCKUP (1 << 2)
+#define NTB_B2BDOORBELL_BIT14 (1 << 3)
+/* Software/configuration owns the top 16 bits. */
+#define NTB_SPLIT_BAR (1ull << 16)
+
+#define NTB_FEATURES_STR \
+ "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
+ "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
+
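
For reference, NTB_FEATURES_STR follows the kernel "%b" convention: the
leading \20 selects hexadecimal output, and each subsequent octal escape is a
1-origin bit number, so \21SPLIT_BAR4 labels bit 17, i.e. the new
software-owned 1ull << 16.  A minimal userland sketch of the decoding
(sysctl_handle_features below feeds this same string to sbuf_printf("%b")),
runnable as-is:

#include <stdio.h>
#include <stdint.h>

static void
print_b(uint64_t v, const char *bits)
{
	const char *name;
	int any, bit;

	any = 0;
	/* First byte is the base; only \20 (hex) handled in this sketch. */
	printf(*bits++ == 16 ? "0x%jx" : "%ju", (uintmax_t)v);
	while (*bits != '\0') {
		bit = *bits++;			/* 1-origin bit number */
		name = bits;
		while (*bits > 32)		/* name ends at next escape */
			bits++;
		if ((v & (1ull << (bit - 1))) != 0)
			printf("%c%.*s", any++ ? ',' : '<',
			    (int)(bits - name), name);
	}
	if (any != 0)
		putchar('>');
	putchar('\n');
}

int
main(void)
{
	/* NTB_SPLIT_BAR | NTB_B2BDOORBELL_BIT14 */
	print_b((1ull << 16) | (1 << 3),
	    "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP"
	    "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K");
	return (0);
}
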
static struct ntb_hw_info pci_ids[] = {
/* XXX: PS/SS IDs left out until they are supported. */
{ 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B",
@@ -597,35 +616,15 @@ SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN,
*/
MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations");
-static device_method_t ntb_pci_methods[] = {
- /* Device interface */
- DEVMETHOD(device_probe, ntb_probe),
- DEVMETHOD(device_attach, ntb_attach),
- DEVMETHOD(device_detach, ntb_detach),
- DEVMETHOD_END
-};
-
-static driver_t ntb_pci_driver = {
- "ntb_hw",
- ntb_pci_methods,
- sizeof(struct ntb_softc),
-};
-
-static devclass_t ntb_devclass;
-DRIVER_MODULE(ntb_hw, pci, ntb_pci_driver, ntb_devclass, NULL, NULL);
-MODULE_VERSION(ntb_hw, 1);
-
-SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls");
-
/*
* OS <-> Driver linkage functions
*/
static int
-ntb_probe(device_t device)
+intel_ntb_probe(device_t device)
{
struct ntb_hw_info *p;
- p = ntb_get_device_info(pci_get_devid(device));
+ p = intel_ntb_get_device_info(pci_get_devid(device));
if (p == NULL)
return (ENXIO);
@@ -634,14 +633,14 @@ ntb_probe(device_t device)
}
static int
-ntb_attach(device_t device)
+intel_ntb_attach(device_t device)
{
struct ntb_softc *ntb;
struct ntb_hw_info *p;
int error;
- ntb = DEVICE2SOFTC(device);
- p = ntb_get_device_info(pci_get_devid(device));
+ ntb = device_get_softc(device);
+ p = intel_ntb_get_device_info(pci_get_devid(device));
ntb->device = device;
ntb->type = p->type;
@@ -654,47 +653,52 @@ ntb_attach(device_t device)
callout_init(&ntb->lr_timer, 1);
callout_init(&ntb->peer_msix_work, 1);
mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN);
- mtx_init(&ntb->ctx_lock, "ntb ctx", NULL, MTX_DEF);
if (ntb->type == NTB_ATOM)
- error = ntb_detect_atom(ntb);
+ error = intel_ntb_detect_atom(ntb);
else
- error = ntb_detect_xeon(ntb);
+ error = intel_ntb_detect_xeon(ntb);
if (error != 0)
goto out;
- ntb_detect_max_mw(ntb);
+ intel_ntb_detect_max_mw(ntb);
pci_enable_busmaster(ntb->device);
- error = ntb_map_pci_bars(ntb);
+ error = intel_ntb_map_pci_bars(ntb);
if (error != 0)
goto out;
if (ntb->type == NTB_ATOM)
- error = ntb_atom_init_dev(ntb);
+ error = intel_ntb_atom_init_dev(ntb);
else
- error = ntb_xeon_init_dev(ntb);
+ error = intel_ntb_xeon_init_dev(ntb);
if (error != 0)
goto out;
- ntb_spad_clear(ntb);
+ intel_ntb_spad_clear(device);
+
+ intel_ntb_poll_link(ntb);
- ntb_poll_link(ntb);
+ intel_ntb_sysctl_init(ntb);
- ntb_sysctl_init(ntb);
+ /* Attach children to this controller */
+ error = ntb_register_device(device);
out:
if (error != 0)
- ntb_detach(device);
+ intel_ntb_detach(device);
return (error);
}
static int
-ntb_detach(device_t device)
+intel_ntb_detach(device_t device)
{
struct ntb_softc *ntb;
- ntb = DEVICE2SOFTC(device);
+ ntb = device_get_softc(device);
+
+ /* Detach & delete all children */
+ ntb_unregister_device(device);
if (ntb->self_reg != NULL) {
DB_MASK_LOCK(ntb);
@@ -706,13 +710,12 @@ ntb_detach(device_t device)
callout_drain(&ntb->peer_msix_work);
pci_disable_busmaster(ntb->device);
if (ntb->type == NTB_XEON)
- ntb_teardown_xeon(ntb);
- ntb_teardown_interrupts(ntb);
+ intel_ntb_teardown_xeon(ntb);
+ intel_ntb_teardown_interrupts(ntb);
mtx_destroy(&ntb->db_mask_lock);
- mtx_destroy(&ntb->ctx_lock);
- ntb_unmap_pci_bar(ntb);
+ intel_ntb_unmap_pci_bar(ntb);
return (0);
}
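
The ntb_pci_methods/DRIVER_MODULE boilerplate deleted above does not simply
disappear: probe, attach, and detach survive under their intel_ntb_* names,
and the hardware ops presumably move into a method table against the new
ntb_if.m interface, so the children attached by ntb_register_device() can call
through their parent device_t.  A sketch of the shape such a table takes;
the ntb_* method names here are illustrative and should be checked against
ntb_if.m:

static device_method_t ntb_intel_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		intel_ntb_probe),
	DEVMETHOD(device_attach,	intel_ntb_attach),
	DEVMETHOD(device_detach,	intel_ntb_detach),
	/* NTB interface */
	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
	DEVMETHOD_END
};
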
@@ -721,7 +724,7 @@ ntb_detach(device_t device)
* Driver internal routines
*/
static inline enum ntb_bar
-ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
+intel_ntb_mw_to_bar(struct ntb_softc *ntb, unsigned mw)
{
KASSERT(mw < ntb->mw_count,
@@ -736,7 +739,7 @@ bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar)
{
/* XXX This assertion could be stronger. */
KASSERT(bar < NTB_MAX_BARS, ("bogus bar"));
- return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(NTB_SPLIT_BAR));
+ return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR));
}
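
Much of the mechanical churn in this diff is HAS_FEATURE() growing an explicit
softc argument, as in the line above.  The macro itself is not visible in
these hunks; presumably it now reads along the lines of:

#define HAS_FEATURE(ntb, feature) \
    (((ntb)->features & (feature)) != 0)
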
static inline void
@@ -777,7 +780,7 @@ bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base,
}
static int
-ntb_map_pci_bars(struct ntb_softc *ntb)
+intel_ntb_map_pci_bars(struct ntb_softc *ntb)
{
int rc;
@@ -802,7 +805,7 @@ ntb_map_pci_bars(struct ntb_softc *ntb)
ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET;
ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET;
- if (!HAS_FEATURE(NTB_SPLIT_BAR))
+ if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR))
goto out;
ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5);
@@ -876,7 +879,7 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
* but the PCI driver does not honor the size in this call, so we have
* to modify it after the fact.
*/
- if (HAS_FEATURE(NTB_BAR_SIZE_4K)) {
+ if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) {
if (bar->pci_resource_id == PCIR_BAR(2))
bar_size_bits = pci_read_config(ntb->device,
XEON_PBAR23SZ_OFFSET, 1);
@@ -903,7 +906,7 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
* Optionally, mark MW BARs as anything other than UC to improve
* performance.
*/
- mapmode = ntb_pat_flags();
+ mapmode = intel_ntb_pat_flags();
if (mapmode == bar->map_mode)
return (0);
@@ -916,7 +919,7 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
(char *)bar->vbase + bar->size - 1,
(void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
- ntb_vm_memattr_to_str(mapmode));
+ intel_ntb_vm_memattr_to_str(mapmode));
} else
device_printf(ntb->device,
"Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as "
@@ -924,13 +927,13 @@ map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar)
PCI_RID2BAR(bar->pci_resource_id), bar->vbase,
(char *)bar->vbase + bar->size - 1,
(void *)bar->pbase, (void *)(bar->pbase + bar->size - 1),
- ntb_vm_memattr_to_str(mapmode), rc);
+ intel_ntb_vm_memattr_to_str(mapmode), rc);
/* Proceed anyway */
return (0);
}
static void
-ntb_unmap_pci_bar(struct ntb_softc *ntb)
+intel_ntb_unmap_pci_bar(struct ntb_softc *ntb)
{
struct ntb_pci_bar_info *current_bar;
int i;
@@ -945,7 +948,7 @@ ntb_unmap_pci_bar(struct ntb_softc *ntb)
}
static int
-ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
+intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors)
{
uint32_t i;
int rc;
@@ -998,7 +1001,7 @@ SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN,
* round-robin fashion.
*/
static int
-ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
+intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
{
u_int *vectors;
uint32_t i;
@@ -1018,7 +1021,7 @@ ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail)
}
static int
-ntb_init_isr(struct ntb_softc *ntb)
+intel_ntb_init_isr(struct ntb_softc *ntb)
{
uint32_t desired_vectors, num_vectors;
int rc;
@@ -1044,7 +1047,7 @@ ntb_init_isr(struct ntb_softc *ntb)
num_vectors--;
if (rc == 0 && num_vectors < desired_vectors) {
- rc = ntb_remap_msix(ntb->device, desired_vectors,
+ rc = intel_ntb_remap_msix(ntb->device, desired_vectors,
num_vectors);
if (rc == 0)
num_vectors = desired_vectors;
@@ -1057,7 +1060,7 @@ ntb_init_isr(struct ntb_softc *ntb)
num_vectors = 1;
if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) {
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
device_printf(ntb->device,
"Errata workaround does not support MSI or INTX\n");
return (EINVAL);
@@ -1065,32 +1068,30 @@ ntb_init_isr(struct ntb_softc *ntb)
ntb->db_vec_count = 1;
ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT;
- rc = ntb_setup_legacy_interrupt(ntb);
+ rc = intel_ntb_setup_legacy_interrupt(ntb);
} else {
if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS &&
- HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+ HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
device_printf(ntb->device,
"Errata workaround expects %d doorbell bits\n",
XEON_NONLINK_DB_MSIX_BITS);
return (EINVAL);
}
- ntb_create_msix_vec(ntb, num_vectors);
- rc = ntb_setup_msix(ntb, num_vectors);
- if (rc == 0 && HAS_FEATURE(NTB_SB01BASE_LOCKUP))
- ntb_get_msix_info(ntb);
+ intel_ntb_create_msix_vec(ntb, num_vectors);
+ rc = intel_ntb_setup_msix(ntb, num_vectors);
}
if (rc != 0) {
device_printf(ntb->device,
"Error allocating interrupts: %d\n", rc);
- ntb_free_msix_vec(ntb);
+ intel_ntb_free_msix_vec(ntb);
}
return (rc);
}
static int
-ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
+intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
{
int rc;
@@ -1117,7 +1118,7 @@ ntb_setup_legacy_interrupt(struct ntb_softc *ntb)
}
static void
-ntb_teardown_interrupts(struct ntb_softc *ntb)
+intel_ntb_teardown_interrupts(struct ntb_softc *ntb)
{
struct ntb_int_info *current_int;
int i;
@@ -1133,7 +1134,7 @@ ntb_teardown_interrupts(struct ntb_softc *ntb)
rman_get_rid(current_int->res), current_int->res);
}
- ntb_free_msix_vec(ntb);
+ intel_ntb_free_msix_vec(ntb);
pci_release_msi(ntb->device);
}
@@ -1146,11 +1147,11 @@ db_ioread(struct ntb_softc *ntb, uint64_t regoff)
{
if (ntb->type == NTB_ATOM)
- return (ntb_reg_read(8, regoff));
+ return (intel_ntb_reg_read(8, regoff));
KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
- return (ntb_reg_read(2, regoff));
+ return (intel_ntb_reg_read(2, regoff));
}
static inline void
@@ -1172,89 +1173,78 @@ db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val)
{
if (ntb->type == NTB_ATOM) {
- ntb_reg_write(8, regoff, val);
+ intel_ntb_reg_write(8, regoff, val);
return;
}
KASSERT(ntb->type == NTB_XEON, ("bad ntb type"));
- ntb_reg_write(2, regoff, (uint16_t)val);
+ intel_ntb_reg_write(2, regoff, (uint16_t)val);
}
-void
-ntb_db_set_mask(struct ntb_softc *ntb, uint64_t bits)
+static void
+intel_ntb_db_set_mask(device_t dev, uint64_t bits)
{
-
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
- return;
+ struct ntb_softc *ntb = device_get_softc(dev);
DB_MASK_LOCK(ntb);
ntb->db_mask |= bits;
- db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+ if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
+ db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
DB_MASK_UNLOCK(ntb);
}
-void
-ntb_db_clear_mask(struct ntb_softc *ntb, uint64_t bits)
+static void
+intel_ntb_db_clear_mask(device_t dev, uint64_t bits)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
+ uint64_t ibits;
+ int i;
KASSERT((bits & ~ntb->db_valid_mask) == 0,
("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
(uintmax_t)(bits & ~ntb->db_valid_mask),
(uintmax_t)ntb->db_valid_mask));
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
- return;
-
DB_MASK_LOCK(ntb);
+ ibits = ntb->fake_db_bell & ntb->db_mask & bits;
ntb->db_mask &= ~bits;
- db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+ /* Simulate fake interrupts if unmasked DB bits are set. */
+ for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
+ if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0)
+ swi_sched(ntb->int_info[i].tag, 0);
+ }
+ } else {
+ db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
+ }
DB_MASK_UNLOCK(ntb);
}
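
With no public pci_mask_msix()/pci_unmask_msix() available (the old #if 0
blocks removed below), the SB01BASE_LOCKUP path now emulates the doorbell
register in software: intel_ntb_interrupt() latches bits into fake_db_bell,
masked bits are withheld, and clearing the mask replays anything pending via
swi_sched() on the vector's interrupt tag.  A runnable userland model of that
bookkeeping, with a printf standing in for the real SWI:

#include <stdio.h>
#include <stdint.h>

static uint64_t fake_db_bell, db_mask;

static void
db_set_mask(uint64_t bits)
{

	db_mask |= bits;
}

static void
db_clear_mask(uint64_t bits)
{
	uint64_t replay;

	replay = fake_db_bell & db_mask & bits;
	db_mask &= ~bits;
	if (replay != 0)
		printf("replay pending doorbells 0x%jx\n", (uintmax_t)replay);
}

static void
db_ring(uint64_t bits)
{

	bits &= ~fake_db_bell;	/* do not re-report uncleared bits */
	fake_db_bell |= bits;	/* latch, even while masked */
	bits &= ~db_mask;	/* withhold masked bits */
	if (bits != 0)
		printf("deliver doorbells 0x%jx\n", (uintmax_t)bits);
}

int
main(void)
{

	db_set_mask(0x1);
	db_ring(0x3);		/* bit 1 delivered, bit 0 latched+withheld */
	db_clear_mask(0x1);	/* bit 0 replayed now */
	return (0);
}
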
-uint64_t
-ntb_db_read(struct ntb_softc *ntb)
+static uint64_t
+intel_ntb_db_read(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
- uint64_t res;
- unsigned i;
-
- res = 0;
- for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
- if (ntb->msix_vec[i].masked != 0)
- res |= ntb_db_vector_mask(ntb, i);
- }
- return (res);
- }
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
+ return (ntb->fake_db_bell);
return (db_ioread(ntb, ntb->self_reg->db_bell));
}
-void
-ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
+static void
+intel_ntb_db_clear(device_t dev, uint64_t bits)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
KASSERT((bits & ~ntb->db_valid_mask) == 0,
("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__,
(uintmax_t)(bits & ~ntb->db_valid_mask),
(uintmax_t)ntb->db_valid_mask));
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
- unsigned i;
-
- for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
- if ((bits & ntb_db_vector_mask(ntb, i)) != 0) {
- DB_MASK_LOCK(ntb);
- if (ntb->msix_vec[i].masked != 0) {
- /* XXX These need a public API. */
-#if 0
- pci_unmask_msix(ntb->device, i);
-#endif
- ntb->msix_vec[i].masked = 0;
- }
- DB_MASK_UNLOCK(ntb);
- }
- }
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+ DB_MASK_LOCK(ntb);
+ ntb->fake_db_bell &= ~bits;
+ DB_MASK_UNLOCK(ntb);
return;
}
@@ -1262,43 +1252,59 @@ ntb_db_clear(struct ntb_softc *ntb, uint64_t bits)
}
static inline uint64_t
-ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
+intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector)
{
uint64_t shift, mask;
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+ /*
+		 * Remap vectors in a custom way so that at least the first
+		 * three doorbells do not generate stray events.
+		 * This breaks Linux compatibility (if one existed)
+		 * when more than one DB is used (not the case for if_ntb).
+ */
+ if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1)
+ return (1 << db_vector);
+ if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1)
+ return (0x7ffc);
+ }
+
shift = ntb->db_vec_shift;
mask = (1ull << shift) - 1;
return (mask << (shift * db_vector));
}
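
In the default case each MSI-X vector owns db_vec_shift consecutive doorbell
bits; the SB01BASE_LOCKUP remap instead gives the first vectors one bit each
and parks bits 2-14 (0x7ffc) on the last vector, keeping the low doorbells
free of stray events per the comment above.  Worked numbers for the default
path, assuming a hypothetical shift of 5:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	unsigned shift = 5;	/* example db_vec_shift */
	unsigned vec;
	uint64_t mask;

	for (vec = 0; vec < 3; vec++) {
		mask = ((1ull << shift) - 1) << (shift * vec);
		/* vec 0 -> 0x1f, vec 1 -> 0x3e0, vec 2 -> 0x7c00 */
		printf("vec %u -> doorbell mask 0x%jx\n", vec,
		    (uintmax_t)mask);
	}
	return (0);
}
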
static void
-ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
+intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec)
{
uint64_t vec_mask;
ntb->last_ts = ticks;
- vec_mask = ntb_vec_mask(ntb, vec);
+ vec_mask = intel_ntb_vec_mask(ntb, vec);
if ((vec_mask & ntb->db_link_mask) != 0) {
- if (ntb_poll_link(ntb))
- ntb_link_event(ntb);
+ if (intel_ntb_poll_link(ntb))
+ ntb_link_event(ntb->device);
}
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP) &&
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
(vec_mask & ntb->db_link_mask) == 0) {
DB_MASK_LOCK(ntb);
- if (ntb->msix_vec[vec].masked == 0) {
- /* XXX These need a public API. */
-#if 0
- pci_mask_msix(ntb->device, vec);
-#endif
- ntb->msix_vec[vec].masked = 1;
- }
+
+		/* Do not report the same DB events again until cleared. */
+ vec_mask &= ~ntb->fake_db_bell;
+
+ /* Update our internal doorbell register. */
+ ntb->fake_db_bell |= vec_mask;
+
+ /* Do not report masked DB events. */
+ vec_mask &= ~ntb->db_mask;
+
DB_MASK_UNLOCK(ntb);
}
if ((vec_mask & ntb->db_valid_mask) != 0)
- ntb_db_event(ntb, vec);
+ ntb_db_event(ntb->device, vec);
}
static void
@@ -1306,18 +1312,18 @@ ndev_vec_isr(void *arg)
{
struct ntb_vec *nvec = arg;
- ntb_interrupt(nvec->ntb, nvec->num);
+ intel_ntb_interrupt(nvec->ntb, nvec->num);
}
static void
ndev_irq_isr(void *arg)
{
/* If we couldn't set up MSI-X, we only have the one vector. */
- ntb_interrupt(arg, 0);
+ intel_ntb_interrupt(arg, 0);
}
static int
-ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
+intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
{
uint32_t i;
@@ -1332,7 +1338,7 @@ ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors)
}
static void
-ntb_free_msix_vec(struct ntb_softc *ntb)
+intel_ntb_free_msix_vec(struct ntb_softc *ntb)
{
if (ntb->msix_vec == NULL)
@@ -1343,7 +1349,7 @@ ntb_free_msix_vec(struct ntb_softc *ntb)
}
static void
-ntb_get_msix_info(struct ntb_softc *ntb)
+intel_ntb_get_msix_info(struct ntb_softc *ntb)
{
struct pci_devinfo *dinfo;
struct pcicfg_msix *msix;
@@ -1352,8 +1358,6 @@ ntb_get_msix_info(struct ntb_softc *ntb)
dinfo = device_get_ivars(ntb->device);
msix = &dinfo->cfg.msix;
- laddr = data = 0;
-
CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data));
for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
@@ -1361,7 +1365,7 @@ ntb_get_msix_info(struct ntb_softc *ntb)
laddr = bus_read_4(msix->msix_table_res, offset +
PCI_MSIX_ENTRY_LOWER_ADDR);
- ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
+ intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr);
KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE,
("local MSIX addr 0x%x not in MSI base 0x%x", laddr,
@@ -1370,14 +1374,14 @@ ntb_get_msix_info(struct ntb_softc *ntb)
data = bus_read_4(msix->msix_table_res, offset +
PCI_MSIX_ENTRY_DATA);
- ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
+ intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data);
ntb->msix_data[i].nmd_data = data;
}
}
static struct ntb_hw_info *
-ntb_get_device_info(uint32_t device_id)
+intel_ntb_get_device_info(uint32_t device_id)
{
struct ntb_hw_info *ep = pci_ids;
@@ -1390,15 +1394,15 @@ ntb_get_device_info(uint32_t device_id)
}
static void
-ntb_teardown_xeon(struct ntb_softc *ntb)
+intel_ntb_teardown_xeon(struct ntb_softc *ntb)
{
if (ntb->reg != NULL)
- ntb_link_disable(ntb);
+ intel_ntb_link_disable(ntb->device);
}
static void
-ntb_detect_max_mw(struct ntb_softc *ntb)
+intel_ntb_detect_max_mw(struct ntb_softc *ntb)
{
if (ntb->type == NTB_ATOM) {
@@ -1406,14 +1410,14 @@ ntb_detect_max_mw(struct ntb_softc *ntb)
return;
}
- if (HAS_FEATURE(NTB_SPLIT_BAR))
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT;
else
ntb->mw_count = XEON_SNB_MW_COUNT;
}
static int
-ntb_detect_xeon(struct ntb_softc *ntb)
+intel_ntb_detect_xeon(struct ntb_softc *ntb)
{
uint8_t ppd, conn_type;
@@ -1428,11 +1432,21 @@ ntb_detect_xeon(struct ntb_softc *ntb)
if ((ppd & XEON_PPD_SPLIT_BAR) != 0)
ntb->features |= NTB_SPLIT_BAR;
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) &&
+ !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
+ device_printf(ntb->device,
+		    "Cannot apply the SB01BASE_LOCKUP workaround "
+ "with split BARs disabled!\n");
+ device_printf(ntb->device,
+ "Expect system hangs under heavy NTB traffic!\n");
+ ntb->features &= ~NTB_SB01BASE_LOCKUP;
+ }
+
/*
* SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP
* errata workaround; only do one at a time.
*/
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP))
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))
ntb->features &= ~NTB_SDOORBELL_LOCKUP;
conn_type = ppd & XEON_PPD_CONN_TYPE;
@@ -1451,7 +1465,7 @@ ntb_detect_xeon(struct ntb_softc *ntb)
}
static int
-ntb_detect_atom(struct ntb_softc *ntb)
+intel_ntb_detect_atom(struct ntb_softc *ntb)
{
uint32_t ppd, conn_type;
@@ -1476,7 +1490,7 @@ ntb_detect_atom(struct ntb_softc *ntb)
}
static int
-ntb_xeon_init_dev(struct ntb_softc *ntb)
+intel_ntb_xeon_init_dev(struct ntb_softc *ntb)
{
int rc;
@@ -1497,15 +1511,16 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
ntb->peer_reg = &xeon_b2b_reg;
ntb->xlat_reg = &xeon_sec_xlat;
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+ ntb->fake_db_bell = 0;
ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) %
ntb->mw_count;
- ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
+ intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n",
g_ntb_msix_idx, ntb->msix_mw_idx);
- rc = ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
+ rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx,
VM_MEMATTR_UNCACHEABLE);
KASSERT(rc == 0, ("shouldn't fail"));
- } else if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
+ } else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
/*
		 * There is a Xeon hardware erratum related to writes to SDOORBELL or
* B2BDOORBELL in conjunction with inbound access to NTB MMIO space,
@@ -1515,12 +1530,12 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
*/
ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) %
ntb->mw_count;
- ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
+ intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n",
g_ntb_mw_idx, ntb->b2b_mw_idx);
- rc = ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
+ rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx,
VM_MEMATTR_UNCACHEABLE);
KASSERT(rc == 0, ("shouldn't fail"));
- } else if (HAS_FEATURE(NTB_B2BDOORBELL_BIT14))
+ } else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14))
/*
* HW Errata on bit 14 of b2bdoorbell register. Writes will not be
* mirrored to the remote system. Shrink the number of bits by one,
@@ -1543,7 +1558,7 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
return (rc);
/* Enable Bus Master and Memory Space on the secondary side */
- ntb_reg_write(2, XEON_SPCICMD_OFFSET,
+ intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET,
PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
/*
@@ -1554,16 +1569,12 @@ ntb_xeon_init_dev(struct ntb_softc *ntb)
db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask);
DB_MASK_UNLOCK(ntb);
- rc = xeon_setup_msix_bar(ntb);
- if (rc != 0)
- return (rc);
-
- rc = ntb_init_isr(ntb);
+ rc = intel_ntb_init_isr(ntb);
return (rc);
}
static int
-ntb_atom_init_dev(struct ntb_softc *ntb)
+intel_ntb_atom_init_dev(struct ntb_softc *ntb)
{
int error;
@@ -1590,15 +1601,15 @@ ntb_atom_init_dev(struct ntb_softc *ntb)
configure_atom_secondary_side_bars(ntb);
/* Enable Bus Master and Memory Space on the secondary side */
- ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
+ intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET,
PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
- error = ntb_init_isr(ntb);
+ error = intel_ntb_init_isr(ntb);
if (error != 0)
return (error);
/* Initiate PCI-E link training */
- ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+ intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb);
@@ -1611,19 +1622,19 @@ configure_atom_secondary_side_bars(struct ntb_softc *ntb)
{
if (ntb->dev_type == NTB_DEV_USD) {
- ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
+ intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
XEON_B2B_BAR2_ADDR64);
- ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
+ intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
XEON_B2B_BAR4_ADDR64);
- ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
- ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
+ intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
+ intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
} else {
- ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
+ intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET,
XEON_B2B_BAR2_ADDR64);
- ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
+ intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET,
XEON_B2B_BAR4_ADDR64);
- ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
- ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
+ intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64);
+ intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64);
}
}
@@ -1649,7 +1660,7 @@ xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx,
struct ntb_pci_bar_info *bar;
uint8_t bar_sz;
- if (!HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
+ if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3)
return;
bar = &ntb->bar_info[idx];
@@ -1673,28 +1684,28 @@ xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr,
uint32_t base_reg, lmt_reg;
bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg);
- if (idx == regbar)
- bar_addr += ntb->b2b_off;
+ if (idx == regbar) {
+ if (ntb->b2b_off)
+ bar_addr += ntb->b2b_off;
+ else
+ bar_addr = 0;
+ }
- /*
- * Set limit registers first to avoid an errata where setting the base
- * registers locks the limit registers.
- */
if (!bar_is_64bit(ntb, idx)) {
- ntb_reg_write(4, lmt_reg, bar_addr);
- reg_val = ntb_reg_read(4, lmt_reg);
+ intel_ntb_reg_write(4, base_reg, bar_addr);
+ reg_val = intel_ntb_reg_read(4, base_reg);
(void)reg_val;
- ntb_reg_write(4, base_reg, bar_addr);
- reg_val = ntb_reg_read(4, base_reg);
+ intel_ntb_reg_write(4, lmt_reg, bar_addr);
+ reg_val = intel_ntb_reg_read(4, lmt_reg);
(void)reg_val;
} else {
- ntb_reg_write(8, lmt_reg, bar_addr);
- reg_val = ntb_reg_read(8, lmt_reg);
+ intel_ntb_reg_write(8, base_reg, bar_addr);
+ reg_val = intel_ntb_reg_read(8, base_reg);
(void)reg_val;
- ntb_reg_write(8, base_reg, bar_addr);
- reg_val = ntb_reg_read(8, base_reg);
+ intel_ntb_reg_write(8, lmt_reg, bar_addr);
+ reg_val = intel_ntb_reg_read(8, lmt_reg);
(void)reg_val;
}
}
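
The rewritten helper now programs base before limit and zeroes the regbar base
when there is no b2b_off; the (void)reg_val read-backs it keeps are the usual
MMIO posted-write flush idiom, not dead code.  The pattern in isolation (a
generic sketch, not this driver's accessors):

static inline void
mmio_write_flush32(volatile uint32_t *reg, uint32_t val)
{
	uint32_t flush;

	*reg = val;	/* posted write may sit in a buffer */
	flush = *reg;	/* read-back forces it out to the device */
	(void)flush;
}
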
@@ -1705,30 +1716,17 @@ xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx)
struct ntb_pci_bar_info *bar;
bar = &ntb->bar_info[idx];
- if (HAS_FEATURE(NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
- ntb_reg_write(4, bar->pbarxlat_off, base_addr);
- base_addr = ntb_reg_read(4, bar->pbarxlat_off);
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) {
+ intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr);
+ base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off);
} else {
- ntb_reg_write(8, bar->pbarxlat_off, base_addr);
- base_addr = ntb_reg_read(8, bar->pbarxlat_off);
+ intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr);
+ base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off);
}
(void)base_addr;
}
static int
-xeon_setup_msix_bar(struct ntb_softc *ntb)
-{
- enum ntb_bar bar_num;
-
- if (!HAS_FEATURE(NTB_SB01BASE_LOCKUP))
- return (0);
-
- bar_num = ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
- ntb->peer_lapic_bar = &ntb->bar_info[bar_num];
- return (0);
-}
-
-static int
xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
const struct ntb_b2b_addr *peer_addr)
{
@@ -1742,7 +1740,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
b2b_bar_num = NTB_CONFIG_BAR;
ntb->b2b_off = 0;
} else {
- b2b_bar_num = ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
+ b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx);
KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS,
("invalid b2b mw bar"));
@@ -1773,7 +1771,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
bar_addr = addr->bar0_addr;
else if (b2b_bar_num == NTB_B2B_BAR_1)
bar_addr = addr->bar2_addr64;
- else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
+ else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
bar_addr = addr->bar4_addr64;
else if (b2b_bar_num == NTB_B2B_BAR_2)
bar_addr = addr->bar4_addr32;
@@ -1782,7 +1780,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
else
KASSERT(false, ("invalid bar"));
- ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
+ intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr);
/*
* Other SBARs are normally hit by the PBAR xlat, except for the b2b
@@ -1793,7 +1791,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
*/
xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1,
b2b_bar_num);
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32,
NTB_B2B_BAR_2, b2b_bar_num);
xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32,
@@ -1803,56 +1801,41 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
NTB_B2B_BAR_2, b2b_bar_num);
/* Zero incoming translation addrs */
- ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
- ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
-
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
- size_t size, xlatoffset;
+ intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0);
+ intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0);
- switch (ntb_mw_to_bar(ntb, ntb->msix_mw_idx)) {
- case NTB_B2B_BAR_1:
- size = 8;
- xlatoffset = XEON_SBAR2XLAT_OFFSET;
- break;
- case NTB_B2B_BAR_2:
- xlatoffset = XEON_SBAR4XLAT_OFFSET;
- if (HAS_FEATURE(NTB_SPLIT_BAR))
- size = 4;
- else
- size = 8;
- break;
- case NTB_B2B_BAR_3:
- xlatoffset = XEON_SBAR5XLAT_OFFSET;
- size = 4;
- break;
- default:
- KASSERT(false, ("Bogus msix mw idx: %u",
- ntb->msix_mw_idx));
- return (EINVAL);
- }
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
+ uint32_t xlat_reg, lmt_reg;
+ enum ntb_bar bar_num;
/*
		 * Point the chosen MSIX MW BAR xlat at the remote LAPIC for
		 * the workaround.
*/
- if (size == 4) {
- ntb_reg_write(4, xlatoffset, MSI_INTEL_ADDR_BASE);
- ntb->msix_xlat = ntb_reg_read(4, xlatoffset);
+ bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx);
+ bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg);
+ if (bar_is_64bit(ntb, bar_num)) {
+ intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE);
+ ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg);
+ intel_ntb_reg_write(8, lmt_reg, 0);
} else {
- ntb_reg_write(8, xlatoffset, MSI_INTEL_ADDR_BASE);
- ntb->msix_xlat = ntb_reg_read(8, xlatoffset);
+ intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE);
+ ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg);
+ intel_ntb_reg_write(4, lmt_reg, 0);
}
+
+ ntb->peer_lapic_bar = &ntb->bar_info[bar_num];
}
- (void)ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
- (void)ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
+ (void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET);
+ (void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET);
/* Zero outgoing translation limits (whole bar size windows) */
- ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
- ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
+ intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0);
+ intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0);
/* Set outgoing translation offsets */
xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1);
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2);
xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3);
} else
@@ -1864,7 +1847,7 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
bar_addr = peer_addr->bar0_addr;
else if (b2b_bar_num == NTB_B2B_BAR_1)
bar_addr = peer_addr->bar2_addr64;
- else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(NTB_SPLIT_BAR))
+ else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR))
bar_addr = peer_addr->bar4_addr64;
else if (b2b_bar_num == NTB_B2B_BAR_2)
bar_addr = peer_addr->bar4_addr32;
@@ -1877,8 +1860,8 @@ xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr,
* B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits
* at a time.
*/
- ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
- ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
+ intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff);
+ intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32);
return (0);
}
@@ -1897,7 +1880,7 @@ link_is_up(struct ntb_softc *ntb)
if (ntb->type == NTB_XEON)
return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good ||
- !HAS_FEATURE(NTB_SB01BASE_LOCKUP)));
+ !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)));
KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0);
@@ -1910,11 +1893,11 @@ atom_link_is_err(struct ntb_softc *ntb)
KASSERT(ntb->type == NTB_ATOM, ("ntb type"));
- status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0)
return (true);
- status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
return ((status & ATOM_IBIST_ERR_OFLOW) != 0);
}
@@ -1937,8 +1920,8 @@ atom_link_hb(void *arg)
goto out;
}
- if (ntb_poll_link(ntb))
- ntb_link_event(ntb);
+ if (intel_ntb_poll_link(ntb))
+ ntb_link_event(ntb->device);
if (!link_is_up(ntb) && atom_link_is_err(ntb)) {
/* Link is down with error, proceed with recovery */
@@ -1956,166 +1939,47 @@ atom_perform_link_restart(struct ntb_softc *ntb)
uint32_t status;
/* Driver resets the NTB ModPhy lanes - magic! */
- ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
- ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
- ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
- ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
+ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0);
+ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40);
+ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60);
+ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60);
/* Driver waits 100ms to allow the NTB ModPhy to settle */
pause("ModPhy", hz / 10);
/* Clear AER Errors, write to clear */
- status = ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET);
status &= PCIM_AER_COR_REPLAY_ROLLOVER;
- ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
+ intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status);
/* Clear unexpected electrical idle event in LTSSM, write to clear */
- status = ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET);
status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
- ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
+ intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status);
/* Clear DeSkew Buffer error, write to clear */
- status = ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET);
status |= ATOM_DESKEWSTS_DBERR;
- ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
+ intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status);
- status = ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET);
status &= ATOM_IBIST_ERR_OFLOW;
- ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
+ intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status);
/* Releases the NTB state machine to allow the link to retrain */
- status = ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
+ status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET);
status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
- ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
-}
-
-/*
- * ntb_set_ctx() - associate a driver context with an ntb device
- * @ntb: NTB device context
- * @ctx: Driver context
- * @ctx_ops: Driver context operations
- *
- * Associate a driver context and operations with a ntb device. The context is
- * provided by the client driver, and the driver may associate a different
- * context with each ntb device.
- *
- * Return: Zero if the context is associated, otherwise an error number.
- */
-int
-ntb_set_ctx(struct ntb_softc *ntb, void *ctx, const struct ntb_ctx_ops *ops)
-{
-
- if (ctx == NULL || ops == NULL)
- return (EINVAL);
- if (ntb->ctx_ops != NULL)
- return (EINVAL);
-
- CTX_LOCK(ntb);
- if (ntb->ctx_ops != NULL) {
- CTX_UNLOCK(ntb);
- return (EINVAL);
- }
- ntb->ntb_ctx = ctx;
- ntb->ctx_ops = ops;
- CTX_UNLOCK(ntb);
-
- return (0);
-}
-
-/*
- * It is expected that this will only be used from contexts where the ctx_lock
- * is not needed to protect ntb_ctx lifetime.
- */
-void *
-ntb_get_ctx(struct ntb_softc *ntb, const struct ntb_ctx_ops **ops)
-{
-
- KASSERT(ntb->ntb_ctx != NULL && ntb->ctx_ops != NULL, ("bogus"));
- if (ops != NULL)
- *ops = ntb->ctx_ops;
- return (ntb->ntb_ctx);
-}
-
-/*
- * ntb_clear_ctx() - disassociate any driver context from an ntb device
- * @ntb: NTB device context
- *
- * Clear any association that may exist between a driver context and the ntb
- * device.
- */
-void
-ntb_clear_ctx(struct ntb_softc *ntb)
-{
-
- CTX_LOCK(ntb);
- ntb->ntb_ctx = NULL;
- ntb->ctx_ops = NULL;
- CTX_UNLOCK(ntb);
-}
-
-/*
- * ntb_link_event() - notify driver context of a change in link status
- * @ntb: NTB device context
- *
- * Notify the driver context that the link status may have changed. The driver
- * should call ntb_link_is_up() to get the current status.
- */
-void
-ntb_link_event(struct ntb_softc *ntb)
-{
-
- CTX_LOCK(ntb);
- if (ntb->ctx_ops != NULL && ntb->ctx_ops->link_event != NULL)
- ntb->ctx_ops->link_event(ntb->ntb_ctx);
- CTX_UNLOCK(ntb);
+ intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status);
}
-/*
- * ntb_db_event() - notify driver context of a doorbell event
- * @ntb: NTB device context
- * @vector: Interrupt vector number
- *
- * Notify the driver context of a doorbell event. If hardware supports
- * multiple interrupt vectors for doorbells, the vector number indicates which
- * vector received the interrupt. The vector number is relative to the first
- * vector used for doorbells, starting at zero, and must be less than
- * ntb_db_vector_count(). The driver may call ntb_db_read() to check which
- * doorbell bits need service, and ntb_db_vector_mask() to determine which of
- * those bits are associated with the vector number.
- */
-static void
-ntb_db_event(struct ntb_softc *ntb, uint32_t vec)
-{
-
- CTX_LOCK(ntb);
- if (ntb->ctx_ops != NULL && ntb->ctx_ops->db_event != NULL)
- ntb->ctx_ops->db_event(ntb->ntb_ctx, vec);
- CTX_UNLOCK(ntb);
-}
-
-/*
- * ntb_link_enable() - enable the link on the secondary side of the ntb
- * @ntb: NTB device context
- * @max_speed: The maximum link speed expressed as PCIe generation number[0]
- * @max_width: The maximum link width expressed as the number of PCIe lanes[0]
- *
- * Enable the link on the secondary side of the ntb. This can only be done
- * from the primary side of the ntb in primary or b2b topology. The ntb device
- * should train the link to its maximum speed and width, or the requested speed
- * and width, whichever is smaller, if supported.
- *
- * Return: Zero on success, otherwise an error number.
- *
- * [0]: Only NTB_SPEED_AUTO and NTB_WIDTH_AUTO are valid inputs; other speed
- * and width input will be ignored.
- */
-int
-ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
- enum ntb_width w __unused)
+static int
+intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused,
+ enum ntb_width width __unused)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
uint32_t cntl;
- ntb_printf(2, "%s\n", __func__);
+ intel_ntb_printf(2, "%s\n", __func__);
if (ntb->type == NTB_ATOM) {
pci_write_config(ntb->device, NTB_PPD_OFFSET,
@@ -2124,57 +1988,47 @@ ntb_link_enable(struct ntb_softc *ntb, enum ntb_speed s __unused,
}
if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
- ntb_link_event(ntb);
+ ntb_link_event(dev);
return (0);
}
- cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+ cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
- if (HAS_FEATURE(NTB_SPLIT_BAR))
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
- ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
+ intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
return (0);
}
-/*
- * ntb_link_disable() - disable the link on the secondary side of the ntb
- * @ntb: NTB device context
- *
- * Disable the link on the secondary side of the ntb. This can only be done
- * from the primary side of the ntb in primary or b2b topology. The ntb device
- * should disable the link. Returning from this call must indicate that a
- * barrier has passed, though with no more writes may pass in either direction
- * across the link, except if this call returns an error number.
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_link_disable(struct ntb_softc *ntb)
+static int
+intel_ntb_link_disable(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
uint32_t cntl;
- ntb_printf(2, "%s\n", __func__);
+ intel_ntb_printf(2, "%s\n", __func__);
if (ntb->conn_type == NTB_CONN_TRANSPARENT) {
- ntb_link_event(ntb);
+ ntb_link_event(dev);
return (0);
}
- cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+ cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
- if (HAS_FEATURE(NTB_SPLIT_BAR))
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR))
cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
- ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
+ intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl);
return (0);
}
-bool
-ntb_link_enabled(struct ntb_softc *ntb)
+static bool
+intel_ntb_link_enabled(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
uint32_t cntl;
if (ntb->type == NTB_ATOM) {
@@ -2185,7 +2039,7 @@ ntb_link_enabled(struct ntb_softc *ntb)
if (ntb->conn_type == NTB_CONN_TRANSPARENT)
return (true);
- cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+ cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
return ((cntl & NTB_CNTL_LINK_DISABLE) == 0);
}
@@ -2210,11 +2064,11 @@ recover_atom_link(void *arg)
if (atom_link_is_err(ntb))
goto retry;
- status32 = ntb_reg_read(4, ntb->reg->ntb_ctl);
+ status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
if ((status32 & ATOM_CNTL_LINK_DOWN) != 0)
goto out;
- status32 = ntb_reg_read(4, ntb->reg->lnk_sta);
+ status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
width = NTB_LNK_STA_WIDTH(status32);
speed = status32 & NTB_LINK_SPEED_MASK;
@@ -2237,18 +2091,18 @@ retry:
* Polls the HW link status register(s); returns true if something has changed.
*/
static bool
-ntb_poll_link(struct ntb_softc *ntb)
+intel_ntb_poll_link(struct ntb_softc *ntb)
{
uint32_t ntb_cntl;
uint16_t reg_val;
if (ntb->type == NTB_ATOM) {
- ntb_cntl = ntb_reg_read(4, ntb->reg->ntb_ctl);
+ ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl);
if (ntb_cntl == ntb->ntb_ctl)
return (false);
ntb->ntb_ctl = ntb_cntl;
- ntb->lnk_sta = ntb_reg_read(4, ntb->reg->lnk_sta);
+ ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta);
} else {
db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask);
@@ -2258,11 +2112,11 @@ ntb_poll_link(struct ntb_softc *ntb)
ntb->lnk_sta = reg_val;
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
if (_xeon_link_is_up(ntb)) {
if (!ntb->peer_msix_good) {
callout_reset(&ntb->peer_msix_work, 0,
- ntb_exchange_msix, ntb);
+ intel_ntb_exchange_msix, ntb);
return (false);
}
} else {
@@ -2275,7 +2129,7 @@ ntb_poll_link(struct ntb_softc *ntb)
}
static inline enum ntb_speed
-ntb_link_sta_speed(struct ntb_softc *ntb)
+intel_ntb_link_sta_speed(struct ntb_softc *ntb)
{
if (!link_is_up(ntb))
@@ -2284,7 +2138,7 @@ ntb_link_sta_speed(struct ntb_softc *ntb)
}
static inline enum ntb_width
-ntb_link_sta_width(struct ntb_softc *ntb)
+intel_ntb_link_sta_width(struct ntb_softc *ntb)
{
if (!link_is_up(ntb))
@@ -2306,7 +2160,7 @@ SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0,
#define NTB_REGFLAGS_MASK (NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG)
static void
-ntb_sysctl_init(struct ntb_softc *ntb)
+intel_ntb_sysctl_init(struct ntb_softc *ntb)
{
struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar;
struct sysctl_ctx_list *ctx;
@@ -2405,7 +2259,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_64 | ntb->xlat_reg->bar2_xlat,
sysctl_handle_register, "QU", "Incoming XLAT23 register");
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4",
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_32 | ntb->xlat_reg->bar4_xlat,
@@ -2425,7 +2279,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_64 | ntb->xlat_reg->bar2_limit,
sysctl_handle_register, "QU", "Incoming LMT23 register");
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4",
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_32 | ntb->xlat_reg->bar4_limit,
@@ -2516,7 +2370,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off,
sysctl_handle_register, "QU", "Outgoing XLAT23 register");
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4",
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off,
@@ -2536,7 +2390,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_64 | XEON_PBAR2LMT_OFFSET,
sysctl_handle_register, "QU", "Outgoing LMT23 register");
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4",
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_32 | XEON_PBAR4LMT_OFFSET,
@@ -2560,7 +2414,7 @@ ntb_sysctl_init(struct ntb_softc *ntb)
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_64 | ntb->xlat_reg->bar2_base,
sysctl_handle_register, "QU", "Secondary BAR23 base register");
- if (HAS_FEATURE(NTB_SPLIT_BAR)) {
+ if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) {
SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base",
CTLFLAG_RD | CTLTYPE_OPAQUE, ntb,
NTB_REG_32 | ntb->xlat_reg->bar4_base,
@@ -2583,13 +2437,10 @@ ntb_sysctl_init(struct ntb_softc *ntb)
static int
sysctl_handle_features(SYSCTL_HANDLER_ARGS)
{
- struct ntb_softc *ntb;
+ struct ntb_softc *ntb = arg1;
struct sbuf sb;
int error;
- error = 0;
- ntb = arg1;
-
sbuf_new_for_sysctl(&sb, NULL, 256, req);
sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR);
@@ -2604,14 +2455,11 @@ sysctl_handle_features(SYSCTL_HANDLER_ARGS)
static int
sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
{
- struct ntb_softc *ntb;
+ struct ntb_softc *ntb = arg1;
unsigned old, new;
int error;
- error = 0;
- ntb = arg1;
-
- old = ntb_link_enabled(ntb);
+ old = intel_ntb_link_enabled(ntb->device);
error = SYSCTL_OUT(req, &old, sizeof(old));
if (error != 0 || req->newptr == NULL)
@@ -2621,31 +2469,28 @@ sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS)
if (error != 0)
return (error);
- ntb_printf(0, "Admin set interface state to '%sabled'\n",
+ intel_ntb_printf(0, "Admin set interface state to '%sabled'\n",
(new != 0)? "en" : "dis");
if (new != 0)
- error = ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+ error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
else
- error = ntb_link_disable(ntb);
+ error = intel_ntb_link_disable(ntb->device);
return (error);
}
static int
sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
{
- struct ntb_softc *ntb;
+ struct ntb_softc *ntb = arg1;
struct sbuf sb;
enum ntb_speed speed;
enum ntb_width width;
int error;
- error = 0;
- ntb = arg1;
-
sbuf_new_for_sysctl(&sb, NULL, 32, req);
- if (ntb_link_is_up(ntb, &speed, &width))
+ if (intel_ntb_link_is_up(ntb->device, &speed, &width))
sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u",
(unsigned)speed, (unsigned)width);
else
@@ -2662,14 +2507,11 @@ sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS)
static int
sysctl_handle_link_status(SYSCTL_HANDLER_ARGS)
{
- struct ntb_softc *ntb;
+ struct ntb_softc *ntb = arg1;
unsigned res;
int error;
- error = 0;
- ntb = arg1;
-
- res = ntb_link_is_up(ntb, NULL, NULL);
+ res = intel_ntb_link_is_up(ntb->device, NULL, NULL);
error = SYSCTL_OUT(req, &res, sizeof(res));
if (error || !req->newptr)
@@ -2708,28 +2550,28 @@ sysctl_handle_register(SYSCTL_HANDLER_ARGS)
if (pci)
umv = pci_read_config(ntb->device, reg, 8);
else
- umv = ntb_reg_read(8, reg);
+ umv = intel_ntb_reg_read(8, reg);
outsz = sizeof(uint64_t);
break;
case NTB_REG_32:
if (pci)
umv = pci_read_config(ntb->device, reg, 4);
else
- umv = ntb_reg_read(4, reg);
+ umv = intel_ntb_reg_read(4, reg);
outsz = sizeof(uint32_t);
break;
case NTB_REG_16:
if (pci)
umv = pci_read_config(ntb->device, reg, 2);
else
- umv = ntb_reg_read(2, reg);
+ umv = intel_ntb_reg_read(2, reg);
outsz = sizeof(uint16_t);
break;
case NTB_REG_8:
if (pci)
umv = pci_read_config(ntb->device, reg, 1);
else
- umv = ntb_reg_read(1, reg);
+ umv = intel_ntb_reg_read(1, reg);
outsz = sizeof(uint8_t);
break;
default:
@@ -2749,7 +2591,7 @@ sysctl_handle_register(SYSCTL_HANDLER_ARGS)
}
static unsigned
-ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
+intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
{
if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 &&
@@ -2763,8 +2605,21 @@ ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx)
return (uidx);
}
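
This remap exists because a window wholly consumed for B2B register access
(and, on the errata path, the MSIX window) vanishes from the user-visible
numbering that intel_ntb_mw_count() reports.  A worked example of the skip,
with a hypothetical reserved index; the exact condition is truncated in this
hunk:

#include <stdio.h>

int
main(void)
{
	int mw_count = 3, reserved = 1;	/* hypothetical layout */
	int uidx, idx;

	for (uidx = 0; uidx < mw_count - 1; uidx++) {
		idx = (uidx >= reserved) ? uidx + 1 : uidx;
		printf("user mw %d -> hw mw %d\n", uidx, idx);
	}
	return (0);
}
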
+#ifndef EARLY_AP_STARTUP
+static int msix_ready;
+
+static void
+intel_ntb_msix_ready(void *arg __unused)
+{
+
+ msix_ready = 1;
+}
+SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY,
+ intel_ntb_msix_ready, NULL);
+#endif
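
Without EARLY_AP_STARTUP, interrupt-to-CPU bindings can still be reshuffled
while the APs come up, so MSI-X address/data captured before SI_SUB_SMP could
go stale; the flag lets the exchange callout below spin until the system is
past that point.  The consumer side of the gate, as a sketch that assumes this
file's own headers and the msix_ready flag above:

static void
example_deferred(void *arg)
{
	struct callout *co = arg;

#ifndef EARLY_AP_STARTUP
	if (!msix_ready) {
		/* Too early; retry shortly, as the MSIX exchange does. */
		callout_reset(co, hz / 100, example_deferred, co);
		return;
	}
#endif
	/* ... now safe to latch the local MSI-X table contents ... */
}
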
+
static void
-ntb_exchange_msix(void *ctx)
+intel_ntb_exchange_msix(void *ctx)
{
struct ntb_softc *ntb;
uint32_t val;
@@ -2777,42 +2632,50 @@ ntb_exchange_msix(void *ctx)
if (ntb->peer_msix_done)
goto msix_done;
+#ifndef EARLY_AP_STARTUP
+	/* Block MSIX negotiation until SMP is up and IRQs are reshuffled. */
+ if (!msix_ready)
+ goto reschedule;
+#endif
+
+ intel_ntb_get_msix_info(ntb);
for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
- ntb_peer_spad_write(ntb, NTB_MSIX_DATA0 + i,
+ intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i,
ntb->msix_data[i].nmd_data);
- ntb_peer_spad_write(ntb, NTB_MSIX_OFS0 + i,
+ intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i,
ntb->msix_data[i].nmd_ofs - ntb->msix_xlat);
}
- ntb_peer_spad_write(ntb, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
+ intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD);
- ntb_spad_read(ntb, NTB_MSIX_GUARD, &val);
+ intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val);
if (val != NTB_MSIX_VER_GUARD)
goto reschedule;
for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
- ntb_spad_read(ntb, NTB_MSIX_DATA0 + i, &val);
- ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
+ intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val);
+ intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val);
ntb->peer_msix_data[i].nmd_data = val;
- ntb_spad_read(ntb, NTB_MSIX_OFS0 + i, &val);
- ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
+ intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val);
+ intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val);
ntb->peer_msix_data[i].nmd_ofs = val;
}
ntb->peer_msix_done = true;
msix_done:
- ntb_peer_spad_write(ntb, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
- ntb_spad_read(ntb, NTB_MSIX_DONE, &val);
+ intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED);
+ intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val);
if (val != NTB_MSIX_RECEIVED)
goto reschedule;
+ intel_ntb_spad_clear(ntb->device);
ntb->peer_msix_good = true;
/* Give peer time to see our NTB_MSIX_RECEIVED. */
goto reschedule;
msix_good:
- ntb_poll_link(ntb);
- ntb_link_event(ntb);
+ intel_ntb_poll_link(ntb);
+ ntb_link_event(ntb->device);
return;
reschedule:
@@ -2820,40 +2683,27 @@ reschedule:
if (_xeon_link_is_up(ntb)) {
callout_reset(&ntb->peer_msix_work,
hz * (ntb->peer_msix_good ? 2 : 1) / 100,
- ntb_exchange_msix, ntb);
+ intel_ntb_exchange_msix, ntb);
} else
- ntb_spad_clear(ntb);
+ intel_ntb_spad_clear(ntb->device);
}
/*
* Public API to the rest of the OS
*/
-/**
- * ntb_get_max_spads() - get the total scratch regs usable
- * @ntb: pointer to ntb_softc instance
- *
- * This function returns the max 32bit scratchpad registers usable by the
- * upper layer.
- *
- * RETURNS: total number of scratch pad registers available
- */
-uint8_t
-ntb_get_max_spads(struct ntb_softc *ntb)
+static uint8_t
+intel_ntb_spad_count(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
return (ntb->spad_count);
}
-/*
- * ntb_mw_count() - Get the number of memory windows available for KPI
- * consumers.
- *
- * (Excludes any MW wholly reserved for register access.)
- */
-uint8_t
-ntb_mw_count(struct ntb_softc *ntb)
+static uint8_t
+intel_ntb_mw_count(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
uint8_t res;
res = ntb->mw_count;
@@ -2864,25 +2714,15 @@ ntb_mw_count(struct ntb_softc *ntb)
return (res);
}
-/**
- * ntb_spad_write() - write to the secondary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to the scratchpad register, 0 based
- * @val: the data value to put into the register
- *
- * This function allows writing of a 32bit value to the indexed scratchpad
- * register. The register resides on the secondary (external) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
+static int
+intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (idx >= ntb->spad_count)
return (EINVAL);
- ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
+ intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val);
return (0);
}
@@ -2890,122 +2730,77 @@ ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
/*
* Zeros the local scratchpad.
*/
-void
-ntb_spad_clear(struct ntb_softc *ntb)
+static void
+intel_ntb_spad_clear(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
unsigned i;
for (i = 0; i < ntb->spad_count; i++)
- ntb_spad_write(ntb, i, 0);
+ intel_ntb_spad_write(dev, i, 0);
}
-/**
- * ntb_spad_read() - read from the primary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to scratchpad register, 0 based
- * @val: pointer to 32bit integer for storing the register value
- *
- * This function allows reading of the 32bit scratchpad register on
- * the primary (internal) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
+static int
+intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (idx >= ntb->spad_count)
return (EINVAL);
- *val = ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
+ *val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);
return (0);
}
-/**
- * ntb_peer_spad_write() - write to the secondary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to the scratchpad register, 0 based
- * @val: the data value to put into the register
- *
- * This function allows writing of a 32bit value to the indexed scratchpad
- * register. The register resides on the secondary (external) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val)
+static int
+intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (idx >= ntb->spad_count)
return (EINVAL);
- if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
- ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
+ if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
+ intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
else
- ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
+ intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);
return (0);
}
-/**
- * ntb_peer_spad_read() - read from the primary scratchpad register
- * @ntb: pointer to ntb_softc instance
- * @idx: index to scratchpad register, 0 based
- * @val: pointer to 32bit integer for storing the register value
- *
- * This function allows reading of the 32bit scratchpad register on
- * the primary (internal) side.
- *
- * RETURNS: An appropriate ERRNO error value on error, or zero for success.
- */
-int
-ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val)
+static int
+intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (idx >= ntb->spad_count)
return (EINVAL);
- if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP))
- *val = ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
+ if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
+ *val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
else
- *val = ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
+ *val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);
return (0);
}
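
With the scratchpad accessors now static and keyed off device_t, clients are
expected to go through the ntb_if wrappers.  A hypothetical consumer-side use,
assuming the thin ntb_spad_read()/ntb_peer_spad_write() wrappers in the new
sys/dev/ntb/ntb.h mirror these methods:

static int
exchange_version(device_t ntb, uint32_t my_ver, uint32_t *peer_ver)
{
	int error;

	/* Scratchpad slot 0 is an arbitrary example index. */
	error = ntb_peer_spad_write(ntb, 0, my_ver);
	if (error != 0)
		return (error);
	return (ntb_spad_read(ntb, 0, peer_ver));
}
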
-/*
- * ntb_mw_get_range() - get the range of a memory window
- * @ntb: NTB device context
- * @idx: Memory window number
- * @base: OUT - the base address for mapping the memory window
- * @size: OUT - the size for mapping the memory window
- * @align: OUT - the base alignment for translating the memory window
- * @align_size: OUT - the size alignment for translating the memory window
- *
- * Get the range of a memory window. NULL may be given for any output
- * parameter if the value is not needed. The base and size may be used for
- * mapping the memory window, to access the peer memory. The alignment and
- * size may be used for translating the memory window, for the peer to access
- * memory on the local system.
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
+static int
+intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
bus_addr_t *plimit)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
struct ntb_pci_bar_info *bar;
bus_addr_t limit;
size_t bar_b2b_off;
enum ntb_bar bar_num;
- if (mw_idx >= ntb_mw_count(ntb))
+ if (mw_idx >= intel_ntb_mw_count(dev))
return (EINVAL);
- mw_idx = ntb_user_mw_to_idx(ntb, mw_idx);
+ mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);
- bar_num = ntb_mw_to_bar(ntb, mw_idx);
+ bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
bar = &ntb->bar_info[bar_num];
bar_b2b_off = 0;
if (mw_idx == ntb->b2b_mw_idx) {
@@ -3034,37 +2829,21 @@ ntb_mw_get_range(struct ntb_softc *ntb, unsigned mw_idx, vm_paddr_t *base,
return (0);
}
-/*
- * ntb_mw_set_trans() - set the translation of a memory window
- * @ntb: NTB device context
- * @idx: Memory window number
- * @addr: The dma address local memory to expose to the peer
- * @size: The size of the local memory to expose to the peer
- *
- * Set the translation of a memory window. The peer may access local memory
- * through the window starting at the address, up to the size. The address
- * must be aligned to the alignment specified by ntb_mw_get_range(). The size
- * must be aligned to the size alignment specified by ntb_mw_get_range(). The
- * address must be below the plimit specified by ntb_mw_get_range() (i.e. for
- * 32-bit BARs).
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
- size_t size)
+static int
+intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
struct ntb_pci_bar_info *bar;
uint64_t base, limit, reg_val;
size_t bar_size, mw_size;
uint32_t base_reg, xlat_reg, limit_reg;
enum ntb_bar bar_num;
- if (idx >= ntb_mw_count(ntb))
+ if (idx >= intel_ntb_mw_count(dev))
return (EINVAL);
- idx = ntb_user_mw_to_idx(ntb, idx);
+ idx = intel_ntb_user_mw_to_idx(ntb, idx);
- bar_num = ntb_mw_to_bar(ntb, idx);
+ bar_num = intel_ntb_mw_to_bar(ntb, idx);
bar = &ntb->bar_info[bar_num];
bar_size = bar->size;
@@ -3084,25 +2863,25 @@ ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
limit = 0;
if (bar_is_64bit(ntb, bar_num)) {
- base = ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
+ base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;
if (limit_reg != 0 && size != mw_size)
limit = base + size;
/* Set and verify translation address */
- ntb_reg_write(8, xlat_reg, addr);
- reg_val = ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
+ intel_ntb_reg_write(8, xlat_reg, addr);
+ reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
if (reg_val != addr) {
- ntb_reg_write(8, xlat_reg, 0);
+ intel_ntb_reg_write(8, xlat_reg, 0);
return (EIO);
}
/* Set and verify the limit */
- ntb_reg_write(8, limit_reg, limit);
- reg_val = ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
+ intel_ntb_reg_write(8, limit_reg, limit);
+ reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
if (reg_val != limit) {
- ntb_reg_write(8, limit_reg, base);
- ntb_reg_write(8, xlat_reg, 0);
+ intel_ntb_reg_write(8, limit_reg, base);
+ intel_ntb_reg_write(8, xlat_reg, 0);
return (EIO);
}
} else {
@@ -3113,98 +2892,72 @@ ntb_mw_set_trans(struct ntb_softc *ntb, unsigned idx, bus_addr_t addr,
if (((addr + size) & UINT32_MAX) != (addr + size))
return (ERANGE);
- base = ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
+ base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;
if (limit_reg != 0 && size != mw_size)
limit = base + size;
/* Set and verify translation address */
- ntb_reg_write(4, xlat_reg, addr);
- reg_val = ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
+ intel_ntb_reg_write(4, xlat_reg, addr);
+ reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
if (reg_val != addr) {
- ntb_reg_write(4, xlat_reg, 0);
+ intel_ntb_reg_write(4, xlat_reg, 0);
return (EIO);
}
/* Set and verify the limit */
- ntb_reg_write(4, limit_reg, limit);
- reg_val = ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
+ intel_ntb_reg_write(4, limit_reg, limit);
+ reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
if (reg_val != limit) {
- ntb_reg_write(4, limit_reg, base);
- ntb_reg_write(4, xlat_reg, 0);
+ intel_ntb_reg_write(4, limit_reg, base);
+ intel_ntb_reg_write(4, xlat_reg, 0);
return (EIO);
}
}
return (0);
}
-/*
- * ntb_mw_clear_trans() - clear the translation of a memory window
- * @ntb: NTB device context
- * @idx: Memory window number
- *
- * Clear the translation of a memory window. The peer may no longer access
- * local memory through the window.
- *
- * Return: Zero on success, otherwise an error number.
- */
-int
-ntb_mw_clear_trans(struct ntb_softc *ntb, unsigned mw_idx)
+static int
+intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
{
- return (ntb_mw_set_trans(ntb, mw_idx, 0, 0));
+ return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
}
-/*
- * ntb_mw_get_wc - Get the write-combine status of a memory window
- *
- * Returns: Zero on success, setting *wc; otherwise an error number (e.g. if
- * idx is an invalid memory window).
- *
- * Mode is a VM_MEMATTR_* type.
- */
-int
-ntb_mw_get_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t *mode)
+static int
+intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
struct ntb_pci_bar_info *bar;
- if (idx >= ntb_mw_count(ntb))
+ if (idx >= intel_ntb_mw_count(dev))
return (EINVAL);
- idx = ntb_user_mw_to_idx(ntb, idx);
+ idx = intel_ntb_user_mw_to_idx(ntb, idx);
- bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
+ bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
*mode = bar->map_mode;
return (0);
}
-/*
- * ntb_mw_set_wc - Set the write-combine status of a memory window
- *
- * If 'mode' matches the current status, this does nothing and succeeds. Mode
- * is a VM_MEMATTR_* type.
- *
- * Returns: Zero on success, setting the caching attribute on the virtual
- * mapping of the BAR; otherwise an error number (e.g. if idx is an invalid
- * memory window, or if changing the caching attribute fails).
- */
-int
-ntb_mw_set_wc(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
+static int
+intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
- if (idx >= ntb_mw_count(ntb))
+ if (idx >= intel_ntb_mw_count(dev))
return (EINVAL);
- idx = ntb_user_mw_to_idx(ntb, idx);
- return (ntb_mw_set_wc_internal(ntb, idx, mode));
+ idx = intel_ntb_user_mw_to_idx(ntb, idx);
+ return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
}
static int
-ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
+intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
{
struct ntb_pci_bar_info *bar;
int rc;
- bar = &ntb->bar_info[ntb_mw_to_bar(ntb, idx)];
+ bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
if (bar->map_mode == mode)
return (0);
@@ -3215,26 +2968,19 @@ ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode)
return (rc);
}
-/**
- * ntb_peer_db_set() - Set the doorbell on the secondary/external side
- * @ntb: pointer to ntb_softc instance
- * @bit: doorbell bits to ring
- *
- * This function allows triggering of a doorbell on the secondary/external
- * side that will initiate an interrupt on the remote host
- */
-void
-ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
+static void
+intel_ntb_peer_db_set(device_t dev, uint64_t bit)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
- if (HAS_FEATURE(NTB_SB01BASE_LOCKUP)) {
+ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
struct ntb_pci_bar_info *lapic;
unsigned i;
lapic = ntb->peer_lapic_bar;
for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
- if ((bit & ntb_db_vector_mask(ntb, i)) != 0)
+ if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0)
bus_space_write_4(lapic->pci_bus_tag,
lapic->pci_bus_handle,
ntb->peer_msix_data[i].nmd_ofs,
@@ -3243,99 +2989,76 @@ ntb_peer_db_set(struct ntb_softc *ntb, uint64_t bit)
return;
}
- if (HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
- ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
+ if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
+ intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
return;
}
db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
}
-/*
- * ntb_get_peer_db_addr() - Return the address of the remote doorbell register,
- * as well as the size of the register (via *sz_out).
- *
- * This function allows a caller using I/OAT DMA to chain the remote doorbell
- * ring to its memory window write.
- *
- * Note that writing the peer doorbell via a memory window will *not* generate
- * an interrupt on the remote host; that must be done separately.
- */
-bus_addr_t
-ntb_get_peer_db_addr(struct ntb_softc *ntb, vm_size_t *sz_out)
+static int
+intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
struct ntb_pci_bar_info *bar;
uint64_t regoff;
- KASSERT(sz_out != NULL, ("must be non-NULL"));
+ KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));
- if (!HAS_FEATURE(NTB_SDOORBELL_LOCKUP)) {
+ if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
bar = &ntb->bar_info[NTB_CONFIG_BAR];
regoff = ntb->peer_reg->db_bell;
} else {
KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
("invalid b2b idx"));
- bar = &ntb->bar_info[ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
+ bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
regoff = XEON_PDOORBELL_OFFSET;
}
KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));
- *sz_out = ntb->reg->db_size;
/* HACK: Specific to current x86 bus implementation. */
- return ((uint64_t)bar->pci_bus_handle + regoff);
+ *db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
+ *db_size = ntb->reg->db_size;
+ return (0);
}
-/*
- * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
- * @ntb: NTB device context
- *
- * Hardware may support different number or arrangement of doorbell bits.
- *
- * Return: A mask of doorbell bits supported by the ntb.
- */
-uint64_t
-ntb_db_valid_mask(struct ntb_softc *ntb)
+static uint64_t
+intel_ntb_db_valid_mask(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
return (ntb->db_valid_mask);
}
-/*
- * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector
- * @ntb: NTB device context
- * @vector: Doorbell vector number
- *
- * Each interrupt vector may have a different number or arrangement of bits.
- *
- * Return: A mask of doorbell bits serviced by a vector.
- */
-uint64_t
-ntb_db_vector_mask(struct ntb_softc *ntb, uint32_t vector)
+static int
+intel_ntb_db_vector_count(device_t dev)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
+
+ return (ntb->db_vec_count);
+}
+
+static uint64_t
+intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
+{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (vector > ntb->db_vec_count)
return (0);
- return (ntb->db_valid_mask & ntb_vec_mask(ntb, vector));
+ return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
}
-/**
- * ntb_link_is_up() - get the current ntb link state
- * @ntb: NTB device context
- * @speed: OUT - The link speed expressed as PCIe generation number
- * @width: OUT - The link width expressed as the number of PCIe lanes
- *
- * RETURNS: true or false based on the hardware link state
- */
-bool
-ntb_link_is_up(struct ntb_softc *ntb, enum ntb_speed *speed,
- enum ntb_width *width)
+static bool
+intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width)
{
+ struct ntb_softc *ntb = device_get_softc(dev);
if (speed != NULL)
- *speed = ntb_link_sta_speed(ntb);
+ *speed = intel_ntb_link_sta_speed(ntb);
if (width != NULL)
- *width = ntb_link_sta_width(ntb);
+ *width = intel_ntb_link_sta_width(ntb);
return (link_is_up(ntb));
}
@@ -3350,17 +3073,42 @@ save_bar_parameters(struct ntb_pci_bar_info *bar)
bar->vbase = rman_get_virtual(bar->pci_resource);
}
-device_t
-ntb_get_device(struct ntb_softc *ntb)
-{
-
- return (ntb->device);
-}
-
-/* Export HW-specific errata information. */
-bool
-ntb_has_feature(struct ntb_softc *ntb, uint32_t feature)
-{
+static device_method_t ntb_intel_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, intel_ntb_probe),
+ DEVMETHOD(device_attach, intel_ntb_attach),
+ DEVMETHOD(device_detach, intel_ntb_detach),
+ /* NTB interface */
+ DEVMETHOD(ntb_link_is_up, intel_ntb_link_is_up),
+ DEVMETHOD(ntb_link_enable, intel_ntb_link_enable),
+ DEVMETHOD(ntb_link_disable, intel_ntb_link_disable),
+ DEVMETHOD(ntb_link_enabled, intel_ntb_link_enabled),
+ DEVMETHOD(ntb_mw_count, intel_ntb_mw_count),
+ DEVMETHOD(ntb_mw_get_range, intel_ntb_mw_get_range),
+ DEVMETHOD(ntb_mw_set_trans, intel_ntb_mw_set_trans),
+ DEVMETHOD(ntb_mw_clear_trans, intel_ntb_mw_clear_trans),
+ DEVMETHOD(ntb_mw_get_wc, intel_ntb_mw_get_wc),
+ DEVMETHOD(ntb_mw_set_wc, intel_ntb_mw_set_wc),
+ DEVMETHOD(ntb_spad_count, intel_ntb_spad_count),
+ DEVMETHOD(ntb_spad_clear, intel_ntb_spad_clear),
+ DEVMETHOD(ntb_spad_write, intel_ntb_spad_write),
+ DEVMETHOD(ntb_spad_read, intel_ntb_spad_read),
+ DEVMETHOD(ntb_peer_spad_write, intel_ntb_peer_spad_write),
+ DEVMETHOD(ntb_peer_spad_read, intel_ntb_peer_spad_read),
+ DEVMETHOD(ntb_db_valid_mask, intel_ntb_db_valid_mask),
+ DEVMETHOD(ntb_db_vector_count, intel_ntb_db_vector_count),
+ DEVMETHOD(ntb_db_vector_mask, intel_ntb_db_vector_mask),
+ DEVMETHOD(ntb_db_clear, intel_ntb_db_clear),
+ DEVMETHOD(ntb_db_clear_mask, intel_ntb_db_clear_mask),
+ DEVMETHOD(ntb_db_read, intel_ntb_db_read),
+ DEVMETHOD(ntb_db_set_mask, intel_ntb_db_set_mask),
+ DEVMETHOD(ntb_peer_db_addr, intel_ntb_peer_db_addr),
+ DEVMETHOD(ntb_peer_db_set, intel_ntb_peer_db_set),
+ DEVMETHOD_END
+};
- return (HAS_FEATURE(feature));
-}
+static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
+ sizeof(struct ntb_softc));
+DRIVER_MODULE(ntb_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
+MODULE_DEPEND(ntb_intel, ntb, 1, 1, 1);
+MODULE_VERSION(ntb_intel, 1);
diff --git a/sys/dev/ntb/ntb_hw/ntb_hw.h b/sys/dev/ntb/ntb_hw/ntb_hw.h
deleted file mode 100644
index f05acda..0000000
--- a/sys/dev/ntb/ntb_hw/ntb_hw.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*-
- * Copyright (C) 2013 Intel Corporation
- * Copyright (C) 2015 EMC Corporation
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _NTB_HW_H_
-#define _NTB_HW_H_
-
-struct ntb_softc;
-
-#define NTB_MAX_NUM_MW 3
-
-enum ntb_speed {
- NTB_SPEED_AUTO = -1,
- NTB_SPEED_NONE = 0,
- NTB_SPEED_GEN1 = 1,
- NTB_SPEED_GEN2 = 2,
- NTB_SPEED_GEN3 = 3,
-};
-
-enum ntb_width {
- NTB_WIDTH_AUTO = -1,
- NTB_WIDTH_NONE = 0,
- NTB_WIDTH_1 = 1,
- NTB_WIDTH_2 = 2,
- NTB_WIDTH_4 = 4,
- NTB_WIDTH_8 = 8,
- NTB_WIDTH_12 = 12,
- NTB_WIDTH_16 = 16,
- NTB_WIDTH_32 = 32,
-};
-
-SYSCTL_DECL(_hw_ntb);
-
-typedef void (*ntb_db_callback)(void *data, uint32_t vector);
-typedef void (*ntb_event_callback)(void *data);
-
-struct ntb_ctx_ops {
- ntb_event_callback link_event;
- ntb_db_callback db_event;
-};
-
-device_t ntb_get_device(struct ntb_softc *);
-
-bool ntb_link_is_up(struct ntb_softc *, enum ntb_speed *, enum ntb_width *);
-void ntb_link_event(struct ntb_softc *);
-int ntb_link_enable(struct ntb_softc *, enum ntb_speed, enum ntb_width);
-int ntb_link_disable(struct ntb_softc *);
-bool ntb_link_enabled(struct ntb_softc *);
-
-int ntb_set_ctx(struct ntb_softc *, void *, const struct ntb_ctx_ops *);
-void *ntb_get_ctx(struct ntb_softc *, const struct ntb_ctx_ops **);
-void ntb_clear_ctx(struct ntb_softc *);
-
-uint8_t ntb_mw_count(struct ntb_softc *);
-int ntb_mw_get_range(struct ntb_softc *, unsigned mw_idx, vm_paddr_t *base,
- caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
- bus_addr_t *plimit);
-int ntb_mw_set_trans(struct ntb_softc *, unsigned mw_idx, bus_addr_t, size_t);
-int ntb_mw_clear_trans(struct ntb_softc *, unsigned mw_idx);
-
-int ntb_mw_get_wc(struct ntb_softc *, unsigned mw_idx, vm_memattr_t *mode);
-int ntb_mw_set_wc(struct ntb_softc *, unsigned mw_idx, vm_memattr_t mode);
-
-uint8_t ntb_get_max_spads(struct ntb_softc *ntb);
-void ntb_spad_clear(struct ntb_softc *ntb);
-int ntb_spad_write(struct ntb_softc *ntb, unsigned int idx, uint32_t val);
-int ntb_spad_read(struct ntb_softc *ntb, unsigned int idx, uint32_t *val);
-int ntb_peer_spad_write(struct ntb_softc *ntb, unsigned int idx,
- uint32_t val);
-int ntb_peer_spad_read(struct ntb_softc *ntb, unsigned int idx,
- uint32_t *val);
-
-uint64_t ntb_db_valid_mask(struct ntb_softc *);
-uint64_t ntb_db_vector_mask(struct ntb_softc *, uint32_t vector);
-bus_addr_t ntb_get_peer_db_addr(struct ntb_softc *, vm_size_t *sz_out);
-
-void ntb_db_clear(struct ntb_softc *, uint64_t bits);
-void ntb_db_clear_mask(struct ntb_softc *, uint64_t bits);
-uint64_t ntb_db_read(struct ntb_softc *);
-void ntb_db_set_mask(struct ntb_softc *, uint64_t bits);
-void ntb_peer_db_set(struct ntb_softc *, uint64_t bits);
-
-#define XEON_SPAD_COUNT 16
-#define ATOM_SPAD_COUNT 16
-
-/* Hardware owns the low 16 bits of features. */
-#define NTB_BAR_SIZE_4K (1 << 0)
-#define NTB_SDOORBELL_LOCKUP (1 << 1)
-#define NTB_SB01BASE_LOCKUP (1 << 2)
-#define NTB_B2BDOORBELL_BIT14 (1 << 3)
-/* Software/configuration owns the top 16 bits. */
-#define NTB_SPLIT_BAR (1ull << 16)
-
-#define NTB_FEATURES_STR \
- "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \
- "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K"
-
-bool ntb_has_feature(struct ntb_softc *, uint32_t);
-
-#endif /* _NTB_HW_H_ */
diff --git a/sys/dev/ntb/ntb_hw/ntb_regs.h b/sys/dev/ntb/ntb_hw/ntb_regs.h
index fb445d7..a037736 100644
--- a/sys/dev/ntb/ntb_hw/ntb_regs.h
+++ b/sys/dev/ntb/ntb_hw/ntb_regs.h
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
* Copyright (C) 2013 Intel Corporation
* Copyright (C) 2015 EMC Corporation
* All rights reserved.
@@ -76,6 +77,7 @@
#define XEON_SDBMSK_OFFSET 0x0066
#define XEON_USMEMMISS_OFFSET 0x0070
#define XEON_SPAD_OFFSET 0x0080
+#define XEON_SPAD_COUNT 16
#define XEON_SPADSEMA4_OFFSET 0x00c0
#define XEON_WCCNTRL_OFFSET 0x00e0
#define XEON_UNCERRSTS_OFFSET 0x014c
@@ -104,6 +106,7 @@
#define ATOM_NTBCNTL_OFFSET 0x0060
#define ATOM_EBDF_OFFSET 0x0064
#define ATOM_SPAD_OFFSET 0x0080
+#define ATOM_SPAD_COUNT 16
#define ATOM_SPADSEMA_OFFSET 0x00c0
#define ATOM_STKYSPAD_OFFSET 0x00c4
#define ATOM_PBAR2XLAT_OFFSET 0x8008
diff --git a/sys/dev/ntb/ntb_if.m b/sys/dev/ntb/ntb_if.m
new file mode 100644
index 0000000..d8ca227
--- /dev/null
+++ b/sys/dev/ntb/ntb_if.m
@@ -0,0 +1,210 @@
+#-
+# Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+#include <sys/bus.h>
+#include <machine/bus.h>
+
+INTERFACE ntb;
+
+HEADER {
+ enum ntb_speed {
+ NTB_SPEED_AUTO = -1,
+ NTB_SPEED_NONE = 0,
+ NTB_SPEED_GEN1 = 1,
+ NTB_SPEED_GEN2 = 2,
+ NTB_SPEED_GEN3 = 3,
+ };
+
+ enum ntb_width {
+ NTB_WIDTH_AUTO = -1,
+ NTB_WIDTH_NONE = 0,
+ NTB_WIDTH_1 = 1,
+ NTB_WIDTH_2 = 2,
+ NTB_WIDTH_4 = 4,
+ NTB_WIDTH_8 = 8,
+ NTB_WIDTH_12 = 12,
+ NTB_WIDTH_16 = 16,
+ NTB_WIDTH_32 = 32,
+ };
+
+ typedef void (*ntb_db_callback)(void *data, uint32_t vector);
+ typedef void (*ntb_event_callback)(void *data);
+ struct ntb_ctx_ops {
+ ntb_event_callback link_event;
+ ntb_db_callback db_event;
+ };
+};
+
+METHOD bool link_is_up {
+ device_t ntb;
+ enum ntb_speed *speed;
+ enum ntb_width *width;
+};
+
+METHOD int link_enable {
+ device_t ntb;
+ enum ntb_speed speed;
+ enum ntb_width width;
+};
+
+METHOD int link_disable {
+ device_t ntb;
+};
+
+METHOD bool link_enabled {
+ device_t ntb;
+};
+
+METHOD int set_ctx {
+ device_t ntb;
+ void *ctx;
+ const struct ntb_ctx_ops *ctx_ops;
+};
+
+METHOD void * get_ctx {
+ device_t ntb;
+ const struct ntb_ctx_ops **ctx_ops;
+};
+
+METHOD void clear_ctx {
+ device_t ntb;
+};
+
+METHOD uint8_t mw_count {
+ device_t ntb;
+};
+
+METHOD int mw_get_range {
+ device_t ntb;
+ unsigned mw_idx;
+ vm_paddr_t *base;
+ caddr_t *vbase;
+ size_t *size;
+ size_t *align;
+ size_t *align_size;
+ bus_addr_t *plimit;
+};
+
+METHOD int mw_set_trans {
+ device_t ntb;
+ unsigned mw_idx;
+ bus_addr_t addr;
+ size_t size;
+};
+
+METHOD int mw_clear_trans {
+ device_t ntb;
+ unsigned mw_idx;
+};
+
+METHOD int mw_get_wc {
+ device_t ntb;
+ unsigned mw_idx;
+ vm_memattr_t *mode;
+};
+
+METHOD int mw_set_wc {
+ device_t ntb;
+ unsigned mw_idx;
+ vm_memattr_t mode;
+};
+
+METHOD uint8_t spad_count {
+ device_t ntb;
+};
+
+METHOD void spad_clear {
+ device_t ntb;
+};
+
+METHOD int spad_write {
+ device_t ntb;
+ unsigned int idx;
+ uint32_t val;
+};
+
+METHOD int spad_read {
+ device_t ntb;
+ unsigned int idx;
+ uint32_t *val;
+};
+
+METHOD int peer_spad_write {
+ device_t ntb;
+ unsigned int idx;
+ uint32_t val;
+};
+
+METHOD int peer_spad_read {
+ device_t ntb;
+ unsigned int idx;
+ uint32_t *val;
+};
+
+METHOD uint64_t db_valid_mask {
+ device_t ntb;
+};
+
+METHOD int db_vector_count {
+ device_t ntb;
+};
+
+METHOD uint64_t db_vector_mask {
+ device_t ntb;
+ uint32_t vector;
+};
+
+METHOD int peer_db_addr {
+ device_t ntb;
+ bus_addr_t *db_addr;
+ vm_size_t *db_size;
+};
+
+METHOD void db_clear {
+ device_t ntb;
+ uint64_t bits;
+};
+
+METHOD void db_clear_mask {
+ device_t ntb;
+ uint64_t bits;
+};
+
+METHOD uint64_t db_read {
+ device_t ntb;
+};
+
+METHOD void db_set_mask {
+ device_t ntb;
+ uint64_t bits;
+};
+
+METHOD void peer_db_set {
+ device_t ntb;
+ uint64_t bits;
+};
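+
+#
+# Note (illustrative, not generated output): makeobjops.awk turns each
+# METHOD above into a kobj dispatch wrapper in ntb_if.h, roughly:
+#
+#	static __inline uint8_t NTB_MW_COUNT(device_t ntb)
+#	{
+#		kobjop_t _m;
+#
+#		KOBJOPLOOKUP(((kobj_t)ntb)->ops, ntb_mw_count);
+#		return ((ntb_mw_count_t *)_m)(ntb);
+#	}
+#
+# The ntb.c shim then re-exports these as the plain ntb_*() helpers that
+# consumers such as ntb_transport.c call, e.g. ntb_mw_count(dev) simply
+# wraps NTB_MW_COUNT(dev).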
diff --git a/sys/dev/ntb/ntb_transport.c b/sys/dev/ntb/ntb_transport.c
new file mode 100644
index 0000000..c7bc4da
--- /dev/null
+++ b/sys/dev/ntb/ntb_transport.c
@@ -0,0 +1,1518 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * Copyright (C) 2013 Intel Corporation
+ * Copyright (C) 2015 EMC Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The Non-Transparent Bridge (NTB) is a device that allows you to connect
+ * two or more systems using a PCI-e links, providing remote memory access.
+ *
+ * This module contains a transport for sending and receiving messages by
+ * writing to remote memory window(s) provided by underlying NTB device.
+ *
+ * NOTE: Much of the code in this module is shared with Linux. Any patches may
+ * be picked up and redistributed in Linux with a dual GPL/BSD license.
+ */
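+
+/*
+ * Rough per-queue layout of a memory window slice (derived from
+ * ntb_transport_init_queue() and ntb_transport_setup_qp_mw() below):
+ *
+ *	+-----------------------------+ --
+ *	| frame 0 payload             |  | tx_max_frame bytes, ending in a
+ *	| struct ntb_payload_header   |  | struct ntb_payload_header
+ *	+-----------------------------+ --
+ *	| ...                         |
+ *	+-----------------------------+
+ *	| frame tx_max_entry - 1      |
+ *	+-----------------------------+
+ *	| struct ntb_rx_info          |  consumer index written by the peer
+ *	+-----------------------------+
+ */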
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+
+#include "ntb.h"
+#include "ntb_transport.h"
+
+#define KTR_NTB KTR_SPARE3
+
+#define NTB_TRANSPORT_VERSION 4
+
+static SYSCTL_NODE(_hw, OID_AUTO, ntb_transport, CTLFLAG_RW, 0, "ntb_transport");
+
+static unsigned g_ntb_transport_debug_level;
+SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, debug_level, CTLFLAG_RWTUN,
+ &g_ntb_transport_debug_level, 0,
+ "ntb_transport log level -- higher is more verbose");
+#define ntb_printf(lvl, ...) do { \
+ if ((lvl) <= g_ntb_transport_debug_level) { \
+ printf(__VA_ARGS__); \
+ } \
+} while (0)
+
+static unsigned transport_mtu = 0x10000;
+
+static uint64_t max_mw_size;
+SYSCTL_UQUAD(_hw_ntb_transport, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0,
+ "If enabled (non-zero), limit the size of large memory windows. "
+ "Both sides of the NTB MUST set the same value here.");
+
+static unsigned enable_xeon_watchdog;
+SYSCTL_UINT(_hw_ntb_transport, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN,
+ &enable_xeon_watchdog, 0, "If non-zero, write a register every second to "
+ "keep a watchdog from tearing down the NTB link");
+
+STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);
+
+typedef uint32_t ntb_q_idx_t;
+
+struct ntb_queue_entry {
+ /* ntb_queue list reference */
+ STAILQ_ENTRY(ntb_queue_entry) entry;
+
+ /* info on data to be transferred */
+ void *cb_data;
+ void *buf;
+ uint32_t len;
+ uint32_t flags;
+
+ struct ntb_transport_qp *qp;
+ struct ntb_payload_header *x_hdr;
+ ntb_q_idx_t index;
+};
+
+struct ntb_rx_info {
+ ntb_q_idx_t entry;
+};
+
+struct ntb_transport_qp {
+ struct ntb_transport_ctx *transport;
+ device_t dev;
+
+ void *cb_data;
+
+ bool client_ready;
+ volatile bool link_is_up;
+ uint8_t qp_num; /* Only 64 QPs are allowed. 0-63 */
+
+ struct ntb_rx_info *rx_info;
+ struct ntb_rx_info *remote_rx_info;
+
+ void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+ void *data, int len);
+ struct ntb_queue_list tx_free_q;
+ struct mtx ntb_tx_free_q_lock;
+ caddr_t tx_mw;
+ bus_addr_t tx_mw_phys;
+ ntb_q_idx_t tx_index;
+ ntb_q_idx_t tx_max_entry;
+ uint64_t tx_max_frame;
+
+ void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+ void *data, int len);
+ struct ntb_queue_list rx_post_q;
+ struct ntb_queue_list rx_pend_q;
+ /* ntb_rx_q_lock: synchronize access to rx_XXXX_q */
+ struct mtx ntb_rx_q_lock;
+ struct task rxc_db_work;
+ struct taskqueue *rxc_tq;
+ caddr_t rx_buff;
+ ntb_q_idx_t rx_index;
+ ntb_q_idx_t rx_max_entry;
+ uint64_t rx_max_frame;
+
+ void (*event_handler)(void *data, enum ntb_link_event status);
+ struct callout link_work;
+ struct callout rx_full;
+
+ uint64_t last_rx_no_buf;
+
+ /* Stats */
+ uint64_t rx_bytes;
+ uint64_t rx_pkts;
+ uint64_t rx_ring_empty;
+ uint64_t rx_err_no_buf;
+ uint64_t rx_err_oflow;
+ uint64_t rx_err_ver;
+ uint64_t tx_bytes;
+ uint64_t tx_pkts;
+ uint64_t tx_ring_full;
+ uint64_t tx_err_no_buf;
+
+ struct mtx tx_lock;
+};
+
+struct ntb_transport_mw {
+ vm_paddr_t phys_addr;
+ size_t phys_size;
+ size_t xlat_align;
+ size_t xlat_align_size;
+ bus_addr_t addr_limit;
+ /* Tx buff is off vbase / phys_addr */
+ caddr_t vbase;
+ size_t xlat_size;
+ size_t buff_size;
+ /* Rx buff is off virt_addr / dma_addr */
+ caddr_t virt_addr;
+ bus_addr_t dma_addr;
+};
+
+struct ntb_transport_child {
+ device_t dev;
+ int qpoff;
+ int qpcnt;
+ struct ntb_transport_child *next;
+};
+
+struct ntb_transport_ctx {
+ device_t dev;
+ struct ntb_transport_child *child;
+ struct ntb_transport_mw *mw_vec;
+ struct ntb_transport_qp *qp_vec;
+ unsigned mw_count;
+ unsigned qp_count;
+ uint64_t qp_bitmap;
+ volatile bool link_is_up;
+ struct callout link_work;
+ struct callout link_watchdog;
+ struct task link_cleanup;
+};
+
+enum {
+ NTBT_DESC_DONE_FLAG = 1 << 0,
+ NTBT_LINK_DOWN_FLAG = 1 << 1,
+};
+
+struct ntb_payload_header {
+ ntb_q_idx_t ver;
+ uint32_t len;
+ uint32_t flags;
+};
+
+enum {
+ /*
+	 * The order of this enum is part of the remote protocol. Do not
+	 * reorder without bumping the protocol version (and it's probably best
+	 * to keep the protocol in lock-step with the Linux NTB driver).
+ */
+ NTBT_VERSION = 0,
+ NTBT_QP_LINKS,
+ NTBT_NUM_QPS,
+ NTBT_NUM_MWS,
+ /*
+ * N.B.: transport_link_work assumes MW1 enums = MW0 + 2.
+ */
+ NTBT_MW0_SZ_HIGH,
+ NTBT_MW0_SZ_LOW,
+ NTBT_MW1_SZ_HIGH,
+ NTBT_MW1_SZ_LOW,
+
+ /*
+	 * Some NTB-using hardware has a watchdog to work around NTB hangs; if
+ * a register or doorbell isn't written every few seconds, the link is
+ * torn down. Write an otherwise unused register every few seconds to
+ * work around this watchdog.
+ */
+ NTBT_WATCHDOG_SPAD = 15
+};
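+
+/*
+ * For example, the size of memory window i is published to the peer in two
+ * scratchpads: NTBT_MW0_SZ_HIGH + 2 * i holds the upper 32 bits and
+ * NTBT_MW0_SZ_LOW + 2 * i the lower 32 bits (see ntb_transport_link_work()).
+ */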
+
+#define QP_TO_MW(nt, qp)	((qp) % (nt)->mw_count)
+#define NTB_QP_DEF_NUM_ENTRIES 100
+#define NTB_LINK_DOWN_TIMEOUT 10
+
+static int ntb_transport_probe(device_t dev);
+static int ntb_transport_attach(device_t dev);
+static int ntb_transport_detach(device_t dev);
+static void ntb_transport_init_queue(struct ntb_transport_ctx *nt,
+ unsigned int qp_num);
+static int ntb_process_tx(struct ntb_transport_qp *qp,
+ struct ntb_queue_entry *entry);
+static void ntb_transport_rxc_db(void *arg, int pending);
+static int ntb_process_rxc(struct ntb_transport_qp *qp);
+static void ntb_memcpy_rx(struct ntb_transport_qp *qp,
+ struct ntb_queue_entry *entry, void *offset);
+static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp,
+ void *data);
+static void ntb_complete_rxc(struct ntb_transport_qp *qp);
+static void ntb_transport_doorbell_callback(void *data, uint32_t vector);
+static void ntb_transport_event_callback(void *data);
+static void ntb_transport_link_work(void *arg);
+static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size);
+static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw);
+static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
+ unsigned int qp_num);
+static void ntb_qp_link_work(void *arg);
+static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt);
+static void ntb_transport_link_cleanup_work(void *, int);
+static void ntb_qp_link_down(struct ntb_transport_qp *qp);
+static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp);
+static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp);
+static void ntb_send_link_down(struct ntb_transport_qp *qp);
+static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
+ struct ntb_queue_list *list);
+static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock,
+ struct ntb_queue_list *list);
+static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock,
+ struct ntb_queue_list *from, struct ntb_queue_list *to);
+static void xeon_link_watchdog_hb(void *);
+
+static const struct ntb_ctx_ops ntb_transport_ops = {
+ .link_event = ntb_transport_event_callback,
+ .db_event = ntb_transport_doorbell_callback,
+};
+
+MALLOC_DEFINE(M_NTB_T, "ntb_transport", "ntb transport driver");
+
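+/*
+ * Linux-style helper: a 32-bit MMIO store through the kernel mapping of an
+ * NTB memory window (used below for payload headers and the peer rx_info).
+ */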
+static inline void
+iowrite32(uint32_t val, void *addr)
+{
+
+ bus_space_write_4(X86_BUS_SPACE_MEM, 0/* HACK */, (uintptr_t)addr,
+ val);
+}
+
+/* Transport Init and teardown */
+
+static void
+xeon_link_watchdog_hb(void *arg)
+{
+ struct ntb_transport_ctx *nt;
+
+ nt = arg;
+ ntb_spad_write(nt->dev, NTBT_WATCHDOG_SPAD, 0);
+ callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt);
+}
+
+static int
+ntb_transport_probe(device_t dev)
+{
+
+ device_set_desc(dev, "NTB Transport");
+ return (0);
+}
+
+static int
+ntb_transport_attach(device_t dev)
+{
+ struct ntb_transport_ctx *nt = device_get_softc(dev);
+ struct ntb_transport_child **cpp = &nt->child;
+ struct ntb_transport_child *nc;
+ struct ntb_transport_mw *mw;
+ uint64_t db_bitmap;
+ int rc, i, db_count, spad_count, qp, qpu, qpo, qpt;
+ char cfg[128] = "";
+ char buf[32];
+ char *n, *np, *c, *name;
+
+ nt->dev = dev;
+ nt->mw_count = ntb_mw_count(dev);
+ spad_count = ntb_spad_count(dev);
+ db_bitmap = ntb_db_valid_mask(dev);
+ db_count = flsll(db_bitmap);
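+	/*
+	 * flsll() gives the index of the highest set doorbell bit, so the
+	 * assertion below requires the valid mask to be a contiguous run of
+	 * low bits; queue pair N then owns doorbell bit (1ull << N).
+	 */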
+	KASSERT(db_bitmap == ((uint64_t)1 << db_count) - 1,
+	    ("Doorbells are not sequential (%jx).\n", (uintmax_t)db_bitmap));
+
+ device_printf(dev, "%d memory windows, %d scratchpads, "
+ "%d doorbells\n", nt->mw_count, spad_count, db_count);
+
+ if (nt->mw_count == 0) {
+ device_printf(dev, "At least 1 memory window required.\n");
+ return (ENXIO);
+ }
+ if (spad_count < 6) {
+ device_printf(dev, "At least 6 scratchpads required.\n");
+ return (ENXIO);
+ }
+ if (spad_count < 4 + 2 * nt->mw_count) {
+ nt->mw_count = (spad_count - 4) / 2;
+		device_printf(dev, "Scratchpads sufficient only for %d "
+		    "memory windows.\n", nt->mw_count);
+ }
+ if (db_bitmap == 0) {
+ device_printf(dev, "At least one doorbell required.\n");
+ return (ENXIO);
+ }
+
+ nt->mw_vec = malloc(nt->mw_count * sizeof(*nt->mw_vec), M_NTB_T,
+ M_WAITOK | M_ZERO);
+ for (i = 0; i < nt->mw_count; i++) {
+ mw = &nt->mw_vec[i];
+
+ rc = ntb_mw_get_range(dev, i, &mw->phys_addr, &mw->vbase,
+ &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size,
+ &mw->addr_limit);
+ if (rc != 0)
+ goto err;
+
+ mw->buff_size = 0;
+ mw->xlat_size = 0;
+ mw->virt_addr = NULL;
+ mw->dma_addr = 0;
+
+ rc = ntb_mw_set_wc(dev, i, VM_MEMATTR_WRITE_COMBINING);
+ if (rc)
+ ntb_printf(0, "Unable to set mw%d caching\n", i);
+ }
+
+ qpu = 0;
+ qpo = imin(db_count, nt->mw_count);
+ qpt = db_count;
+
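+	/*
+	 * The per-device "config" hint is a comma-separated list of child
+	 * specifications of the form "name:qpcount", either part optional;
+	 * e.g. hint.ntb_transport.0.config="foo:2,bar" (names hypothetical).
+	 */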
+ snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev),
+ device_get_unit(dev));
+ TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg));
+ n = cfg;
+ i = 0;
+ while ((c = strsep(&n, ",")) != NULL) {
+ np = c;
+ name = strsep(&np, ":");
+ if (name != NULL && name[0] == 0)
+ name = NULL;
+ qp = (np && np[0] != 0) ? strtol(np, NULL, 10) : qpo - qpu;
+ if (qp <= 0)
+ qp = 1;
+
+ if (qp > qpt - qpu) {
+ device_printf(dev, "Not enough resources for config\n");
+ break;
+ }
+
+ nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO);
+ nc->qpoff = qpu;
+ nc->qpcnt = qp;
+ nc->dev = device_add_child(dev, name, -1);
+ if (nc->dev == NULL) {
+			device_printf(dev, "Cannot add child.\n");
+ break;
+ }
+ device_set_ivars(nc->dev, nc);
+ *cpp = nc;
+ cpp = &nc->next;
+
+ if (bootverbose) {
+ device_printf(dev, "%d \"%s\": queues %d",
+ i, name, qpu);
+ if (qp > 1)
+ printf("-%d", qpu + qp - 1);
+ printf("\n");
+ }
+
+ qpu += qp;
+ i++;
+ }
+ nt->qp_count = qpu;
+
+ nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_T,
+ M_WAITOK | M_ZERO);
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_init_queue(nt, i);
+
+ callout_init(&nt->link_work, 0);
+ callout_init(&nt->link_watchdog, 0);
+ TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt);
+
+ rc = ntb_set_ctx(dev, nt, &ntb_transport_ops);
+ if (rc != 0)
+ goto err;
+
+ nt->link_is_up = false;
+ ntb_link_enable(dev, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+
+ if (enable_xeon_watchdog != 0)
+ callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt);
+
+ bus_generic_attach(dev);
+ return (0);
+
+err:
+ free(nt->qp_vec, M_NTB_T);
+ free(nt->mw_vec, M_NTB_T);
+ return (rc);
+}
+
+static int
+ntb_transport_detach(device_t dev)
+{
+ struct ntb_transport_ctx *nt = device_get_softc(dev);
+ struct ntb_transport_child **cpp = &nt->child;
+ struct ntb_transport_child *nc;
+ int error = 0, i;
+
+ while ((nc = *cpp) != NULL) {
+ *cpp = (*cpp)->next;
+ error = device_delete_child(dev, nc->dev);
+ if (error)
+ break;
+ free(nc, M_DEVBUF);
+ }
+ KASSERT(nt->qp_bitmap == 0,
+ ("Some queues not freed on detach (%jx)", nt->qp_bitmap));
+
+ ntb_transport_link_cleanup(nt);
+ taskqueue_drain(taskqueue_swi, &nt->link_cleanup);
+ callout_drain(&nt->link_work);
+ callout_drain(&nt->link_watchdog);
+
+ ntb_link_disable(dev);
+ ntb_clear_ctx(dev);
+
+ for (i = 0; i < nt->mw_count; i++)
+ ntb_free_mw(nt, i);
+
+ free(nt->qp_vec, M_NTB_T);
+ free(nt->mw_vec, M_NTB_T);
+ return (0);
+}
+
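+/* Return the number of queue pairs allotted to this transport child. */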
+int
+ntb_transport_queue_count(device_t dev)
+{
+ struct ntb_transport_child *nc = device_get_ivars(dev);
+
+ return (nc->qpcnt);
+}
+
+static void
+ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num)
+{
+ struct ntb_transport_mw *mw;
+ struct ntb_transport_qp *qp;
+ vm_paddr_t mw_base;
+ uint64_t mw_size, qp_offset;
+ size_t tx_size;
+ unsigned num_qps_mw, mw_num, mw_count;
+
+ mw_count = nt->mw_count;
+ mw_num = QP_TO_MW(nt, qp_num);
+ mw = &nt->mw_vec[mw_num];
+
+ qp = &nt->qp_vec[qp_num];
+ qp->qp_num = qp_num;
+ qp->transport = nt;
+ qp->dev = nt->dev;
+ qp->client_ready = false;
+ qp->event_handler = NULL;
+ ntb_qp_link_down_reset(qp);
+
+ if (mw_num < nt->qp_count % mw_count)
+ num_qps_mw = nt->qp_count / mw_count + 1;
+ else
+ num_qps_mw = nt->qp_count / mw_count;
+
+ mw_base = mw->phys_addr;
+ mw_size = mw->phys_size;
+
+ tx_size = mw_size / num_qps_mw;
+ qp_offset = tx_size * (qp_num / mw_count);
+
+ qp->tx_mw = mw->vbase + qp_offset;
+ KASSERT(qp->tx_mw != NULL, ("uh oh?"));
+
+ /* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */
+ qp->tx_mw_phys = mw_base + qp_offset;
+ KASSERT(qp->tx_mw_phys != 0, ("uh oh?"));
+
+ tx_size -= sizeof(struct ntb_rx_info);
+ qp->rx_info = (void *)(qp->tx_mw + tx_size);
+
+	/* Due to house-keeping, there must be at least 2 buffers */
+ qp->tx_max_frame = qmin(transport_mtu, tx_size / 2);
+ qp->tx_max_entry = tx_size / qp->tx_max_frame;
+
+ callout_init(&qp->link_work, 0);
+ callout_init(&qp->rx_full, 1);
+
+ mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN);
+ mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN);
+ mtx_init(&qp->tx_lock, "ntb transport tx", NULL, MTX_DEF);
+ TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp);
+ qp->rxc_tq = taskqueue_create("ntbt_rx", M_WAITOK,
+ taskqueue_thread_enqueue, &qp->rxc_tq);
+ taskqueue_start_threads(&qp->rxc_tq, 1, PI_NET, "%s rx%d",
+ device_get_nameunit(nt->dev), qp_num);
+
+ STAILQ_INIT(&qp->rx_post_q);
+ STAILQ_INIT(&qp->rx_pend_q);
+ STAILQ_INIT(&qp->tx_free_q);
+}
+
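+/**
+ * ntb_transport_free_queue - Free the resources of an NTB transport queue
+ * @qp: NTB transport layer queue to be freed
+ *
+ * Drains outstanding work, frees the RX and TX queue entries, and releases
+ * the queue pair back to the transport.
+ */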
+void
+ntb_transport_free_queue(struct ntb_transport_qp *qp)
+{
+ struct ntb_transport_ctx *nt = qp->transport;
+ struct ntb_queue_entry *entry;
+
+ callout_drain(&qp->link_work);
+
+ ntb_db_set_mask(qp->dev, 1ull << qp->qp_num);
+ taskqueue_drain_all(qp->rxc_tq);
+ taskqueue_free(qp->rxc_tq);
+
+ qp->cb_data = NULL;
+ qp->rx_handler = NULL;
+ qp->tx_handler = NULL;
+ qp->event_handler = NULL;
+
+ while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q)))
+ free(entry, M_NTB_T);
+
+ while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q)))
+ free(entry, M_NTB_T);
+
+ while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
+ free(entry, M_NTB_T);
+
+ nt->qp_bitmap &= ~(1 << qp->qp_num);
+}
+
+/**
+ * ntb_transport_create_queue - Create a new NTB transport layer queue
+ * @dev: NTB transport child device
+ * @q: queue index within the child's allotment
+ * @handlers: receive, transmit and event callback functions
+ * @data: user context passed back to the callbacks
+ *
+ * Create a new NTB transport layer queue and provide the queue with a callback
+ * routine for both transmit and receive. The receive callback routine will be
+ * used to pass up data when the transport has received it on the queue. The
+ * transmit callback routine will be called when the transport has completed the
+ * transmission of the data on the queue and the data is ready to be freed.
+ *
+ * RETURNS: pointer to newly created ntb_queue, NULL on error.
+ */
+struct ntb_transport_qp *
+ntb_transport_create_queue(device_t dev, int q,
+ const struct ntb_queue_handlers *handlers, void *data)
+{
+ struct ntb_transport_child *nc = device_get_ivars(dev);
+ struct ntb_transport_ctx *nt = device_get_softc(device_get_parent(dev));
+ struct ntb_queue_entry *entry;
+ struct ntb_transport_qp *qp;
+ int i;
+
+ if (q < 0 || q >= nc->qpcnt)
+ return (NULL);
+
+ qp = &nt->qp_vec[nc->qpoff + q];
+ nt->qp_bitmap |= (1 << qp->qp_num);
+ qp->cb_data = data;
+ qp->rx_handler = handlers->rx_handler;
+ qp->tx_handler = handlers->tx_handler;
+ qp->event_handler = handlers->event_handler;
+
+ for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+ entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO);
+ entry->cb_data = data;
+ entry->buf = NULL;
+ entry->len = transport_mtu;
+ entry->qp = qp;
+ ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q);
+ }
+
+ for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+ entry = malloc(sizeof(*entry), M_NTB_T, M_WAITOK | M_ZERO);
+ entry->qp = qp;
+ ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+ }
+
+ ntb_db_clear(dev, 1ull << qp->qp_num);
+ return (qp);
+}
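+
+/*
+ * Minimal client sketch (hypothetical names; error handling elided). A
+ * child driver would typically set up its queue from its attach routine:
+ *
+ *	static const struct ntb_queue_handlers my_handlers = {
+ *		.rx_handler = my_rx,
+ *		.tx_handler = my_tx,
+ *		.event_handler = my_event,
+ *	};
+ *
+ *	qp = ntb_transport_create_queue(dev, 0, &my_handlers, sc);
+ *	if (qp == NULL)
+ *		return (ENXIO);
+ *	ntb_transport_link_up(qp);
+ */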
+
+/**
+ * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
+ * @qp: NTB transport layer queue to be enabled
+ *
+ * Notify the NTB transport layer that the client is ready to use the queue.
+ */
+void
+ntb_transport_link_up(struct ntb_transport_qp *qp)
+{
+ struct ntb_transport_ctx *nt = qp->transport;
+
+ qp->client_ready = true;
+
+ ntb_printf(2, "qp %d client ready\n", qp->qp_num);
+
+ if (nt->link_is_up)
+ callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+}
+
+/* Transport Tx */
+
+/**
+ * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
+ * @qp: NTB transport layer queue the entry is to be enqueued on
+ * @cb: per buffer pointer for callback function to use
+ * @data: pointer to data buffer that will be sent
+ * @len: length of the data buffer
+ *
+ * Enqueue a new transmit buffer onto the transport queue from which a NTB
+ * payload will be transmitted. This assumes that a lock is being held to
+ * serialize access to the qp.
+ *
+ * RETURNS: An appropriate ERRNO error value on error, or zero for success.
+ */
+int
+ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+ unsigned int len)
+{
+ struct ntb_queue_entry *entry;
+ int rc;
+
+ if (!qp->link_is_up || len == 0) {
+ CTR0(KTR_NTB, "TX: link not up");
+ return (EINVAL);
+ }
+
+ entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+ if (entry == NULL) {
+ CTR0(KTR_NTB, "TX: could not get entry from tx_free_q");
+ qp->tx_err_no_buf++;
+ return (EBUSY);
+ }
+ CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry);
+
+ entry->cb_data = cb;
+ entry->buf = data;
+ entry->len = len;
+ entry->flags = 0;
+
+ mtx_lock(&qp->tx_lock);
+ rc = ntb_process_tx(qp, entry);
+ mtx_unlock(&qp->tx_lock);
+ if (rc != 0) {
+ ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+ CTR1(KTR_NTB,
+ "TX: process_tx failed. Returning entry %p to tx_free_q",
+ entry);
+ }
+ return (rc);
+}
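+
+/*
+ * Caller's-eye view (a sketch, not part of the API contract): EBUSY (free
+ * list exhausted) and EAGAIN (TX ring full) are transient, so a client
+ * would typically requeue the mbuf and retry after its tx_handler callback
+ * reports a completion.
+ */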
+
+static void
+ntb_tx_copy_callback(void *data)
+{
+ struct ntb_queue_entry *entry = data;
+ struct ntb_transport_qp *qp = entry->qp;
+ struct ntb_payload_header *hdr = entry->x_hdr;
+
+ iowrite32(entry->flags | NTBT_DESC_DONE_FLAG, &hdr->flags);
+ CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr);
+
+ ntb_peer_db_set(qp->dev, 1ull << qp->qp_num);
+
+ /*
+ * The entry length can only be zero if the packet is intended to be a
+ * "link down" or similar. Since no payload is being sent in these
+ * cases, there is nothing to add to the completion queue.
+ */
+ if (entry->len > 0) {
+ qp->tx_bytes += entry->len;
+
+ if (qp->tx_handler)
+ qp->tx_handler(qp, qp->cb_data, entry->buf,
+ entry->len);
+ else
+ m_freem(entry->buf);
+ entry->buf = NULL;
+ }
+
+ CTR3(KTR_NTB,
+ "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning "
+ "to tx_free_q", entry, hdr->ver, hdr->flags);
+ ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+}
+
+static void
+ntb_memcpy_tx(struct ntb_queue_entry *entry, void *offset)
+{
+
+ CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset);
+ if (entry->buf != NULL) {
+ m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset);
+
+ /*
+ * Ensure that the data is fully copied before setting the
+ * flags
+ */
+ wmb();
+ }
+
+ ntb_tx_copy_callback(entry);
+}
+
+static void
+ntb_async_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
+{
+ struct ntb_payload_header *hdr;
+ void *offset;
+
+ offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
+ hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame -
+ sizeof(struct ntb_payload_header));
+ entry->x_hdr = hdr;
+
+ iowrite32(entry->len, &hdr->len);
+ iowrite32(qp->tx_pkts, &hdr->ver);
+
+ ntb_memcpy_tx(entry, offset);
+}
+
+static int
+ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
+{
+
+ CTR3(KTR_NTB,
+ "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u",
+ qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry);
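+	/*
+	 * remote_rx_info->entry is the last index the peer has completed,
+	 * so the ring is full once the producer index would catch up to it.
+	 */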
+ if (qp->tx_index == qp->remote_rx_info->entry) {
+ CTR0(KTR_NTB, "TX: ring full");
+ qp->tx_ring_full++;
+ return (EAGAIN);
+ }
+
+ if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
+ if (qp->tx_handler != NULL)
+ qp->tx_handler(qp, qp->cb_data, entry->buf,
+ EIO);
+ else
+ m_freem(entry->buf);
+
+ entry->buf = NULL;
+ ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
+ CTR1(KTR_NTB,
+ "TX: frame too big. returning entry %p to tx_free_q",
+ entry);
+ return (0);
+ }
+ CTR2(KTR_NTB, "TX: copying entry %p to index %u", entry, qp->tx_index);
+ ntb_async_tx(qp, entry);
+
+ qp->tx_index++;
+ qp->tx_index %= qp->tx_max_entry;
+
+ qp->tx_pkts++;
+
+ return (0);
+}
+
+/* Transport Rx */
+static void
+ntb_transport_rxc_db(void *arg, int pending __unused)
+{
+ struct ntb_transport_qp *qp = arg;
+ int rc;
+
+ CTR0(KTR_NTB, "RX: transport_rx");
+again:
+ while ((rc = ntb_process_rxc(qp)) == 0)
+ ;
+ CTR1(KTR_NTB, "RX: process_rxc returned %d", rc);
+
+ if ((ntb_db_read(qp->dev) & (1ull << qp->qp_num)) != 0) {
+ /* If db is set, clear it and check queue once more. */
+ ntb_db_clear(qp->dev, 1ull << qp->qp_num);
+ goto again;
+ }
+}
+
+static int
+ntb_process_rxc(struct ntb_transport_qp *qp)
+{
+ struct ntb_payload_header *hdr;
+ struct ntb_queue_entry *entry;
+ caddr_t offset;
+
+ offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
+ hdr = (void *)(offset + qp->rx_max_frame -
+ sizeof(struct ntb_payload_header));
+
+ CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index);
+ if ((hdr->flags & NTBT_DESC_DONE_FLAG) == 0) {
+ CTR0(KTR_NTB, "RX: hdr not done");
+ qp->rx_ring_empty++;
+ return (EAGAIN);
+ }
+
+ if ((hdr->flags & NTBT_LINK_DOWN_FLAG) != 0) {
+ CTR0(KTR_NTB, "RX: link down");
+ ntb_qp_link_down(qp);
+ hdr->flags = 0;
+ return (EAGAIN);
+ }
+
+ if (hdr->ver != (uint32_t)qp->rx_pkts) {
+		CTR2(KTR_NTB, "RX: ver != rx_pkts (%x != %lx). "
+ "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts);
+ qp->rx_err_ver++;
+ return (EIO);
+ }
+
+ entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
+ if (entry == NULL) {
+ qp->rx_err_no_buf++;
+ CTR0(KTR_NTB, "RX: No entries in rx_pend_q");
+ return (EAGAIN);
+ }
+ callout_stop(&qp->rx_full);
+ CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry);
+
+ entry->x_hdr = hdr;
+ entry->index = qp->rx_index;
+
+ if (hdr->len > entry->len) {
+ CTR2(KTR_NTB, "RX: len too long. Wanted %ju got %ju",
+ (uintmax_t)hdr->len, (uintmax_t)entry->len);
+ qp->rx_err_oflow++;
+
+ entry->len = -EIO;
+ entry->flags |= NTBT_DESC_DONE_FLAG;
+
+ ntb_complete_rxc(qp);
+ } else {
+ qp->rx_bytes += hdr->len;
+ qp->rx_pkts++;
+
+ CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts);
+
+ entry->len = hdr->len;
+
+ ntb_memcpy_rx(qp, entry, offset);
+ }
+
+ qp->rx_index++;
+ qp->rx_index %= qp->rx_max_entry;
+ return (0);
+}
+
+static void
+ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
+ void *offset)
+{
+ struct ifnet *ifp = entry->cb_data;
+ unsigned int len = entry->len;
+
+ CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset);
+
+ entry->buf = (void *)m_devget(offset, len, 0, ifp, NULL);
+ if (entry->buf == NULL)
+ entry->len = -ENOMEM;
+
+ /* Ensure that the data is globally visible before clearing the flag */
+ wmb();
+
+ CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, entry->buf);
+ ntb_rx_copy_callback(qp, entry);
+}
+
+static inline void
+ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data)
+{
+ struct ntb_queue_entry *entry;
+
+ entry = data;
+ entry->flags |= NTBT_DESC_DONE_FLAG;
+ ntb_complete_rxc(qp);
+}
+
+static void
+ntb_complete_rxc(struct ntb_transport_qp *qp)
+{
+ struct ntb_queue_entry *entry;
+ struct mbuf *m;
+ unsigned len;
+
+ CTR0(KTR_NTB, "RX: rx_completion_task");
+
+ mtx_lock_spin(&qp->ntb_rx_q_lock);
+
+ while (!STAILQ_EMPTY(&qp->rx_post_q)) {
+ entry = STAILQ_FIRST(&qp->rx_post_q);
+ if ((entry->flags & NTBT_DESC_DONE_FLAG) == 0)
+ break;
+
+ entry->x_hdr->flags = 0;
+ iowrite32(entry->index, &qp->rx_info->entry);
+
+ STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry);
+
+ len = entry->len;
+ m = entry->buf;
+
+ /*
+ * Re-initialize queue_entry for reuse; rx_handler takes
+ * ownership of the mbuf.
+ */
+ entry->buf = NULL;
+ entry->len = transport_mtu;
+ entry->cb_data = qp->cb_data;
+
+ STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry);
+
+ mtx_unlock_spin(&qp->ntb_rx_q_lock);
+
+ CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m);
+ if (qp->rx_handler != NULL && qp->client_ready)
+ qp->rx_handler(qp, qp->cb_data, m, len);
+ else
+ m_freem(m);
+
+ mtx_lock_spin(&qp->ntb_rx_q_lock);
+ }
+
+ mtx_unlock_spin(&qp->ntb_rx_q_lock);
+}
+
+static void
+ntb_transport_doorbell_callback(void *data, uint32_t vector)
+{
+ struct ntb_transport_ctx *nt = data;
+ struct ntb_transport_qp *qp;
+ uint64_t vec_mask;
+ unsigned qp_num;
+
+ vec_mask = ntb_db_vector_mask(nt->dev, vector);
+ vec_mask &= nt->qp_bitmap;
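+	/*
+	 * More than one bit set means several queue pairs share this vector;
+	 * narrow the mask to doorbells actually pending to avoid spurious
+	 * taskqueue wakeups.
+	 */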
+ if ((vec_mask & (vec_mask - 1)) != 0)
+ vec_mask &= ntb_db_read(nt->dev);
+ while (vec_mask != 0) {
+ qp_num = ffsll(vec_mask) - 1;
+
+ qp = &nt->qp_vec[qp_num];
+ if (qp->link_is_up)
+ taskqueue_enqueue(qp->rxc_tq, &qp->rxc_db_work);
+
+ vec_mask &= ~(1ull << qp_num);
+ }
+}
+
+/* Link Event handler */
+static void
+ntb_transport_event_callback(void *data)
+{
+ struct ntb_transport_ctx *nt = data;
+
+ if (ntb_link_is_up(nt->dev, NULL, NULL)) {
+ ntb_printf(1, "HW link up\n");
+ callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
+ } else {
+ ntb_printf(1, "HW link down\n");
+ taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup);
+ }
+}
+
+/* Link bring up */
+static void
+ntb_transport_link_work(void *arg)
+{
+ struct ntb_transport_ctx *nt = arg;
+ device_t dev = nt->dev;
+ struct ntb_transport_qp *qp;
+ uint64_t val64, size;
+ uint32_t val;
+ unsigned i;
+ int rc;
+
+ /* send the local info, in the opposite order of the way we read it */
+ for (i = 0; i < nt->mw_count; i++) {
+ size = nt->mw_vec[i].phys_size;
+
+ if (max_mw_size != 0 && size > max_mw_size)
+ size = max_mw_size;
+
+ ntb_peer_spad_write(dev, NTBT_MW0_SZ_HIGH + (i * 2),
+ size >> 32);
+ ntb_peer_spad_write(dev, NTBT_MW0_SZ_LOW + (i * 2), size);
+ }
+ ntb_peer_spad_write(dev, NTBT_NUM_MWS, nt->mw_count);
+ ntb_peer_spad_write(dev, NTBT_NUM_QPS, nt->qp_count);
+ ntb_peer_spad_write(dev, NTBT_QP_LINKS, 0);
+ ntb_peer_spad_write(dev, NTBT_VERSION, NTB_TRANSPORT_VERSION);
+
+ /* Query the remote side for its info */
+ val = 0;
+ ntb_spad_read(dev, NTBT_VERSION, &val);
+ if (val != NTB_TRANSPORT_VERSION)
+ goto out;
+
+ ntb_spad_read(dev, NTBT_NUM_QPS, &val);
+ if (val != nt->qp_count)
+ goto out;
+
+ ntb_spad_read(dev, NTBT_NUM_MWS, &val);
+ if (val != nt->mw_count)
+ goto out;
+
+ for (i = 0; i < nt->mw_count; i++) {
+ ntb_spad_read(dev, NTBT_MW0_SZ_HIGH + (i * 2), &val);
+ val64 = (uint64_t)val << 32;
+
+ ntb_spad_read(dev, NTBT_MW0_SZ_LOW + (i * 2), &val);
+ val64 |= val;
+
+ rc = ntb_set_mw(nt, i, val64);
+ if (rc != 0)
+ goto free_mws;
+ }
+
+ nt->link_is_up = true;
+ ntb_printf(1, "transport link up\n");
+
+ for (i = 0; i < nt->qp_count; i++) {
+ qp = &nt->qp_vec[i];
+
+ ntb_transport_setup_qp_mw(nt, i);
+
+ if (qp->client_ready)
+ callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
+ }
+
+ return;
+
+free_mws:
+ for (i = 0; i < nt->mw_count; i++)
+ ntb_free_mw(nt, i);
+out:
+ if (ntb_link_is_up(dev, NULL, NULL))
+ callout_reset(&nt->link_work,
+ NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt);
+}
+
+static int
+ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size)
+{
+ struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+ size_t xlat_size, buff_size;
+ int rc;
+
+ if (size == 0)
+ return (EINVAL);
+
+ xlat_size = roundup(size, mw->xlat_align_size);
+ buff_size = xlat_size;
+
+ /* No need to re-setup */
+ if (mw->xlat_size == xlat_size)
+ return (0);
+
+ if (mw->buff_size != 0)
+ ntb_free_mw(nt, num_mw);
+
+ /* Alloc memory for receiving data. Must be aligned */
+ mw->xlat_size = xlat_size;
+ mw->buff_size = buff_size;
+
+ mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_T, M_ZERO, 0,
+ mw->addr_limit, mw->xlat_align, 0);
+ if (mw->virt_addr == NULL) {
+ ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n",
+ mw->buff_size, mw->xlat_size);
+ mw->xlat_size = 0;
+ mw->buff_size = 0;
+ return (ENOMEM);
+ }
+ /* TODO: replace with bus_space_* functions */
+ mw->dma_addr = vtophys(mw->virt_addr);
+
+ /*
+ * Ensure that the allocation from contigmalloc is aligned as
+ * requested. XXX: This may not be needed -- brought in for parity
+ * with the Linux driver.
+ */
+ if (mw->dma_addr % mw->xlat_align != 0) {
+ ntb_printf(0,
+ "DMA memory 0x%jx not aligned to BAR size 0x%zx\n",
+ (uintmax_t)mw->dma_addr, size);
+ ntb_free_mw(nt, num_mw);
+ return (ENOMEM);
+ }
+
+ /* Notify HW the memory location of the receive buffer */
+ rc = ntb_mw_set_trans(nt->dev, num_mw, mw->dma_addr, mw->xlat_size);
+ if (rc) {
+ ntb_printf(0, "Unable to set mw%d translation\n", num_mw);
+ ntb_free_mw(nt, num_mw);
+ return (rc);
+ }
+
+ return (0);
+}
+
+static void
+ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
+{
+ struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
+
+ if (mw->virt_addr == NULL)
+ return;
+
+ ntb_mw_clear_trans(nt->dev, num_mw);
+ contigfree(mw->virt_addr, mw->xlat_size, M_NTB_T);
+ mw->xlat_size = 0;
+ mw->buff_size = 0;
+ mw->virt_addr = NULL;
+}
+
+static int
+ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num)
+{
+ struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+ struct ntb_transport_mw *mw;
+ void *offset;
+ ntb_q_idx_t i;
+ size_t rx_size;
+ unsigned num_qps_mw, mw_num, mw_count;
+
+ mw_count = nt->mw_count;
+ mw_num = QP_TO_MW(nt, qp_num);
+ mw = &nt->mw_vec[mw_num];
+
+ if (mw->virt_addr == NULL)
+ return (ENOMEM);
+
+ if (mw_num < nt->qp_count % mw_count)
+ num_qps_mw = nt->qp_count / mw_count + 1;
+ else
+ num_qps_mw = nt->qp_count / mw_count;
+
+ rx_size = mw->xlat_size / num_qps_mw;
+ qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count);
+ rx_size -= sizeof(struct ntb_rx_info);
+
+	qp->remote_rx_info = (void *)(qp->rx_buff + rx_size);
+
+	/* Due to house-keeping, there must be at least 2 buffers */
+ qp->rx_max_frame = qmin(transport_mtu, rx_size / 2);
+ qp->rx_max_entry = rx_size / qp->rx_max_frame;
+ qp->rx_index = 0;
+
+ qp->remote_rx_info->entry = qp->rx_max_entry - 1;
+
+ /* Set up the hdr offsets with 0s */
+ for (i = 0; i < qp->rx_max_entry; i++) {
+ offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) -
+ sizeof(struct ntb_payload_header));
+ memset(offset, 0, sizeof(struct ntb_payload_header));
+ }
+
+ qp->rx_pkts = 0;
+ qp->tx_pkts = 0;
+ qp->tx_index = 0;
+
+ return (0);
+}
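The carving above gives each queue xlat_size / num_qps_mw bytes of the window, reserves the tail of that slice for the shared struct ntb_rx_info, and then sizes frames so that at least two fit. A worked userspace example with illustrative numbers (1 MiB window, two queues, 64 KiB MTU):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct ntb_rx_info { uint32_t entry; };		/* mirrors the driver */

int
main(void)
{
	size_t xlat_size = 1024 * 1024;		/* one 1 MiB window */
	unsigned num_qps_mw = 2;		/* two queues share it */
	size_t transport_mtu = 65536;
	size_t rx_size, rx_max_frame, rx_max_entry;

	rx_size = xlat_size / num_qps_mw;	/* 512 KiB per queue */
	rx_size -= sizeof(struct ntb_rx_info);	/* tail holds rx_info */

	/* qmin(transport_mtu, rx_size / 2): at least two frames fit. */
	rx_max_frame = transport_mtu < rx_size / 2 ? transport_mtu :
	    rx_size / 2;
	rx_max_entry = rx_size / rx_max_frame;

	assert(rx_max_frame == 65536);
	assert(rx_max_entry == 7);		/* (512 KiB - 4) / 64 KiB */
	return (0);
}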
+
+static void
+ntb_qp_link_work(void *arg)
+{
+ struct ntb_transport_qp *qp = arg;
+ device_t dev = qp->dev;
+ struct ntb_transport_ctx *nt = qp->transport;
+ int i;
+ uint32_t val;
+
+ /* Report queues that are up on our side */
+ for (i = 0, val = 0; i < nt->qp_count; i++) {
+ if (nt->qp_vec[i].client_ready)
+ val |= (1 << i);
+ }
+ ntb_peer_spad_write(dev, NTBT_QP_LINKS, val);
+
+ /* See if the remote side is up */
+ ntb_spad_read(dev, NTBT_QP_LINKS, &val);
+ if ((val & (1ull << qp->qp_num)) != 0) {
+ ntb_printf(2, "qp %d link up\n", qp->qp_num);
+ qp->link_is_up = true;
+
+ if (qp->event_handler != NULL)
+ qp->event_handler(qp->cb_data, NTB_LINK_UP);
+
+ ntb_db_clear_mask(dev, 1ull << qp->qp_num);
+ } else if (nt->link_is_up)
+ callout_reset(&qp->link_work,
+ NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
+}
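Queue-link negotiation is a bitmask handshake: each side publishes one bit per client-ready queue in the NTBT_QP_LINKS scratchpad and then tests its own queue's bit in the peer's copy. A toy sketch of both halves:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

int
main(void)
{
	bool client_ready[4] = { true, false, true, false };
	uint32_t val = 0;
	int i;

	/* Local side: publish one bit per client-ready queue. */
	for (i = 0; i < 4; i++)
		if (client_ready[i])
			val |= 1u << i;
	assert(val == 0x5);

	/* Peer side: queue 2 finds its bit set, queue 1 does not. */
	assert((val & (1u << 2)) != 0);
	assert((val & (1u << 1)) == 0);
	return (0);
}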
+
+/* Link down event. */
+static void
+ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
+{
+ struct ntb_transport_qp *qp;
+ int i;
+
+ /* Pass along the info to any clients */
+ for (i = 0; i < nt->qp_count; i++) {
+ if ((nt->qp_bitmap & (1 << i)) != 0) {
+ qp = &nt->qp_vec[i];
+ ntb_qp_link_cleanup(qp);
+ callout_drain(&qp->link_work);
+ }
+ }
+
+ if (!nt->link_is_up)
+ callout_drain(&nt->link_work);
+
+ /*
+	 * The scratchpad registers keep their values if the remote side
+	 * goes down; clear them now so they hold sane values the next
+	 * time they are accessed.
+ */
+ ntb_spad_clear(nt->dev);
+}
+
+static void
+ntb_transport_link_cleanup_work(void *arg, int pending __unused)
+{
+
+ ntb_transport_link_cleanup(arg);
+}
+
+static void
+ntb_qp_link_down(struct ntb_transport_qp *qp)
+{
+
+ ntb_qp_link_cleanup(qp);
+}
+
+static void
+ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+{
+
+ qp->link_is_up = false;
+ ntb_db_set_mask(qp->dev, 1ull << qp->qp_num);
+
+ qp->tx_index = qp->rx_index = 0;
+ qp->tx_bytes = qp->rx_bytes = 0;
+ qp->tx_pkts = qp->rx_pkts = 0;
+
+ qp->rx_ring_empty = 0;
+ qp->tx_ring_full = 0;
+
+ qp->rx_err_no_buf = qp->tx_err_no_buf = 0;
+ qp->rx_err_oflow = qp->rx_err_ver = 0;
+}
+
+static void
+ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
+{
+
+ callout_drain(&qp->link_work);
+ ntb_qp_link_down_reset(qp);
+
+ if (qp->event_handler != NULL)
+ qp->event_handler(qp->cb_data, NTB_LINK_DOWN);
+}
+
+/* Link commanded down */
+/**
+ * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
+ * @qp: NTB transport layer queue to be disabled
+ *
+ * Notify the NTB transport layer that the client no longer wishes to receive
+ * data on the specified transport queue. It is the client's responsibility to
+ * ensure all entries on the queue are purged or otherwise handled
+ * appropriately.
+ */
+void
+ntb_transport_link_down(struct ntb_transport_qp *qp)
+{
+ struct ntb_transport_ctx *nt = qp->transport;
+ int i;
+ uint32_t val;
+
+ qp->client_ready = false;
+ for (i = 0, val = 0; i < nt->qp_count; i++) {
+ if (nt->qp_vec[i].client_ready)
+ val |= (1 << i);
+ }
+ ntb_peer_spad_write(qp->dev, NTBT_QP_LINKS, val);
+
+ if (qp->link_is_up)
+ ntb_send_link_down(qp);
+ else
+ callout_drain(&qp->link_work);
+}
+
+/**
+ * ntb_transport_link_query - Query transport link state
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query the NTB transport queue's connectivity to the remote system.
+ *
+ * RETURNS: true for link up or false for link down
+ */
+bool
+ntb_transport_link_query(struct ntb_transport_qp *qp)
+{
+
+ return (qp->link_is_up);
+}
+
+static void
+ntb_send_link_down(struct ntb_transport_qp *qp)
+{
+ struct ntb_queue_entry *entry;
+ int i, rc;
+
+ if (!qp->link_is_up)
+ return;
+
+ for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
+ entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+ if (entry != NULL)
+ break;
+ pause("NTB Wait for link down", hz / 10);
+ }
+
+ if (entry == NULL)
+ return;
+
+ entry->cb_data = NULL;
+ entry->buf = NULL;
+ entry->len = 0;
+ entry->flags = NTBT_LINK_DOWN_FLAG;
+
+ mtx_lock(&qp->tx_lock);
+ rc = ntb_process_tx(qp, entry);
+ mtx_unlock(&qp->tx_lock);
+ if (rc != 0)
+ printf("ntb: Failed to send link down\n");
+
+ ntb_qp_link_down_reset(qp);
+}
+
+
+/* List Management */
+
+static void
+ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
+ struct ntb_queue_list *list)
+{
+
+ mtx_lock_spin(lock);
+ STAILQ_INSERT_TAIL(list, entry, entry);
+ mtx_unlock_spin(lock);
+}
+
+static struct ntb_queue_entry *
+ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list)
+{
+ struct ntb_queue_entry *entry;
+
+ mtx_lock_spin(lock);
+ if (STAILQ_EMPTY(list)) {
+ entry = NULL;
+ goto out;
+ }
+ entry = STAILQ_FIRST(list);
+ STAILQ_REMOVE_HEAD(list, entry);
+out:
+ mtx_unlock_spin(lock);
+
+ return (entry);
+}
+
+static struct ntb_queue_entry *
+ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from,
+ struct ntb_queue_list *to)
+{
+ struct ntb_queue_entry *entry;
+
+ mtx_lock_spin(lock);
+ if (STAILQ_EMPTY(from)) {
+ entry = NULL;
+ goto out;
+ }
+ entry = STAILQ_FIRST(from);
+ STAILQ_REMOVE_HEAD(from, entry);
+ STAILQ_INSERT_TAIL(to, entry, entry);
+
+out:
+ mtx_unlock_spin(lock);
+ return (entry);
+}
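These helpers are thin spin-locked wrappers around STAILQ; the transport moves ntb_queue_entry objects between its free, pending, and completion lists with them. A userspace sketch of the same STAILQ pattern (the lock is omitted and the list and field names are illustrative):

#include <sys/queue.h>
#include <assert.h>

struct entry {
	int id;
	STAILQ_ENTRY(entry) link;
};
STAILQ_HEAD(elist, entry);

int
main(void)
{
	struct elist free_q = STAILQ_HEAD_INITIALIZER(free_q);
	struct elist pend_q = STAILQ_HEAD_INITIALIZER(pend_q);
	struct entry e = { .id = 1 };
	struct entry *p;

	/* ntb_list_add(): append under the (omitted) spin lock. */
	STAILQ_INSERT_TAIL(&free_q, &e, link);

	/* ntb_list_mv(): pop the head of one list, append to another. */
	p = STAILQ_FIRST(&free_q);
	STAILQ_REMOVE_HEAD(&free_q, link);
	STAILQ_INSERT_TAIL(&pend_q, p, link);

	assert(STAILQ_EMPTY(&free_q));
	assert(STAILQ_FIRST(&pend_q)->id == 1);
	return (0);
}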
+
+/**
+ * ntb_transport_qp_num - Query the qp number
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query qp number of the NTB transport queue
+ *
+ * RETURNS: a zero-based number specifying the qp number
+ */
+unsigned char
+ntb_transport_qp_num(struct ntb_transport_qp *qp)
+{
+
+ return (qp->qp_num);
+}
+
+/**
+ * ntb_transport_max_size - Query the max payload size of a qp
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query the maximum payload size permissible on the given qp
+ *
+ * RETURNS: the max payload size of a qp
+ */
+unsigned int
+ntb_transport_max_size(struct ntb_transport_qp *qp)
+{
+
+ return (qp->tx_max_frame - sizeof(struct ntb_payload_header));
+}
+
+unsigned int
+ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
+{
+ unsigned int head = qp->tx_index;
+ unsigned int tail = qp->remote_rx_info->entry;
+
+ return (tail >= head ? tail - head : qp->tx_max_entry + tail - head);
+}
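The free-entry computation is standard ring arithmetic: the wrapped and non-wrapped cases collapse into a single conditional. Two concrete cases, assuming an illustrative 8-entry ring:

#include <assert.h>

static unsigned
tx_free_entry(unsigned head, unsigned tail, unsigned max_entry)
{

	return (tail >= head ? tail - head : max_entry + tail - head);
}

int
main(void)
{

	/* No wrap: producer at 2, consumer acked through 5 -> 3 free. */
	assert(tx_free_entry(2, 5, 8) == 3);
	/* Wrapped: producer at 6, consumer at 1 -> 8 + 1 - 6 = 3 free. */
	assert(tx_free_entry(6, 1, 8) == 3);
	return (0);
}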
+
+static device_method_t ntb_transport_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, ntb_transport_probe),
+ DEVMETHOD(device_attach, ntb_transport_attach),
+ DEVMETHOD(device_detach, ntb_transport_detach),
+ DEVMETHOD_END
+};
+
+devclass_t ntb_transport_devclass;
+static DEFINE_CLASS_0(ntb_transport, ntb_transport_driver,
+ ntb_transport_methods, sizeof(struct ntb_transport_ctx));
+DRIVER_MODULE(ntb_transport, ntb_hw, ntb_transport_driver,
+ ntb_transport_devclass, NULL, NULL);
+MODULE_DEPEND(ntb_transport, ntb, 1, 1, 1);
+MODULE_VERSION(ntb_transport, 1);
diff --git a/sys/dev/ntb/ntb_transport.h b/sys/dev/ntb/ntb_transport.h
new file mode 100644
index 0000000..63cdbce
--- /dev/null
+++ b/sys/dev/ntb/ntb_transport.h
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+struct ntb_transport_qp;
+
+extern devclass_t ntb_transport_devclass;
+
+enum ntb_link_event {
+ NTB_LINK_DOWN = 0,
+ NTB_LINK_UP,
+};
+
+struct ntb_queue_handlers {
+ void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+ void *data, int len);
+ void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
+ void *data, int len);
+ void (*event_handler)(void *data, enum ntb_link_event status);
+};
+
+int ntb_transport_queue_count(device_t dev);
+struct ntb_transport_qp *
+ntb_transport_create_queue(device_t dev, int q,
+ const struct ntb_queue_handlers *handlers, void *data);
+void ntb_transport_free_queue(struct ntb_transport_qp *qp);
+unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp);
+int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+ unsigned int len);
+int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+ unsigned int len);
+void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len);
+void ntb_transport_link_up(struct ntb_transport_qp *qp);
+void ntb_transport_link_down(struct ntb_transport_qp *qp);
+bool ntb_transport_link_query(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp);
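Taken together, these declarations define the whole client contract: supply three callbacks, create a queue, prime it with receive buffers, and bring the link up. A hypothetical, heavily elided client sketch (the my_* names are placeholders, the cb cookie passed to rx_enqueue is reused here as the buffer pointer, and this compiles only in a kernel context):

static void
my_rx(struct ntb_transport_qp *qp, void *cb, void *data, int len)
{

	/* Consume len bytes at data, then recycle the buffer. */
	ntb_transport_rx_enqueue(qp, data, data, ntb_transport_max_size(qp));
}

static void
my_tx(struct ntb_transport_qp *qp, void *cb, void *data, int len)
{

	/* data has been copied to the peer; free or reuse the buffer. */
}

static void
my_event(void *data, enum ntb_link_event ev)
{

	/* React to NTB_LINK_UP / NTB_LINK_DOWN. */
}

static const struct ntb_queue_handlers my_handlers = {
	.rx_handler = my_rx,
	.tx_handler = my_tx,
	.event_handler = my_event,
};

/* In the client's attach routine (dev: ntb_transport device, sc: softc). */
static int
my_attach(device_t dev, void *sc)
{
	struct ntb_transport_qp *qp;

	qp = ntb_transport_create_queue(dev, 0, &my_handlers, sc);
	if (qp == NULL)
		return (ENXIO);
	/* ... ntb_transport_rx_enqueue() a pool of receive buffers ... */
	ntb_transport_link_up(qp);
	return (0);
}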
diff --git a/sys/dev/pci/pci_pci.c b/sys/dev/pci/pci_pci.c
index 6e8d076..dedc55a 100644
--- a/sys/dev/pci/pci_pci.c
+++ b/sys/dev/pci/pci_pci.c
@@ -918,6 +918,7 @@ static void
pcib_probe_hotplug(struct pcib_softc *sc)
{
device_t dev;
+ uint16_t link_sta, slot_sta;
if (!pci_enable_pcie_hp)
return;
@@ -932,15 +933,29 @@ pcib_probe_hotplug(struct pcib_softc *sc)
sc->pcie_link_cap = pcie_read_config(dev, PCIER_LINK_CAP, 4);
sc->pcie_slot_cap = pcie_read_config(dev, PCIER_SLOT_CAP, 4);
+ if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC) == 0)
+ return;
+
/*
- * XXX: Handling of slots with a power controller needs to be
- * reexamined. Ignore hotplug on such slots for now.
+ * Some devices report that they have an MRL when they actually
+ * do not. Since they always report that the MRL is open, child
+ * devices would be ignored. Try to detect these devices and
+ * ignore their claim of HotPlug support.
+ *
+ * If there is an open MRL but the Data Link Layer is active,
+ * the MRL is not real.
*/
- if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP)
- return;
-
- if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC)
- sc->flags |= PCIB_HOTPLUG;
+ if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) != 0 &&
+ (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) != 0) {
+ link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2);
+ slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
+ if ((slot_sta & PCIEM_SLOT_STA_MRLSS) != 0 &&
+ (link_sta & PCIEM_LINK_STA_DL_ACTIVE) != 0) {
+ return;
+ }
+ }
+
+ sc->flags |= PCIB_HOTPLUG;
}
/*
@@ -966,6 +981,8 @@ pcib_pcie_hotplug_command(struct pcib_softc *sc, uint16_t val, uint16_t mask)
new = (ctl & ~mask) | val;
if (new == ctl)
return;
+ if (bootverbose)
+ device_printf(dev, "HotPlug command: %04x -> %04x\n", ctl, new);
pcie_write_config(dev, PCIER_SLOT_CTL, new, 2);
if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS) &&
(ctl & new) & PCIEM_SLOT_CTL_CCIE) {
@@ -1028,9 +1045,6 @@ pcib_hotplug_inserted(struct pcib_softc *sc)
static int
pcib_hotplug_present(struct pcib_softc *sc)
{
- device_t dev;
-
- dev = sc->dev;
/* Card must be inserted. */
if (!pcib_hotplug_inserted(sc))
@@ -1059,7 +1073,7 @@ pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask,
{
bool card_inserted, ei_engaged;
- /* Clear DETACHING if Present Detect has cleared. */
+ /* Clear DETACHING if Presence Detect has cleared. */
if ((sc->pcie_slot_sta & (PCIEM_SLOT_STA_PDC | PCIEM_SLOT_STA_PDS)) ==
PCIEM_SLOT_STA_PDC)
sc->flags &= ~PCIB_DETACHING;
@@ -1101,14 +1115,15 @@ pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask,
/*
* Start a timer to see if the Data Link Layer times out.
- * Note that we only start the timer if Presence Detect
+ * Note that we only start the timer if Presence Detect or MRL Sensor
* changed on this interrupt. Stop any scheduled timer if
* the Data Link Layer is active.
*/
if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) {
if (card_inserted &&
!(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) &&
- sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC) {
+ sc->pcie_slot_sta &
+ (PCIEM_SLOT_STA_MRLSC | PCIEM_SLOT_STA_PDC)) {
if (cold)
device_printf(sc->dev,
"Data Link Layer inactive\n");
@@ -1144,6 +1159,10 @@ pcib_pcie_intr(void *arg)
/* Clear the events just reported. */
pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2);
+ if (bootverbose)
+ device_printf(dev, "HotPlug interrupt: %#x\n",
+ sc->pcie_slot_sta);
+
if (sc->pcie_slot_sta & PCIEM_SLOT_STA_ABP) {
if (sc->flags & PCIB_DETACH_PENDING) {
device_printf(dev,
@@ -1165,7 +1184,7 @@ pcib_pcie_intr(void *arg)
sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS ? "open" :
"closed");
if (bootverbose && sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC)
- device_printf(dev, "Present Detect Changed to %s\n",
+ device_printf(dev, "Presence Detect Changed to %s\n",
sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS ? "card present" :
"empty");
if (sc->pcie_slot_sta & PCIEM_SLOT_STA_CC)
@@ -1234,7 +1253,7 @@ pcib_pcie_cc_timeout(void *arg)
sta = pcie_read_config(dev, PCIER_SLOT_STA, 2);
if (!(sta & PCIEM_SLOT_STA_CC)) {
device_printf(dev,
- "Hotplug Command Timed Out - forcing detach\n");
+ "HotPlug Command Timed Out - forcing detach\n");
sc->flags &= ~(PCIB_HOTPLUG_CMD_PENDING | PCIB_DETACH_PENDING);
sc->flags |= PCIB_DETACHING;
pcib_pcie_hotplug_update(sc, 0, 0, true);
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 575e42f..d40de0f 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -794,7 +794,7 @@ pmap_init(void)
* include at least one feature that is only supported by older Intel
* or newer AMD processors.
*/
- if (vm_guest == VM_GUEST_VM && (cpu_feature & CPUID_SS) == 0 &&
+ if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 &&
(cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI |
CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP |
AMDID2_FMA4)) == 0)
diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf
index d0ea97c..24bac6b 100644
--- a/sys/kern/capabilities.conf
+++ b/sys/kern/capabilities.conf
@@ -64,9 +64,7 @@ __sysctl
##
## XXRW: Need to check this very carefully.
##
-_umtx_lock
_umtx_op
-_umtx_unlock
##
## Allow process termination using abort2(2).
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index c7447fb..b9e41af 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -736,6 +736,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
if (fr->fr_flags & RFPPWAIT) {
td->td_pflags |= TDP_RFPPWAIT;
td->td_rfppwait_p = p2;
+ td->td_dbgflags |= TDB_VFORK;
}
PROC_UNLOCK(p2);
@@ -1074,7 +1075,7 @@ fork_return(struct thread *td, struct trapframe *frame)
* parent's children, do it now.
*/
dbg = p->p_pptr->p_pptr;
- proc_set_traced(p);
+ proc_set_traced(p, true);
CTR2(KTR_PTRACE,
"fork_return: attaching to new child pid %d: oppid %d",
p->p_pid, p->p_oppid);
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 7af14a0..5a1b05e 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2537,7 +2537,7 @@ ptracestop(struct thread *td, int sig)
* a chance to report itself upon the next iteration.
*/
if ((td->td_dbgflags & TDB_FSTP) != 0 ||
- ((p->p_flag & P2_PTRACE_FSTP) == 0 &&
+ ((p->p_flag2 & P2_PTRACE_FSTP) == 0 &&
p->p_xthread == NULL)) {
p->p_xsig = sig;
p->p_xthread = td;
diff --git a/sys/kern/subr_gtaskqueue.c b/sys/kern/subr_gtaskqueue.c
new file mode 100644
index 0000000..2d655bd
--- /dev/null
+++ b/sys/kern/subr_gtaskqueue.c
@@ -0,0 +1,864 @@
+/*-
+ * Copyright (c) 2000 Doug Rabson
+ * Copyright (c) 2014 Jeff Roberson
+ * Copyright (c) 2016 Matthew Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cpuset.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/libkern.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/gtaskqueue.h>
+#include <sys/unistd.h>
+#include <machine/stdarg.h>
+
+static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
+static void gtaskqueue_thread_enqueue(void *);
+static void gtaskqueue_thread_loop(void *arg);
+
+
+struct gtaskqueue_busy {
+ struct gtask *tb_running;
+ TAILQ_ENTRY(gtaskqueue_busy) tb_link;
+};
+
+static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
+
+struct gtaskqueue {
+ STAILQ_HEAD(, gtask) tq_queue;
+ gtaskqueue_enqueue_fn tq_enqueue;
+ void *tq_context;
+ char *tq_name;
+ TAILQ_HEAD(, gtaskqueue_busy) tq_active;
+ struct mtx tq_mutex;
+ struct thread **tq_threads;
+ int tq_tcount;
+ int tq_spin;
+ int tq_flags;
+ int tq_callouts;
+ taskqueue_callback_fn tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
+ void *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
+};
+
+#define TQ_FLAGS_ACTIVE (1 << 0)
+#define TQ_FLAGS_BLOCKED (1 << 1)
+#define TQ_FLAGS_UNLOCKED_ENQUEUE (1 << 2)
+
+#define DT_CALLOUT_ARMED (1 << 0)
+
+#define TQ_LOCK(tq) \
+ do { \
+ if ((tq)->tq_spin) \
+ mtx_lock_spin(&(tq)->tq_mutex); \
+ else \
+ mtx_lock(&(tq)->tq_mutex); \
+ } while (0)
+#define TQ_ASSERT_LOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_OWNED)
+
+#define TQ_UNLOCK(tq) \
+ do { \
+ if ((tq)->tq_spin) \
+ mtx_unlock_spin(&(tq)->tq_mutex); \
+ else \
+ mtx_unlock(&(tq)->tq_mutex); \
+ } while (0)
+#define TQ_ASSERT_UNLOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
+
+static __inline int
+TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
+ int t)
+{
+ if (tq->tq_spin)
+ return (msleep_spin(p, m, wm, t));
+ return (msleep(p, m, pri, wm, t));
+}
+
+static struct gtaskqueue *
+_gtaskqueue_create(const char *name, int mflags,
+ taskqueue_enqueue_fn enqueue, void *context,
+ int mtxflags, const char *mtxname __unused)
+{
+ struct gtaskqueue *queue;
+ char *tq_name;
+
+ tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
+ if (!tq_name)
+ return (NULL);
+
+ snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
+
+ queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
+	if (!queue) {
+		free(tq_name, M_GTASKQUEUE);
+		return (NULL);
+	}
+
+ STAILQ_INIT(&queue->tq_queue);
+ TAILQ_INIT(&queue->tq_active);
+ queue->tq_enqueue = enqueue;
+ queue->tq_context = context;
+ queue->tq_name = tq_name;
+ queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
+ queue->tq_flags |= TQ_FLAGS_ACTIVE;
+ if (enqueue == gtaskqueue_thread_enqueue)
+ queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
+ mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
+
+ return (queue);
+}
+
+
+/*
+ * Signal a taskqueue thread to terminate.
+ */
+static void
+gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
+{
+
+ while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
+ wakeup(tq);
+ TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
+ }
+}
+
+static void
+gtaskqueue_free(struct gtaskqueue *queue)
+{
+
+ TQ_LOCK(queue);
+ queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
+ gtaskqueue_terminate(queue->tq_threads, queue);
+ KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
+ KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
+ mtx_destroy(&queue->tq_mutex);
+ free(queue->tq_threads, M_GTASKQUEUE);
+ free(queue->tq_name, M_GTASKQUEUE);
+ free(queue, M_GTASKQUEUE);
+}
+
+int
+grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
+{
+ TQ_LOCK(queue);
+ if (gtask->ta_flags & TASK_ENQUEUED) {
+ TQ_UNLOCK(queue);
+ return (0);
+ }
+ STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
+ gtask->ta_flags |= TASK_ENQUEUED;
+ TQ_UNLOCK(queue);
+ if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
+ queue->tq_enqueue(queue->tq_context);
+ return (0);
+}
+
+static void
+gtaskqueue_task_nop_fn(void *context)
+{
+}
+
+/*
+ * Block until all currently queued tasks in this taskqueue
+ * have begun execution. Tasks queued during execution of
+ * this function are ignored.
+ */
+static void
+gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
+{
+ struct gtask t_barrier;
+
+ if (STAILQ_EMPTY(&queue->tq_queue))
+ return;
+
+ /*
+ * Enqueue our barrier after all current tasks, but with
+ * the highest priority so that newly queued tasks cannot
+	 * pass it. Because of the high priority, we cannot use
+	 * taskqueue_enqueue_locked() directly (which drops the lock
+	 * anyway), so just insert it at the tail while we hold the
+	 * queue lock.
+ */
+ GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
+ STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
+ t_barrier.ta_flags |= TASK_ENQUEUED;
+
+ /*
+ * Once the barrier has executed, all previously queued tasks
+ * have completed or are currently executing.
+ */
+ while (t_barrier.ta_flags & TASK_ENQUEUED)
+ TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
+}
+
+/*
+ * Block until all currently executing tasks for this taskqueue
+ * complete. Tasks that begin execution during the execution
+ * of this function are ignored.
+ */
+static void
+gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
+{
+ struct gtaskqueue_busy tb_marker, *tb_first;
+
+ if (TAILQ_EMPTY(&queue->tq_active))
+ return;
+
+	/* Block gtaskqueue_terminate(). */
+ queue->tq_callouts++;
+
+ /*
+ * Wait for all currently executing taskqueue threads
+ * to go idle.
+ */
+ tb_marker.tb_running = TB_DRAIN_WAITER;
+ TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
+ while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
+ TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
+ TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
+
+ /*
+	 * Wake up any other drain waiter that happened to queue up
+ * without any intervening active thread.
+ */
+ tb_first = TAILQ_FIRST(&queue->tq_active);
+ if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
+ wakeup(tb_first);
+
+	/* Release gtaskqueue_terminate(). */
+ queue->tq_callouts--;
+ if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
+ wakeup_one(queue->tq_threads);
+}
+
+void
+gtaskqueue_block(struct gtaskqueue *queue)
+{
+
+ TQ_LOCK(queue);
+ queue->tq_flags |= TQ_FLAGS_BLOCKED;
+ TQ_UNLOCK(queue);
+}
+
+void
+gtaskqueue_unblock(struct gtaskqueue *queue)
+{
+
+ TQ_LOCK(queue);
+ queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
+ if (!STAILQ_EMPTY(&queue->tq_queue))
+ queue->tq_enqueue(queue->tq_context);
+ TQ_UNLOCK(queue);
+}
+
+static void
+gtaskqueue_run_locked(struct gtaskqueue *queue)
+{
+ struct gtaskqueue_busy tb;
+ struct gtaskqueue_busy *tb_first;
+ struct gtask *gtask;
+
+ KASSERT(queue != NULL, ("tq is NULL"));
+ TQ_ASSERT_LOCKED(queue);
+ tb.tb_running = NULL;
+
+ while (STAILQ_FIRST(&queue->tq_queue)) {
+ TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
+
+ /*
+ * Carefully remove the first task from the queue and
+ * clear its TASK_ENQUEUED flag
+ */
+ gtask = STAILQ_FIRST(&queue->tq_queue);
+ KASSERT(gtask != NULL, ("task is NULL"));
+ STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
+ gtask->ta_flags &= ~TASK_ENQUEUED;
+ tb.tb_running = gtask;
+ TQ_UNLOCK(queue);
+
+ KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
+ gtask->ta_func(gtask->ta_context);
+
+ TQ_LOCK(queue);
+ tb.tb_running = NULL;
+ wakeup(gtask);
+
+ TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
+ tb_first = TAILQ_FIRST(&queue->tq_active);
+ if (tb_first != NULL &&
+ tb_first->tb_running == TB_DRAIN_WAITER)
+ wakeup(tb_first);
+ }
+}
+
+static int
+task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
+{
+ struct gtaskqueue_busy *tb;
+
+ TQ_ASSERT_LOCKED(queue);
+ TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
+ if (tb->tb_running == gtask)
+ return (1);
+ }
+ return (0);
+}
+
+static int
+gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
+{
+
+ if (gtask->ta_flags & TASK_ENQUEUED)
+ STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
+ gtask->ta_flags &= ~TASK_ENQUEUED;
+ return (task_is_running(queue, gtask) ? EBUSY : 0);
+}
+
+int
+gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
+{
+ int error;
+
+ TQ_LOCK(queue);
+ error = gtaskqueue_cancel_locked(queue, gtask);
+ TQ_UNLOCK(queue);
+
+ return (error);
+}
+
+void
+gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
+{
+
+ if (!queue->tq_spin)
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
+
+ TQ_LOCK(queue);
+ while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
+ TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
+ TQ_UNLOCK(queue);
+}
+
+void
+gtaskqueue_drain_all(struct gtaskqueue *queue)
+{
+
+ if (!queue->tq_spin)
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
+
+ TQ_LOCK(queue);
+ gtaskqueue_drain_tq_queue(queue);
+ gtaskqueue_drain_tq_active(queue);
+ TQ_UNLOCK(queue);
+}
+
+static int
+_gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
+ cpuset_t *mask, const char *name, va_list ap)
+{
+ char ktname[MAXCOMLEN + 1];
+ struct thread *td;
+ struct gtaskqueue *tq;
+ int i, error;
+
+ if (count <= 0)
+ return (EINVAL);
+
+ vsnprintf(ktname, sizeof(ktname), name, ap);
+ tq = *tqp;
+
+ tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
+ M_NOWAIT | M_ZERO);
+ if (tq->tq_threads == NULL) {
+ printf("%s: no memory for %s threads\n", __func__, ktname);
+ return (ENOMEM);
+ }
+
+ for (i = 0; i < count; i++) {
+ if (count == 1)
+ error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
+ &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
+ else
+ error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
+ &tq->tq_threads[i], RFSTOPPED, 0,
+ "%s_%d", ktname, i);
+ if (error) {
+			/* Safe to continue; gtaskqueue_free() does the right thing. */
+ printf("%s: kthread_add(%s): error %d", __func__,
+ ktname, error);
+ tq->tq_threads[i] = NULL; /* paranoid */
+ } else
+ tq->tq_tcount++;
+ }
+ for (i = 0; i < count; i++) {
+ if (tq->tq_threads[i] == NULL)
+ continue;
+ td = tq->tq_threads[i];
+ if (mask) {
+ error = cpuset_setthread(td->td_tid, mask);
+ /*
+ * Failing to pin is rarely an actual fatal error;
+ * it'll just affect performance.
+ */
+ if (error)
+ printf("%s: curthread=%llu: can't pin; "
+ "error=%d\n",
+ __func__,
+ (unsigned long long) td->td_tid,
+ error);
+ }
+ thread_lock(td);
+ sched_prio(td, pri);
+ sched_add(td, SRQ_BORING);
+ thread_unlock(td);
+ }
+
+ return (0);
+}
+
+static int
+gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
+ const char *name, ...)
+{
+ va_list ap;
+ int error;
+
+ va_start(ap, name);
+ error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
+ va_end(ap);
+ return (error);
+}
+
+static inline void
+gtaskqueue_run_callback(struct gtaskqueue *tq,
+ enum taskqueue_callback_type cb_type)
+{
+ taskqueue_callback_fn tq_callback;
+
+ TQ_ASSERT_UNLOCKED(tq);
+ tq_callback = tq->tq_callbacks[cb_type];
+ if (tq_callback != NULL)
+ tq_callback(tq->tq_cb_contexts[cb_type]);
+}
+
+static void
+gtaskqueue_thread_loop(void *arg)
+{
+ struct gtaskqueue **tqp, *tq;
+
+ tqp = arg;
+ tq = *tqp;
+ gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
+ TQ_LOCK(tq);
+ while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
+ /* XXX ? */
+ gtaskqueue_run_locked(tq);
+ /*
+		 * Because gtaskqueue_run_locked() can drop tq_mutex, we
+		 * need to recheck whether TQ_FLAGS_ACTIVE was cleared in
+		 * the meantime, which would mean we missed a wakeup.
+ */
+ if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
+ break;
+ TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
+ }
+ gtaskqueue_run_locked(tq);
+ /*
+ * This thread is on its way out, so just drop the lock temporarily
+ * in order to call the shutdown callback. This allows the callback
+ * to look at the taskqueue, even just before it dies.
+ */
+ TQ_UNLOCK(tq);
+ gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
+ TQ_LOCK(tq);
+
+ /* rendezvous with thread that asked us to terminate */
+ tq->tq_tcount--;
+ wakeup_one(tq->tq_threads);
+ TQ_UNLOCK(tq);
+ kthread_exit();
+}
+
+static void
+gtaskqueue_thread_enqueue(void *context)
+{
+ struct gtaskqueue **tqp, *tq;
+
+ tqp = context;
+ tq = *tqp;
+ wakeup_one(tq);
+}
+
+
+static struct gtaskqueue *
+gtaskqueue_create_fast(const char *name, int mflags,
+ taskqueue_enqueue_fn enqueue, void *context)
+{
+	return (_gtaskqueue_create(name, mflags, enqueue, context,
+	    MTX_SPIN, "fast_taskqueue"));
+}
+
+
+struct taskqgroup_cpu {
+ LIST_HEAD(, grouptask) tgc_tasks;
+ struct gtaskqueue *tgc_taskq;
+ int tgc_cnt;
+ int tgc_cpu;
+};
+
+struct taskqgroup {
+ struct taskqgroup_cpu tqg_queue[MAXCPU];
+ struct mtx tqg_lock;
+ char * tqg_name;
+ int tqg_adjusting;
+ int tqg_stride;
+ int tqg_cnt;
+};
+
+struct taskq_bind_task {
+ struct gtask bt_task;
+ int bt_cpuid;
+};
+
+static void
+taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx)
+{
+ struct taskqgroup_cpu *qcpu;
+
+ qcpu = &qgroup->tqg_queue[idx];
+ LIST_INIT(&qcpu->tgc_tasks);
+ qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
+	    gtaskqueue_thread_enqueue, &qcpu->tgc_taskq);
+ gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
+ "%s_%d", qgroup->tqg_name, idx);
+ qcpu->tgc_cpu = idx * qgroup->tqg_stride;
+}
+
+static void
+taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
+{
+
+ gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
+}
+
+/*
+ * Find the taskq with the fewest tasks that is not already servicing
+ * this uniq identifier.
+ */
+static int
+taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
+{
+ struct grouptask *n;
+ int i, idx, mincnt;
+ int strict;
+
+ mtx_assert(&qgroup->tqg_lock, MA_OWNED);
+ if (qgroup->tqg_cnt == 0)
+ return (0);
+ idx = -1;
+ mincnt = INT_MAX;
+ /*
+	 * Two passes: first scan for a queue with the fewest tasks that
+	 * does not already service this uniq id. If that fails, simply
+	 * pick the queue with the fewest total tasks.
+ */
+ for (strict = 1; mincnt == INT_MAX; strict = 0) {
+ for (i = 0; i < qgroup->tqg_cnt; i++) {
+ if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
+ continue;
+ if (strict) {
+ LIST_FOREACH(n,
+ &qgroup->tqg_queue[i].tgc_tasks, gt_list)
+ if (n->gt_uniq == uniq)
+ break;
+ if (n != NULL)
+ continue;
+ }
+ mincnt = qgroup->tqg_queue[i].tgc_cnt;
+ idx = i;
+ }
+ }
+ if (idx == -1)
+ panic("taskqgroup_find: Failed to pick a qid.");
+
+ return (idx);
+}
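The selection policy is a two-pass argmin: the first pass constrains the search to queues not already serving this uniq cookie, and only if none qualifies does the second pass pick the global minimum. A standalone sketch of the same policy over plain arrays (assumes n > 0, as the caller guarantees):

#include <assert.h>
#include <limits.h>
#include <stdbool.h>

/* cnt[i]: tasks on queue i; busy[i]: queue already serves this cookie. */
static int
find_queue(const int *cnt, const bool *busy, int n)
{
	int i, strict, idx = -1, mincnt = INT_MAX;

	for (strict = 1; mincnt == INT_MAX; strict = 0)
		for (i = 0; i < n; i++) {
			if (cnt[i] > mincnt)
				continue;
			if (strict && busy[i])
				continue;
			mincnt = cnt[i];
			idx = i;
		}
	return (idx);
}

int
main(void)
{
	int cnt[3] = { 1, 3, 2 };
	bool busy[3] = { true, false, false };

	/* Queue 0 is least loaded but already serves the cookie. */
	assert(find_queue(cnt, busy, 3) == 2);
	return (0);
}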
+
+void
+taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
+ void *uniq, int irq, char *name)
+{
+ cpuset_t mask;
+ int qid;
+
+ gtask->gt_uniq = uniq;
+ gtask->gt_name = name;
+ gtask->gt_irq = irq;
+ gtask->gt_cpu = -1;
+ mtx_lock(&qgroup->tqg_lock);
+ qid = taskqgroup_find(qgroup, uniq);
+ qgroup->tqg_queue[qid].tgc_cnt++;
+ LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
+ gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
+ if (irq != -1 && smp_started) {
+ CPU_ZERO(&mask);
+ CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
+ mtx_unlock(&qgroup->tqg_lock);
+ intr_setaffinity(irq, &mask);
+ } else
+ mtx_unlock(&qgroup->tqg_lock);
+}
+
+int
+taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
+ void *uniq, int cpu, int irq, char *name)
+{
+ cpuset_t mask;
+ int i, qid;
+
+ qid = -1;
+ gtask->gt_uniq = uniq;
+ gtask->gt_name = name;
+ gtask->gt_irq = irq;
+ gtask->gt_cpu = cpu;
+ mtx_lock(&qgroup->tqg_lock);
+ if (smp_started) {
+ for (i = 0; i < qgroup->tqg_cnt; i++)
+ if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
+ qid = i;
+ break;
+ }
+ if (qid == -1) {
+ mtx_unlock(&qgroup->tqg_lock);
+ return (EINVAL);
+ }
+ } else
+ qid = 0;
+ qgroup->tqg_queue[qid].tgc_cnt++;
+ LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
+ gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
+ if (irq != -1 && smp_started) {
+ CPU_ZERO(&mask);
+ CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
+ mtx_unlock(&qgroup->tqg_lock);
+ intr_setaffinity(irq, &mask);
+ } else
+ mtx_unlock(&qgroup->tqg_lock);
+ return (0);
+}
+
+void
+taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
+{
+ int i;
+
+ mtx_lock(&qgroup->tqg_lock);
+ for (i = 0; i < qgroup->tqg_cnt; i++)
+ if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
+ break;
+ if (i == qgroup->tqg_cnt)
+ panic("taskqgroup_detach: task not in group\n");
+ qgroup->tqg_queue[i].tgc_cnt--;
+ LIST_REMOVE(gtask, gt_list);
+ mtx_unlock(&qgroup->tqg_lock);
+ gtask->gt_taskqueue = NULL;
+}
+
+static void
+taskqgroup_binder(void *ctx)
+{
+ struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
+ cpuset_t mask;
+ int error;
+
+ CPU_ZERO(&mask);
+ CPU_SET(gtask->bt_cpuid, &mask);
+ error = cpuset_setthread(curthread->td_tid, &mask);
+ thread_lock(curthread);
+ sched_bind(curthread, gtask->bt_cpuid);
+ thread_unlock(curthread);
+
+ if (error)
+ printf("taskqgroup_binder: setaffinity failed: %d\n",
+ error);
+ free(gtask, M_DEVBUF);
+}
+
+static void
+taskqgroup_bind(struct taskqgroup *qgroup)
+{
+ struct taskq_bind_task *gtask;
+ int i;
+
+ /*
+ * Bind taskqueue threads to specific CPUs, if they have been assigned
+ * one.
+ */
+ for (i = 0; i < qgroup->tqg_cnt; i++) {
+		gtask = malloc(sizeof(*gtask), M_DEVBUF, M_NOWAIT);
+		if (gtask == NULL)
+			continue;	/* M_NOWAIT allocation can fail. */
+ GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
+ gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
+ grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
+ &gtask->bt_task);
+ }
+}
+
+static int
+_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
+{
+ LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
+ cpuset_t mask;
+ struct grouptask *gtask;
+ int i, old_cnt, qid;
+
+ mtx_assert(&qgroup->tqg_lock, MA_OWNED);
+
+ if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) {
+ printf("taskqgroup_adjust failed cnt: %d stride: %d mp_ncpus: %d smp_started: %d\n",
+ cnt, stride, mp_ncpus, smp_started);
+ return (EINVAL);
+ }
+ if (qgroup->tqg_adjusting) {
+ printf("taskqgroup_adjust failed: adjusting\n");
+ return (EBUSY);
+ }
+ qgroup->tqg_adjusting = 1;
+ old_cnt = qgroup->tqg_cnt;
+ mtx_unlock(&qgroup->tqg_lock);
+ /*
+ * Set up queue for tasks added before boot.
+ */
+ if (old_cnt == 0) {
+ LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
+ grouptask, gt_list);
+ qgroup->tqg_queue[0].tgc_cnt = 0;
+ }
+
+ /*
+	 * Create taskq threads if the count has grown.
+ */
+ for (i = old_cnt; i < cnt; i++)
+ taskqgroup_cpu_create(qgroup, i);
+ mtx_lock(&qgroup->tqg_lock);
+ qgroup->tqg_cnt = cnt;
+ qgroup->tqg_stride = stride;
+
+ /*
+ * Adjust drivers to use new taskqs.
+ */
+ for (i = 0; i < old_cnt; i++) {
+ while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
+ LIST_REMOVE(gtask, gt_list);
+ qgroup->tqg_queue[i].tgc_cnt--;
+ LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
+ }
+ }
+
+ while ((gtask = LIST_FIRST(&gtask_head))) {
+ LIST_REMOVE(gtask, gt_list);
+ if (gtask->gt_cpu == -1)
+ qid = taskqgroup_find(qgroup, gtask->gt_uniq);
+ else {
+ for (i = 0; i < qgroup->tqg_cnt; i++)
+ if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) {
+ qid = i;
+ break;
+ }
+ }
+ qgroup->tqg_queue[qid].tgc_cnt++;
+ LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
+ gt_list);
+ gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
+ }
+ /*
+ * Set new CPU and IRQ affinity
+ */
+ for (i = 0; i < cnt; i++) {
+ qgroup->tqg_queue[i].tgc_cpu = i * qgroup->tqg_stride;
+ CPU_ZERO(&mask);
+ CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask);
+ LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) {
+ if (gtask->gt_irq == -1)
+ continue;
+ intr_setaffinity(gtask->gt_irq, &mask);
+ }
+ }
+ mtx_unlock(&qgroup->tqg_lock);
+
+ /*
+	 * Remove taskq threads if the count has shrunk.
+ */
+ for (i = cnt; i < old_cnt; i++)
+ taskqgroup_cpu_remove(qgroup, i);
+
+ mtx_lock(&qgroup->tqg_lock);
+ qgroup->tqg_adjusting = 0;
+
+ taskqgroup_bind(qgroup);
+
+ return (0);
+}
+
+int
+taskqgroup_adjust(struct taskqgroup *qgroup, int cpu, int stride)
+{
+ int error;
+
+ mtx_lock(&qgroup->tqg_lock);
+ error = _taskqgroup_adjust(qgroup, cpu, stride);
+ mtx_unlock(&qgroup->tqg_lock);
+
+ return (error);
+}
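A consumer wires into this interface in three steps: create a group, attach per-queue tasks (optionally with an IRQ so interrupt affinity follows the queue), and size the group once SMP is up. A hypothetical sketch; the my_* names, and the gt_task field of struct grouptask, are assumptions from sys/gtaskqueue.h:

static struct taskqgroup *my_qgroup;
static struct grouptask my_gt;
static char my_group_name[] = "my_group";
static char my_gt_name[] = "my q0";

static void
my_deferred_fn(void *ctx)
{

	/* Per-queue deferred work runs on the group's bound thread. */
}

static void
my_setup(void *softc, int irq)
{

	my_qgroup = taskqgroup_create(my_group_name);
	GTASK_INIT(&my_gt.gt_task, 0, 0, my_deferred_fn, softc);
	taskqgroup_attach(my_qgroup, &my_gt, softc, irq, my_gt_name);
	/* One queue per two CPUs; redistributes tasks and IRQ affinity. */
	taskqgroup_adjust(my_qgroup, mp_ncpus / 2, 2);
}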
+
+struct taskqgroup *
+taskqgroup_create(char *name)
+{
+ struct taskqgroup *qgroup;
+
+ qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
+ mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
+ qgroup->tqg_name = name;
+ LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
+
+ return (qgroup);
+}
+
+void
+taskqgroup_destroy(struct taskqgroup *qgroup)
+{
+
+}
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index 65fb3e7..e48072c 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -148,6 +148,7 @@ static const char *const vm_guest_sysctl_names[] = {
"xen",
"hv",
"vmware",
+ "kvm",
NULL
};
CTASSERT(nitems(vm_guest_sysctl_names) - 1 == VM_LAST);
diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c
index aad2a1e..3e2a3b3 100644
--- a/sys/kern/subr_syscall.c
+++ b/sys/kern/subr_syscall.c
@@ -242,5 +242,13 @@ again:
cv_timedwait(&p2->p_pwait, &p2->p_mtx, hz);
}
PROC_UNLOCK(p2);
+
+ if (td->td_dbgflags & TDB_VFORK) {
+ PROC_LOCK(p);
+ if (p->p_ptevents & PTRACE_VFORK)
+ ptracestop(td, SIGTRAP);
+ td->td_dbgflags &= ~TDB_VFORK;
+ PROC_UNLOCK(p);
+ }
}
}
diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c
index 12124b8..5a20148 100644
--- a/sys/kern/subr_taskqueue.c
+++ b/sys/kern/subr_taskqueue.c
@@ -261,22 +261,6 @@ taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
}
int
-grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task)
-{
- TQ_LOCK(queue);
- if (task->ta_pending) {
- TQ_UNLOCK(queue);
- return (0);
- }
- STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
- task->ta_pending = 1;
- TQ_UNLOCK(queue);
- if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
- queue->tq_enqueue(queue->tq_context);
- return (0);
-}
-
-int
taskqueue_enqueue(struct taskqueue *queue, struct task *task)
{
int res;
@@ -806,347 +790,3 @@ taskqueue_member(struct taskqueue *queue, struct thread *td)
}
return (ret);
}
-
-struct taskqgroup_cpu {
- LIST_HEAD(, grouptask) tgc_tasks;
- struct taskqueue *tgc_taskq;
- int tgc_cnt;
- int tgc_cpu;
-};
-
-struct taskqgroup {
- struct taskqgroup_cpu tqg_queue[MAXCPU];
- struct mtx tqg_lock;
- char * tqg_name;
- int tqg_adjusting;
- int tqg_stride;
- int tqg_cnt;
-};
-
-struct taskq_bind_task {
- struct task bt_task;
- int bt_cpuid;
-};
-
-static void
-taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx)
-{
- struct taskqgroup_cpu *qcpu;
- int i, j;
-
- qcpu = &qgroup->tqg_queue[idx];
- LIST_INIT(&qcpu->tgc_tasks);
- qcpu->tgc_taskq = taskqueue_create_fast(NULL, M_WAITOK,
- taskqueue_thread_enqueue, &qcpu->tgc_taskq);
- taskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
- "%s_%d", qgroup->tqg_name, idx);
-
- for (i = CPU_FIRST(), j = 0; j < idx * qgroup->tqg_stride;
- j++, i = CPU_NEXT(i)) {
- /*
- * Wait: evaluate the idx * qgroup->tqg_stride'th CPU,
- * potentially wrapping the actual count
- */
- }
- qcpu->tgc_cpu = i;
-}
-
-static void
-taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
-{
-
- taskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
-}
-
-/*
- * Find the taskq with least # of tasks that doesn't currently have any
- * other queues from the uniq identifier.
- */
-static int
-taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
-{
- struct grouptask *n;
- int i, idx, mincnt;
- int strict;
-
- mtx_assert(&qgroup->tqg_lock, MA_OWNED);
- if (qgroup->tqg_cnt == 0)
- return (0);
- idx = -1;
- mincnt = INT_MAX;
- /*
- * Two passes; First scan for a queue with the least tasks that
- * does not already service this uniq id. If that fails simply find
- * the queue with the least total tasks;
- */
- for (strict = 1; mincnt == INT_MAX; strict = 0) {
- for (i = 0; i < qgroup->tqg_cnt; i++) {
- if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
- continue;
- if (strict) {
- LIST_FOREACH(n,
- &qgroup->tqg_queue[i].tgc_tasks, gt_list)
- if (n->gt_uniq == uniq)
- break;
- if (n != NULL)
- continue;
- }
- mincnt = qgroup->tqg_queue[i].tgc_cnt;
- idx = i;
- }
- }
- if (idx == -1)
- panic("taskqgroup_find: Failed to pick a qid.");
-
- return (idx);
-}
-
-void
-taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
- void *uniq, int irq, char *name)
-{
- cpuset_t mask;
- int qid;
-
- gtask->gt_uniq = uniq;
- gtask->gt_name = name;
- gtask->gt_irq = irq;
- gtask->gt_cpu = -1;
- mtx_lock(&qgroup->tqg_lock);
- qid = taskqgroup_find(qgroup, uniq);
- qgroup->tqg_queue[qid].tgc_cnt++;
- LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
- gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
- if (irq != -1 && smp_started) {
- CPU_ZERO(&mask);
- CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
- mtx_unlock(&qgroup->tqg_lock);
- intr_setaffinity(irq, &mask);
- } else
- mtx_unlock(&qgroup->tqg_lock);
-}
-
-int
-taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
- void *uniq, int cpu, int irq, char *name)
-{
- cpuset_t mask;
- int i, qid;
-
- qid = -1;
- gtask->gt_uniq = uniq;
- gtask->gt_name = name;
- gtask->gt_irq = irq;
- gtask->gt_cpu = cpu;
- mtx_lock(&qgroup->tqg_lock);
- if (smp_started) {
- for (i = 0; i < qgroup->tqg_cnt; i++)
- if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
- qid = i;
- break;
- }
- if (qid == -1) {
- mtx_unlock(&qgroup->tqg_lock);
- return (EINVAL);
- }
- } else
- qid = 0;
- qgroup->tqg_queue[qid].tgc_cnt++;
- LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
- gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
- if (irq != -1 && smp_started) {
- CPU_ZERO(&mask);
- CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
- mtx_unlock(&qgroup->tqg_lock);
- intr_setaffinity(irq, &mask);
- } else
- mtx_unlock(&qgroup->tqg_lock);
- return (0);
-}
-
-void
-taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
-{
- int i;
-
- mtx_lock(&qgroup->tqg_lock);
- for (i = 0; i < qgroup->tqg_cnt; i++)
- if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
- break;
- if (i == qgroup->tqg_cnt)
- panic("taskqgroup_detach: task not in group\n");
- qgroup->tqg_queue[i].tgc_cnt--;
- LIST_REMOVE(gtask, gt_list);
- mtx_unlock(&qgroup->tqg_lock);
- gtask->gt_taskqueue = NULL;
-}
-
-static void
-taskqgroup_binder(void *ctx, int pending)
-{
- struct taskq_bind_task *task = (struct taskq_bind_task *)ctx;
- cpuset_t mask;
- int error;
-
- CPU_ZERO(&mask);
- CPU_SET(task->bt_cpuid, &mask);
- error = cpuset_setthread(curthread->td_tid, &mask);
- thread_lock(curthread);
- sched_bind(curthread, task->bt_cpuid);
- thread_unlock(curthread);
-
- if (error)
- printf("taskqgroup_binder: setaffinity failed: %d\n",
- error);
- free(task, M_DEVBUF);
-}
-
-static void
-taskqgroup_bind(struct taskqgroup *qgroup)
-{
- struct taskq_bind_task *task;
- int i;
-
- /*
- * Bind taskqueue threads to specific CPUs, if they have been assigned
- * one.
- */
- for (i = 0; i < qgroup->tqg_cnt; i++) {
- task = malloc(sizeof (*task), M_DEVBUF, M_NOWAIT);
- TASK_INIT(&task->bt_task, 0, taskqgroup_binder, task);
- task->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
- taskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
- &task->bt_task);
- }
-}
-
-static int
-_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
-{
- LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
- cpuset_t mask;
- struct grouptask *gtask;
- int i, k, old_cnt, qid, cpu;
-
- mtx_assert(&qgroup->tqg_lock, MA_OWNED);
-
- if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) {
- printf("taskqgroup_adjust failed cnt: %d stride: %d "
- "mp_ncpus: %d smp_started: %d\n", cnt, stride, mp_ncpus,
- smp_started);
- return (EINVAL);
- }
- if (qgroup->tqg_adjusting) {
- printf("taskqgroup_adjust failed: adjusting\n");
- return (EBUSY);
- }
- qgroup->tqg_adjusting = 1;
- old_cnt = qgroup->tqg_cnt;
- mtx_unlock(&qgroup->tqg_lock);
- /*
- * Set up queue for tasks added before boot.
- */
- if (old_cnt == 0) {
- LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
- grouptask, gt_list);
- qgroup->tqg_queue[0].tgc_cnt = 0;
- }
-
- /*
- * If new taskq threads have been added.
- */
- for (i = old_cnt; i < cnt; i++)
- taskqgroup_cpu_create(qgroup, i);
- mtx_lock(&qgroup->tqg_lock);
- qgroup->tqg_cnt = cnt;
- qgroup->tqg_stride = stride;
-
- /*
- * Adjust drivers to use new taskqs.
- */
- for (i = 0; i < old_cnt; i++) {
- while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
- LIST_REMOVE(gtask, gt_list);
- qgroup->tqg_queue[i].tgc_cnt--;
- LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
- }
- }
-
- while ((gtask = LIST_FIRST(&gtask_head))) {
- LIST_REMOVE(gtask, gt_list);
- if (gtask->gt_cpu == -1)
- qid = taskqgroup_find(qgroup, gtask->gt_uniq);
- else {
- for (i = 0; i < qgroup->tqg_cnt; i++)
- if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) {
- qid = i;
- break;
- }
- }
- qgroup->tqg_queue[qid].tgc_cnt++;
- LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
- gt_list);
- gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
- }
- /*
- * Set new CPU and IRQ affinity
- */
- cpu = CPU_FIRST();
- for (i = 0; i < cnt; i++) {
- qgroup->tqg_queue[i].tgc_cpu = cpu;
- for (k = 0; k < qgroup->tqg_stride; k++)
- cpu = CPU_NEXT(cpu);
- CPU_ZERO(&mask);
- CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask);
- LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) {
- if (gtask->gt_irq == -1)
- continue;
- intr_setaffinity(gtask->gt_irq, &mask);
- }
- }
- mtx_unlock(&qgroup->tqg_lock);
-
- /*
- * If taskq thread count has been reduced.
- */
- for (i = cnt; i < old_cnt; i++)
- taskqgroup_cpu_remove(qgroup, i);
-
- mtx_lock(&qgroup->tqg_lock);
- qgroup->tqg_adjusting = 0;
-
- taskqgroup_bind(qgroup);
-
- return (0);
-}
-
-int
-taskqgroup_adjust(struct taskqgroup *qgroup, int cpu, int stride)
-{
- int error;
-
- mtx_lock(&qgroup->tqg_lock);
- error = _taskqgroup_adjust(qgroup, cpu, stride);
- mtx_unlock(&qgroup->tqg_lock);
-
- return (error);
-}
-
-struct taskqgroup *
-taskqgroup_create(char *name)
-{
- struct taskqgroup *qgroup;
-
- qgroup = malloc(sizeof(*qgroup), M_TASKQUEUE, M_WAITOK | M_ZERO);
- mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
- qgroup->tqg_name = name;
- LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
-
- return (qgroup);
-}
-
-void
-taskqgroup_destroy(struct taskqgroup *qgroup)
-{
-
-}
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 751db15..69a3e4b 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -693,12 +693,13 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
#endif
void
-proc_set_traced(struct proc *p)
+proc_set_traced(struct proc *p, bool stop)
{
PROC_LOCK_ASSERT(p, MA_OWNED);
p->p_flag |= P_TRACED;
- p->p_flag2 |= P2_PTRACE_FSTP;
+ if (stop)
+ p->p_flag2 |= P2_PTRACE_FSTP;
p->p_ptevents = PTRACE_DEFAULT;
p->p_oppid = p->p_pptr->p_pid;
}
@@ -910,7 +911,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
switch (req) {
case PT_TRACE_ME:
/* set my trace flag and "owner" so it can read/write me */
- proc_set_traced(p);
+ proc_set_traced(p, false);
if (p->p_flag & P_PPWAIT)
p->p_flag |= P_PPTRACE;
CTR1(KTR_PTRACE, "PT_TRACE_ME: pid %d", p->p_pid);
@@ -927,7 +928,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
* The old parent is remembered so we can put things back
* on a "detach".
*/
- proc_set_traced(p);
+ proc_set_traced(p, true);
if (p->p_pptr != td->td_proc) {
proc_reparent(p, td->td_proc);
}
@@ -1000,7 +1001,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
}
tmp = *(int *)addr;
if ((tmp & ~(PTRACE_EXEC | PTRACE_SCE | PTRACE_SCX |
- PTRACE_FORK | PTRACE_LWP)) != 0) {
+ PTRACE_FORK | PTRACE_LWP | PTRACE_VFORK)) != 0) {
error = EINVAL;
break;
}
@@ -1321,7 +1322,11 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
if (td2->td_dbgflags & TDB_FORK) {
pl->pl_flags |= PL_FLAG_FORKED;
pl->pl_child_pid = td2->td_dbg_forked;
- }
+ if (td2->td_dbgflags & TDB_VFORK)
+ pl->pl_flags |= PL_FLAG_VFORKED;
+ } else if ((td2->td_dbgflags & (TDB_SCX | TDB_VFORK)) ==
+ TDB_VFORK)
+ pl->pl_flags |= PL_FLAG_VFORK_DONE;
if (td2->td_dbgflags & TDB_CHILD)
pl->pl_flags |= PL_FLAG_CHILD;
if (td2->td_dbgflags & TDB_BORN)
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index b48eea1..dce0e1a 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -1441,7 +1441,7 @@ static struct aiocb_ops aiocb_ops_osigevent = {
*/
int
aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
- int type, struct aiocb_ops *ops)
+ int type, struct aiocb_ops *ops)
{
struct proc *p = td->td_proc;
cap_rights_t rights;
@@ -2476,14 +2476,9 @@ static int
kern_aio_fsync(struct thread *td, int op, struct aiocb *ujob,
struct aiocb_ops *ops)
{
- struct proc *p = td->td_proc;
- struct kaioinfo *ki;
if (op != O_SYNC) /* XXX lack of O_DSYNC */
return (EINVAL);
- ki = p->p_aioinfo;
- if (ki == NULL)
- aio_init_aioinfo(p);
return (aio_aqueue(td, ujob, NULL, LIO_SYNC, ops));
}
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index a7977bf..d9182f2 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -640,7 +640,6 @@ int
vop_stdfsync(ap)
struct vop_fsync_args /* {
struct vnode *a_vp;
- struct ucred *a_cred;
int a_waitfor;
struct thread *a_td;
} */ *ap;
diff --git a/sys/modules/ntb/Makefile b/sys/modules/ntb/Makefile
index a5169a0..3eaf751 100644
--- a/sys/modules/ntb/Makefile
+++ b/sys/modules/ntb/Makefile
@@ -1,5 +1,5 @@
# $FreeBSD$
-SUBDIR= ntb_hw if_ntb
+SUBDIR= ntb ntb_hw ntb_transport if_ntb
.include <bsd.subdir.mk>
diff --git a/sys/modules/ntb/ntb/Makefile b/sys/modules/ntb/ntb/Makefile
new file mode 100644
index 0000000..c3f647b
--- /dev/null
+++ b/sys/modules/ntb/ntb/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+.PATH: ${.CURDIR}/../../../dev/ntb
+
+KMOD = ntb
+SRCS = ntb.c ntb_if.c
+SRCS += device_if.h bus_if.h ntb_if.h
+
+.include <bsd.kmod.mk>
diff --git a/sys/modules/ntb/ntb_hw/Makefile b/sys/modules/ntb/ntb_hw/Makefile
index fc46b46..f3e7645 100644
--- a/sys/modules/ntb/ntb_hw/Makefile
+++ b/sys/modules/ntb/ntb_hw/Makefile
@@ -4,6 +4,6 @@
KMOD = ntb_hw
SRCS = ntb_hw.c
-SRCS += device_if.h bus_if.h pci_if.h
+SRCS += device_if.h bus_if.h pci_if.h ntb_if.h
.include <bsd.kmod.mk>
diff --git a/sys/modules/ntb/ntb_transport/Makefile b/sys/modules/ntb/ntb_transport/Makefile
new file mode 100644
index 0000000..2a8fcbc
--- /dev/null
+++ b/sys/modules/ntb/ntb_transport/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+.PATH: ${.CURDIR}/../../../dev/ntb
+
+KMOD = ntb_transport
+SRCS = ntb_transport.c
+SRCS += device_if.h bus_if.h ntb_if.h
+
+.include <bsd.kmod.mk>
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 8109479..3b723c1 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -166,7 +166,8 @@ __FBSDID("$FreeBSD$");
/*
* List of capabilities to possibly mask on the member interface.
*/
-#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM)
+#define BRIDGE_IFCAPS_MASK (IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM|\
+ IFCAP_TXCSUM_IPV6)
/*
* List of capabilities to strip
diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c
index 86e8379..6ac65c6 100644
--- a/sys/net/if_epair.c
+++ b/sys/net/if_epair.c
@@ -807,9 +807,9 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
* cache locality but we can at least allow parallelism.
*/
sca->cpuid =
- netisr_get_cpuid(sca->ifp->if_index % netisr_get_cpucount());
+ netisr_get_cpuid(sca->ifp->if_index);
scb->cpuid =
- netisr_get_cpuid(scb->ifp->if_index % netisr_get_cpucount());
+ netisr_get_cpuid(scb->ifp->if_index);
/* Initialise pseudo media types. */
ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
diff --git a/sys/net/ifdi_if.m b/sys/net/ifdi_if.m
index 60629e4..301a6ee 100644
--- a/sys/net/ifdi_if.m
+++ b/sys/net/ifdi_if.m
@@ -60,9 +60,10 @@ CODE {
return (0);
}
- static void
+ static int
null_queue_intr_enable(if_ctx_t _ctx __unused, uint16_t _qid __unused)
{
+ return (ENOTSUP);
}
static void
@@ -194,7 +195,7 @@ METHOD void intr_disable {
if_ctx_t _ctx;
};
-METHOD void queue_intr_enable {
+METHOD int queue_intr_enable {
if_ctx_t _ctx;
uint16_t _qid;
} DEFAULT null_queue_intr_enable;
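With queue_intr_enable now returning int, a driver method can report that re-arming is unsupported or failed instead of failing silently. A hypothetical skeleton (the mydrv names are placeholders):

static int
mydrv_if_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
{

	/* Unmask the interrupt for queue qid in device registers. */
	return (0);		/* or an errno value on failure */
}

/* In the driver's device_method_t table: */
DEVMETHOD(ifdi_queue_intr_enable, mydrv_if_queue_intr_enable),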
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
index c11da16..0d54f2c 100644
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
+#include <sys/limits.h>
#include <net/if.h>
@@ -101,7 +102,6 @@ __FBSDID("$FreeBSD$");
* Enable mbuf vectors for compressing long mbuf chains
*/
-
/*
* NB:
* - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead
@@ -181,8 +181,10 @@ struct iflib_ctx {
struct sysctl_oid *ifc_sysctl_node;
uint16_t ifc_sysctl_ntxqs;
uint16_t ifc_sysctl_nrxqs;
- uint16_t ifc_sysctl_ntxds;
- uint16_t ifc_sysctl_nrxds;
+ uint16_t ifc_sysctl_qs_eq_override;
+
+ uint16_t ifc_sysctl_ntxds[8];
+ uint16_t ifc_sysctl_nrxds[8];
struct if_txrx ifc_txrx;
#define isc_txd_encap ifc_txrx.ift_txd_encap
#define isc_txd_flush ifc_txrx.ift_txd_flush
@@ -294,10 +296,11 @@ typedef struct iflib_sw_tx_desc_array {
#define IFLIB_RESTART_BUDGET 8
-#define IFC_LEGACY 0x1
-#define IFC_QFLUSH 0x2
-#define IFC_MULTISEG 0x4
-#define IFC_DMAR 0x8
+#define IFC_LEGACY 0x01
+#define IFC_QFLUSH 0x02
+#define IFC_MULTISEG 0x04
+#define IFC_DMAR 0x08
+#define IFC_SC_ALLOCATED 0x10
#define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \
CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \
@@ -311,6 +314,7 @@ struct iflib_txq {
uint8_t ift_db_pending;
uint8_t ift_db_pending_queued;
uint8_t ift_npending;
+ uint8_t ift_br_offset;
/* implicit pad */
uint64_t ift_processed;
uint64_t ift_cleaned;
@@ -414,6 +418,7 @@ struct iflib_rxq {
uint16_t ifr_cq_cidx;
uint16_t ifr_cq_pidx;
uint8_t ifr_cq_gen;
+ uint8_t ifr_fl_offset;
if_ctx_t ifr_ctx;
iflib_fl_t ifr_fl;
@@ -604,7 +609,7 @@ static void iflib_tx_structures_free(if_ctx_t ctx);
static void iflib_rx_structures_free(if_ctx_t ctx);
static int iflib_queues_alloc(if_ctx_t ctx);
static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq);
-static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx);
+static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget);
static int iflib_qset_structures_setup(if_ctx_t ctx);
static int iflib_msix_init(if_ctx_t ctx);
static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str);
@@ -875,7 +880,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) {
nic_i = fl->ifl_cidx;
nm_i = netmap_idx_n2k(kring, nic_i);
- avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i);
+ avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i, INT_MAX);
for (n = 0; avail > 0; n++, avail--) {
error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
if (error)
@@ -930,7 +935,7 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
/*
* XXX we should be batching this operation - TODO
*/
- ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1);
+ ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1, fl->ifl_buf_size);
bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map,
BUS_DMASYNC_PREREAD);
nm_i = nm_next(nm_i, lim);
@@ -958,6 +963,7 @@ static int
iflib_netmap_attach(if_ctx_t ctx)
{
struct netmap_adapter na;
+ if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
bzero(&na, sizeof(na));
@@ -966,8 +972,8 @@ iflib_netmap_attach(if_ctx_t ctx)
MPASS(ctx->ifc_softc_ctx.isc_ntxqsets);
MPASS(ctx->ifc_softc_ctx.isc_nrxqsets);
- na.num_tx_desc = ctx->ifc_sctx->isc_ntxd;
- na.num_rx_desc = ctx->ifc_sctx->isc_ntxd;
+ na.num_tx_desc = scctx->isc_ntxd[0];
+ na.num_rx_desc = scctx->isc_nrxd[0];
na.nm_txsync = iflib_netmap_txsync;
na.nm_rxsync = iflib_netmap_rxsync;
na.nm_register = iflib_netmap_register;
@@ -986,7 +992,7 @@ iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq)
if (slot == 0)
return;
- for (int i = 0; i < ctx->ifc_sctx->isc_ntxd; i++) {
+ for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) {
/*
* In netmap mode, set the map for the packet buffer.
@@ -1011,7 +1017,7 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq)
if (slot == 0)
return;
sd = rxq->ifr_fl[0].ifl_sds;
- nrxd = ctx->ifc_sctx->isc_nrxd;
+ nrxd = ctx->ifc_softc_ctx.isc_nrxd[0];
for (int i = 0; i < nrxd; i++, sd++) {
int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i);
uint64_t paddr;
@@ -1021,7 +1027,7 @@ iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq)
vaddr = addr = PNMB(na, slot + sj, &paddr);
netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, sd->ifsd_map, addr);
/* Update descriptor and the cached value */
- ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1);
+ ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1, rxq->ifr_fl[0].ifl_buf_size);
}
/* preserve queue */
if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) {
@@ -1236,7 +1242,8 @@ iflib_txsd_alloc(iflib_txq_t txq)
nsegments = scctx->isc_tx_nsegments;
ntsosegments = scctx->isc_tx_tso_segments_max;
- MPASS(sctx->isc_ntxd > 0);
+ MPASS(scctx->isc_ntxd[0] > 0);
+ MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0);
MPASS(nsegments > 0);
MPASS(ntsosegments > 0);
/*
@@ -1259,13 +1266,11 @@ iflib_txsd_alloc(iflib_txq_t txq)
sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize);
goto fail;
}
-#ifdef INVARIANTS
+#ifdef IFLIB_DIAGNOSTICS
device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n",
sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize);
+
#endif
- device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n",
- scctx->isc_tx_tso_size_max, ntsosegments,
- scctx->isc_tx_tso_segsize_max);
if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
@@ -1282,21 +1287,21 @@ iflib_txsd_alloc(iflib_txq_t txq)
goto fail;
}
-#ifdef INVARIANTS
+#ifdef IFLIB_DIAGNOSTICS
device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n",
scctx->isc_tx_tso_size_max, ntsosegments,
scctx->isc_tx_tso_segsize_max);
#endif
if (!(txq->ift_sds.ifsd_flags =
(uint8_t *) malloc(sizeof(uint8_t) *
- sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) {
+ scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate tx_buffer memory\n");
err = ENOMEM;
goto fail;
}
if (!(txq->ift_sds.ifsd_m =
(struct mbuf **) malloc(sizeof(struct mbuf *) *
- sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) {
+ scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate tx_buffer memory\n");
err = ENOMEM;
goto fail;
@@ -1308,13 +1313,13 @@ iflib_txsd_alloc(iflib_txq_t txq)
return (0);
if (!(txq->ift_sds.ifsd_map =
- (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * sctx->isc_ntxd, M_IFLIB, M_NOWAIT | M_ZERO))) {
+ (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
device_printf(dev, "Unable to allocate tx_buffer map memory\n");
err = ENOMEM;
goto fail;
}
- for (int i = 0; i < sctx->isc_ntxd; i++) {
+ for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) {
err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]);
if (err != 0) {
device_printf(dev, "Unable to create TX DMA map\n");
@@ -1348,9 +1353,8 @@ static void
iflib_txq_destroy(iflib_txq_t txq)
{
if_ctx_t ctx = txq->ift_ctx;
- if_shared_ctx_t sctx = ctx->ifc_sctx;
- for (int i = 0; i < sctx->isc_ntxd; i++)
+ for (int i = 0; i < txq->ift_size; i++)
iflib_txsd_destroy(ctx, txq, i);
if (txq->ift_sds.ifsd_map != NULL) {
free(txq->ift_sds.ifsd_map, M_IFLIB);
@@ -1390,7 +1394,7 @@ iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i)
bus_dmamap_unload(txq->ift_desc_tag,
txq->ift_sds.ifsd_map[i]);
}
- m_freem(*mp);
+ m_free(*mp);
DBG_COUNTER_INC(tx_frees);
*mp = NULL;
}
@@ -1399,7 +1403,7 @@ static int
iflib_txq_setup(iflib_txq_t txq)
{
if_ctx_t ctx = txq->ift_ctx;
- if_shared_ctx_t sctx = ctx->ifc_sctx;
+ if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
iflib_dma_info_t di;
int i;
@@ -1408,7 +1412,7 @@ iflib_txq_setup(iflib_txq_t txq)
/* Reset indices */
txq->ift_cidx_processed = txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
- txq->ift_size = sctx->isc_ntxd;
+ txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset];
for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++)
bzero((void *)di->idi_vaddr, di->idi_size);
@@ -1433,22 +1437,25 @@ iflib_rxsd_alloc(iflib_rxq_t rxq)
{
if_ctx_t ctx = rxq->ifr_ctx;
if_shared_ctx_t sctx = ctx->ifc_sctx;
+ if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
device_t dev = ctx->ifc_dev;
iflib_fl_t fl;
iflib_rxsd_t rxsd;
int err;
- MPASS(sctx->isc_nrxd > 0);
+ MPASS(scctx->isc_nrxd[0] > 0);
+ MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0);
fl = rxq->ifr_fl;
for (int i = 0; i < rxq->ifr_nfl; i++, fl++) {
fl->ifl_sds = malloc(sizeof(struct iflib_sw_rx_desc) *
- sctx->isc_nrxd, M_IFLIB, M_WAITOK | M_ZERO);
+ scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB,
+ M_WAITOK | M_ZERO);
if (fl->ifl_sds == NULL) {
device_printf(dev, "Unable to allocate rx sw desc memory\n");
return (ENOMEM);
}
- fl->ifl_size = sctx->isc_nrxd; /* this isn't necessarily the same */
+ fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */
err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1, 0, /* alignment, bounds */
BUS_SPACE_MAXADDR, /* lowaddr */
@@ -1468,7 +1475,7 @@ iflib_rxsd_alloc(iflib_rxq_t rxq)
}
rxsd = fl->ifl_sds;
- for (int i = 0; i < sctx->isc_nrxd; i++, rxsd++) {
+ for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++, rxsd++) {
err = bus_dmamap_create(fl->ifl_desc_tag, 0, &rxsd->ifsd_map);
if (err) {
device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
@@ -1626,7 +1633,7 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
}
if (n == 0 || i == IFLIB_MAX_RX_REFRESH) {
ctx->isc_rxd_refill(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx,
- fl->ifl_bus_addrs, fl->ifl_vm_addrs, i);
+ fl->ifl_bus_addrs, fl->ifl_vm_addrs, i, fl->ifl_buf_size);
i = 0;
pidx = fl->ifl_pidx;
}
@@ -1854,7 +1861,11 @@ iflib_init_locked(if_ctx_t ctx)
for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
iflib_netmap_rxq_init(ctx, rxq);
}
+#ifdef INVARIANTS
+ i = if_getdrvflags(ifp);
+#endif
IFDI_INIT(ctx);
+ MPASS(if_getdrvflags(ifp) == i);
for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
if (iflib_fl_setup(fl)) {
@@ -1902,7 +1913,6 @@ iflib_stop(if_ctx_t ctx)
iflib_txq_t txq = ctx->ifc_txqs;
iflib_rxq_t rxq = ctx->ifc_rxqs;
if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
- if_shared_ctx_t sctx = ctx->ifc_sctx;
iflib_dma_info_t di;
iflib_fl_t fl;
int i, j;
@@ -1920,7 +1930,7 @@ iflib_stop(if_ctx_t ctx)
/* clean any enqueued buffers */
iflib_txq_check_drain(txq, 0);
/* Free any existing tx buffers. */
- for (j = 0; j < sctx->isc_ntxd; j++) {
+ for (j = 0; j < txq->ift_size; j++) {
iflib_txsd_free(ctx, txq, j);
}
txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0;
@@ -1990,13 +2000,24 @@ assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri)
caddr_t cl;
i = 0;
+ mh = NULL;
do {
sd = rxd_frag_to_sd(rxq, &ri->iri_frags[i], &cltype, TRUE);
MPASS(sd->ifsd_cl != NULL);
MPASS(sd->ifsd_m != NULL);
+
+ /* Don't include zero-length frags */
+ if (ri->iri_frags[i].irf_len == 0) {
+ /* XXX we can save the cluster here, but not the mbuf */
+ m_init(sd->ifsd_m, M_NOWAIT, MT_DATA, 0);
+ m_free(sd->ifsd_m);
+ sd->ifsd_m = NULL;
+ continue;
+ }
+
m = sd->ifsd_m;
- if (i == 0) {
+ if (mh == NULL) {
flags = M_PKTHDR|M_EXT;
mh = mt = m;
padlen = ri->iri_pad;
@@ -2019,14 +2040,12 @@ assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri)
*/
m->m_data += padlen;
ri->iri_len -= padlen;
- m->m_len = ri->iri_len;
+ m->m_len = ri->iri_frags[i].irf_len;
} while (++i < ri->iri_nfrags);
return (mh);
}
-
-
/*
* Process one software descriptor
*/
@@ -2037,13 +2056,14 @@ iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri)
iflib_rxsd_t sd;
/* should I merge this back in now that the two paths are basically duplicated? */
- if (ri->iri_len <= IFLIB_RX_COPY_THRESH) {
+ if (ri->iri_nfrags == 1 &&
+ ri->iri_frags[0].irf_len <= IFLIB_RX_COPY_THRESH) {
sd = rxd_frag_to_sd(rxq, &ri->iri_frags[0], NULL, FALSE);
m = sd->ifsd_m;
sd->ifsd_m = NULL;
m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR);
memcpy(m->m_data, sd->ifsd_cl, ri->iri_len);
- m->m_len = ri->iri_len;
+ m->m_len = ri->iri_frags[0].irf_len;
} else {
m = assemble_segments(rxq, ri);
}
@@ -2063,13 +2083,13 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
{
if_ctx_t ctx = rxq->ifr_ctx;
if_shared_ctx_t sctx = ctx->ifc_sctx;
+ if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
int avail, i;
uint16_t *cidxp;
struct if_rxd_info ri;
int err, budget_left, rx_bytes, rx_pkts;
iflib_fl_t fl;
struct ifnet *ifp;
- struct lro_entry *queued;
int lro_enabled;
/*
* XXX early demux data packets so that if_input processing only handles
@@ -2084,11 +2104,11 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
mh = mt = NULL;
MPASS(budget > 0);
rx_pkts = rx_bytes = 0;
- if (sctx->isc_flags & IFLIB_HAS_CQ)
+ if (sctx->isc_flags & IFLIB_HAS_RXCQ)
cidxp = &rxq->ifr_cq_cidx;
else
cidxp = &rxq->ifr_fl[0].ifl_cidx;
- if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp)) == 0) {
+ if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) {
for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
__iflib_fl_refill_lt(ctx, fl, budget + 8);
DBG_COUNTER_INC(rx_unavail);
@@ -2112,10 +2132,11 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
/* in lieu of handling correctly - make sure it isn't being unhandled */
MPASS(err == 0);
- if (sctx->isc_flags & IFLIB_HAS_CQ) {
- /* we know we consumed _one_ CQ entry */
- if (++rxq->ifr_cq_cidx == sctx->isc_nrxd) {
- rxq->ifr_cq_cidx = 0;
+ if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
+ *cidxp = ri.iri_cidx;
+ /* Update our consumer index */
+ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) {
+ rxq->ifr_cq_cidx -= scctx->isc_nrxd[0];
rxq->ifr_cq_gen = 0;
}
/* was this only a completion queue message? */
@@ -2128,7 +2149,7 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
/* will advance the cidx on the corresponding free lists */
m = iflib_rxd_pkt_get(rxq, &ri);
if (avail == 0 && budget_left)
- avail = iflib_rxd_avail(ctx, rxq, *cidxp);
+ avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left);
if (__predict_false(m == NULL)) {
DBG_COUNTER_INC(rx_mbuf_null);
@@ -2148,7 +2169,6 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
ifp = ctx->ifc_ifp;
lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO);
-
while (mh != NULL) {
m = mh;
mh = mh->m_nextpkt;
@@ -2162,32 +2182,33 @@ iflib_rxeof(iflib_rxq_t rxq, int budget)
DBG_COUNTER_INC(rx_if_input);
ifp->if_input(ifp, m);
}
+
if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes);
if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts);
/*
* Flush any outstanding LRO work
*/
- while ((queued = LIST_FIRST(&rxq->ifr_lc.lro_active)) != NULL) {
- LIST_REMOVE(queued, next);
#if defined(INET6) || defined(INET)
- tcp_lro_flush(&rxq->ifr_lc, queued);
+ tcp_lro_flush_all(&rxq->ifr_lc);
#endif
- }
- return (iflib_rxd_avail(ctx, rxq, *cidxp));
+ if (avail)
+ return true;
+ return (iflib_rxd_avail(ctx, rxq, *cidxp, 1));
}
#define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags)
#define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG)
-#define TXQ_MAX_DB_DEFERRED(ctx) (ctx->ifc_sctx->isc_ntxd >> 5)
-#define TXQ_MAX_DB_CONSUMED(ctx) (ctx->ifc_sctx->isc_ntxd >> 4)
+#define TXQ_MAX_DB_DEFERRED(size) (size >> 5)
+#define TXQ_MAX_DB_CONSUMED(size) (size >> 4)
static __inline void
iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring)
{
uint32_t dbval;
- if (ring || txq->ift_db_pending >= TXQ_MAX_DB_DEFERRED(ctx)) {
+ if (ring || txq->ift_db_pending >=
+ TXQ_MAX_DB_DEFERRED(txq->ift_size)) {
/* the lock will only ever be contended in the !min_latency case */
if (!TXDB_TRYLOCK(txq))
@@ -2233,9 +2254,9 @@ static int
iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
{
struct ether_vlan_header *eh;
- struct mbuf *m;
+ struct mbuf *m, *n;
- m = *mp;
+ n = m = *mp;
/*
* Determine where frame payload starts.
* Jump over vlan headers if already present,
@@ -2261,7 +2282,6 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
{
struct ip *ip = NULL;
struct tcphdr *th = NULL;
- struct mbuf *n;
int minthlen;
minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th));
@@ -2403,37 +2423,31 @@ collapse_pkthdr(struct mbuf *m0)
/*
* If dodgy hardware rejects the scatter gather chain we've handed it
- * we'll need to rebuild the mbuf chain before we can call m_defrag
+ * we'll need to remove the mbuf chain from ifsg_m[] before we can add the
+ * m_defrag'd mbufs
*/
static __noinline struct mbuf *
-iflib_rebuild_mbuf(iflib_txq_t txq)
+iflib_remove_mbuf(iflib_txq_t txq)
{
-
- int ntxd, mhlen, len, i, pidx;
+ int ntxd, i, pidx;
struct mbuf *m, *mh, **ifsd_m;
- if_shared_ctx_t sctx;
pidx = txq->ift_pidx;
ifsd_m = txq->ift_sds.ifsd_m;
- sctx = txq->ift_ctx->ifc_sctx;
- ntxd = sctx->isc_ntxd;
+ ntxd = txq->ift_size;
mh = m = ifsd_m[pidx];
ifsd_m[pidx] = NULL;
#if MEMORY_LOGGING
txq->ift_dequeued++;
#endif
- len = m->m_len;
- mhlen = m->m_pkthdr.len;
i = 1;
- while (len < mhlen && (m->m_next == NULL)) {
- m->m_next = ifsd_m[(pidx + i) & (ntxd-1)];
+ while (m) {
ifsd_m[(pidx + i) & (ntxd -1)] = NULL;
#if MEMORY_LOGGING
txq->ift_dequeued++;
#endif
m = m->m_next;
- len += m->m_len;
i++;
}
return (mh);
@@ -2446,6 +2460,7 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map,
{
if_ctx_t ctx;
if_shared_ctx_t sctx;
+ if_softc_ctx_t scctx;
int i, next, pidx, mask, err, maxsegsz, ntxd, count;
struct mbuf *m, *tmp, **ifsd_m, **mp;
@@ -2459,8 +2474,9 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map,
ctx = txq->ift_ctx;
sctx = ctx->ifc_sctx;
+ scctx = &ctx->ifc_softc_ctx;
ifsd_m = txq->ift_sds.ifsd_m;
- ntxd = sctx->isc_ntxd;
+ ntxd = txq->ift_size;
pidx = txq->ift_pidx;
if (map != NULL) {
uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags;
@@ -2472,13 +2488,12 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map,
ifsd_flags[pidx] |= TX_SW_DESC_MAPPED;
i = 0;
next = pidx;
- mask = (sctx->isc_ntxd-1);
+ mask = (txq->ift_size-1);
m = *m0;
do {
mp = &ifsd_m[next];
*mp = m;
m = m->m_next;
- (*mp)->m_next = NULL;
if (__predict_false((*mp)->m_len == 0)) {
m_free(*mp);
*mp = NULL;
@@ -2529,13 +2544,12 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map,
count++;
tmp = m;
m = m->m_next;
- tmp->m_next = NULL;
} while (m != NULL);
*nsegs = i;
}
return (0);
err:
- *m0 = iflib_rebuild_mbuf(txq);
+ *m0 = iflib_remove_mbuf(txq);
return (EFBIG);
}
@@ -2558,7 +2572,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp)
sctx = ctx->ifc_sctx;
scctx = &ctx->ifc_softc_ctx;
segs = txq->ift_segs;
- ntxd = sctx->isc_ntxd;
+ ntxd = txq->ift_size;
m_head = *m_headp;
map = NULL;
@@ -2645,14 +2659,14 @@ defrag:
if (map != NULL)
bus_dmamap_unload(desc_tag, map);
DBG_COUNTER_INC(encap_txq_avail_fail);
- if (txq->ift_task.gt_task.ta_pending == 0)
+ if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0)
GROUPTASK_ENQUEUE(&txq->ift_task);
return (ENOBUFS);
}
pi.ipi_segs = segs;
pi.ipi_nsegs = nsegs;
- MPASS(pidx >= 0 && pidx < sctx->isc_ntxd);
+ MPASS(pidx >= 0 && pidx < txq->ift_size);
#ifdef PKT_DEBUG
print_pkt(&pi);
#endif
@@ -2661,11 +2675,12 @@ defrag:
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
DBG_COUNTER_INC(tx_encap);
- MPASS(pi.ipi_new_pidx >= 0 && pi.ipi_new_pidx < sctx->isc_ntxd);
+ MPASS(pi.ipi_new_pidx >= 0 &&
+ pi.ipi_new_pidx < txq->ift_size);
ndesc = pi.ipi_new_pidx - pi.ipi_pidx;
if (pi.ipi_new_pidx < pi.ipi_pidx) {
- ndesc += sctx->isc_ntxd;
+ ndesc += txq->ift_size;
txq->ift_gen = 1;
}
MPASS(pi.ipi_new_pidx != pidx);
@@ -2678,7 +2693,7 @@ defrag:
txq->ift_pidx = pi.ipi_new_pidx;
txq->ift_npending += pi.ipi_ndescs;
} else if (__predict_false(err == EFBIG && remap < 2)) {
- *m_headp = m_head = iflib_rebuild_mbuf(txq);
+ *m_headp = m_head = iflib_remove_mbuf(txq);
remap = 1;
txq->ift_txd_encap_efbig++;
goto defrag;
@@ -2700,7 +2715,7 @@ defrag_failed:
#define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets)
#define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets)
-#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NRXQSETS(ctx)) + FIRST_QSET(ctx))
+#define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx))
#define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments))
#define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh)
#define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max)
@@ -2712,7 +2727,7 @@ defrag_failed:
*
* ORing with 2 assures that min occupancy is never less than 2 without any conditional logic
*/
-#define TXQ_MIN_OCCUPANCY(ctx) ((ctx->ifc_sctx->isc_ntxd >> 6)| 0x2)
+#define TXQ_MIN_OCCUPANCY(size) ((size >> 6)| 0x2)
static inline int
iflib_txq_min_occupancy(iflib_txq_t txq)
@@ -2720,7 +2735,9 @@ iflib_txq_min_occupancy(iflib_txq_t txq)
if_ctx_t ctx;
ctx = txq->ift_ctx;
- return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen) < TXQ_MIN_OCCUPANCY(ctx) + MAX_TX_DESC(ctx));
+ return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx,
+ txq->ift_gen) < TXQ_MIN_OCCUPANCY(txq->ift_size) +
+ MAX_TX_DESC(ctx));
}
static void
@@ -2734,7 +2751,7 @@ iflib_tx_desc_free(iflib_txq_t txq, int n)
cidx = txq->ift_cidx;
gen = txq->ift_gen;
- qsize = txq->ift_ctx->ifc_sctx->isc_ntxd;
+ qsize = txq->ift_size;
mask = qsize-1;
hasmap = txq->ift_sds.ifsd_map != NULL;
ifsd_flags = txq->ift_sds.ifsd_flags;
@@ -2760,7 +2777,7 @@ iflib_tx_desc_free(iflib_txq_t txq, int n)
/* XXX we don't support any drivers that batch packets yet */
MPASS(m->m_nextpkt == NULL);
- m_freem(m);
+ m_free(m);
ifsd_m[cidx] = NULL;
#if MEMORY_LOGGING
txq->ift_dequeued++;
@@ -2856,7 +2873,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) {
DBG_COUNTER_INC(txq_drain_flushing);
for (i = 0; i < avail; i++) {
- m_freem(r->items[(cidx + i) & (r->size-1)]);
+ m_free(r->items[(cidx + i) & (r->size-1)]);
r->items[(cidx + i) & (r->size-1)] = NULL;
}
return (avail);
@@ -2903,7 +2920,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
break;
- if (desc_used > TXQ_MAX_DB_CONSUMED(ctx))
+ if (desc_used > TXQ_MAX_DB_CONSUMED(txq->ift_size))
break;
}
@@ -2924,7 +2941,7 @@ iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
}
static void
-_task_fn_tx(void *context, int pending)
+_task_fn_tx(void *context)
{
iflib_txq_t txq = context;
if_ctx_t ctx = txq->ift_ctx;
@@ -2935,11 +2952,12 @@ _task_fn_tx(void *context, int pending)
}
static void
-_task_fn_rx(void *context, int pending)
+_task_fn_rx(void *context)
{
iflib_rxq_t rxq = context;
if_ctx_t ctx = rxq->ifr_ctx;
bool more;
+ int rc;
DBG_COUNTER_INC(task_fn_rxs);
if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
@@ -2950,7 +2968,8 @@ _task_fn_rx(void *context, int pending)
IFDI_INTR_ENABLE(ctx);
else {
DBG_COUNTER_INC(rx_intr_enables);
- IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
+ rc = IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
+ KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but it is not implemented in the driver"));
}
}
if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
@@ -2960,7 +2979,7 @@ _task_fn_rx(void *context, int pending)
}
static void
-_task_fn_admin(void *context, int pending)
+_task_fn_admin(void *context)
{
if_ctx_t ctx = context;
if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
@@ -2990,7 +3009,7 @@ _task_fn_admin(void *context, int pending)
static void
-_task_fn_iov(void *context, int pending)
+_task_fn_iov(void *context)
{
if_ctx_t ctx = context;
@@ -3049,8 +3068,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
if_ctx_t ctx = if_getsoftc(ifp);
iflib_txq_t txq;
- struct mbuf *marr[8], **mp, *next;
- int err, i, count, qidx;
+ int err, qidx;
if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
DBG_COUNTER_INC(tx_frees);
@@ -3058,6 +3076,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
return (0);
}
+ MPASS(m->m_nextpkt == NULL);
qidx = 0;
if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m))
qidx = QIDX(ctx, m);
@@ -3077,6 +3096,7 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
return (ENOBUFS);
}
#endif
+#ifdef notyet
qidx = count = 0;
mp = marr;
next = m;
@@ -3098,22 +3118,21 @@ iflib_if_transmit(if_t ifp, struct mbuf *m)
next = next->m_nextpkt;
mp[i]->m_nextpkt = NULL;
}
+#endif
DBG_COUNTER_INC(tx_seen);
- err = ifmp_ring_enqueue(txq->ift_br[0], (void **)mp, count, TX_BATCH_SIZE);
+ err = ifmp_ring_enqueue(txq->ift_br[0], (void **)&m, 1, TX_BATCH_SIZE);
- if (iflib_txq_can_drain(txq->ift_br[0]))
- GROUPTASK_ENQUEUE(&txq->ift_task);
if (err) {
+ GROUPTASK_ENQUEUE(&txq->ift_task);
/* support forthcoming later */
#ifdef DRIVER_BACKPRESSURE
txq->ift_closed = TRUE;
#endif
- for (i = 0; i < count; i++)
- m_freem(mp[i]);
ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE);
+ m_freem(m);
+ } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) {
+ GROUPTASK_ENQUEUE(&txq->ift_task);
}
- if (count > nitems(marr))
- free(mp, M_IFLIB);
return (err);
}
@@ -3138,13 +3157,13 @@ iflib_if_qflush(if_t ifp)
if_qflush(ifp);
}
-#define IFCAP_REINIT (IFCAP_HWCSUM|IFCAP_TSO4|IFCAP_TSO6|IFCAP_VLAN_HWTAGGING|IFCAP_VLAN_MTU | \
- IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO)
-#define IFCAP_FLAGS (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
+#define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | \
IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO)
+#define IFCAP_REINIT IFCAP_FLAGS
+
static int
iflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
{
@@ -3428,6 +3447,9 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
if_ctx_t ctx;
if_t ifp;
if_softc_ctx_t scctx;
+ int i;
+ uint16_t main_txq;
+ uint16_t main_rxq;
ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO);
@@ -3435,6 +3457,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
if (sc == NULL) {
sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO);
device_set_softc(dev, ctx);
+ ctx->ifc_flags |= IFC_SC_ALLOCATED;
}
ctx->ifc_sctx = sctx;
@@ -3447,28 +3470,112 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
return (err);
}
iflib_add_device_sysctl_pre(ctx);
+
+ scctx = &ctx->ifc_softc_ctx;
+ /*
+ * XXX sanity check that ntxd & nrxd are a power of 2
+ */
+ if (ctx->ifc_sysctl_ntxqs != 0)
+ scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs;
+ if (ctx->ifc_sysctl_nrxqs != 0)
+ scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs;
+
+ for (i = 0; i < sctx->isc_ntxqs; i++) {
+ if (ctx->ifc_sysctl_ntxds[i] != 0)
+ scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i];
+ else
+ scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i];
+ }
+
+ for (i = 0; i < sctx->isc_nrxqs; i++) {
+ if (ctx->ifc_sysctl_nrxds[i] != 0)
+ scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i];
+ else
+ scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i];
+ }
+
+ for (i = 0; i < sctx->isc_nrxqs; i++) {
+ if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) {
+ device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n",
+ i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]);
+ scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i];
+ }
+ if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) {
+ device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n",
+ i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]);
+ scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i];
+ }
+ }
+
+ for (i = 0; i < sctx->isc_ntxqs; i++) {
+ if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) {
+ device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n",
+ i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]);
+ scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i];
+ }
+ if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) {
+ device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n",
+ i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]);
+ scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i];
+ }
+ }
+
if ((err = IFDI_ATTACH_PRE(ctx)) != 0) {
device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err);
return (err);
}
+ if (scctx->isc_ntxqsets_max)
+ scctx->isc_ntxqsets = min(scctx->isc_ntxqsets, scctx->isc_ntxqsets_max);
+ if (scctx->isc_nrxqsets_max)
+ scctx->isc_nrxqsets = min(scctx->isc_nrxqsets, scctx->isc_nrxqsets_max);
+
#ifdef ACPI_DMAR
if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL)
ctx->ifc_flags |= IFC_DMAR;
#endif
- scctx = &ctx->ifc_softc_ctx;
msix_bar = scctx->isc_msix_bar;
- if (scctx->isc_tx_nsegments > sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION)
- scctx->isc_tx_nsegments = max(1, sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION);
- if (scctx->isc_tx_tso_segments_max > sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION)
- scctx->isc_tx_tso_segments_max = max(1, sctx->isc_ntxd / MAX_SINGLE_PACKET_FRACTION);
-
ifp = ctx->ifc_ifp;
- /*
- * XXX sanity check that ntxd & nrxd are a power of 2
- */
+ if(sctx->isc_flags & IFLIB_HAS_TXCQ)
+ main_txq = 1;
+ else
+ main_txq = 0;
+
+ if(sctx->isc_flags & IFLIB_HAS_RXCQ)
+ main_rxq = 1;
+ else
+ main_rxq = 0;
+
+ /* XXX change for per-queue sizes */
+ device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
+ scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]);
+ for (i = 0; i < sctx->isc_nrxqs; i++) {
+ if (!powerof2(scctx->isc_nrxd[i])) {
+ /* round down instead? */
+ device_printf(dev, "# rx descriptors must be a power of 2\n");
+ err = EINVAL;
+ goto fail;
+ }
+ }
+ for (i = 0; i < sctx->isc_ntxqs; i++) {
+ if (!powerof2(scctx->isc_ntxd[i])) {
+ device_printf(dev,
+ "# tx descriptors must be a power of 2");
+ err = EINVAL;
+ goto fail;
+ }
+ }
+
+ if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] /
+ MAX_SINGLE_PACKET_FRACTION)
+ scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] /
+ MAX_SINGLE_PACKET_FRACTION);
+ if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] /
+ MAX_SINGLE_PACKET_FRACTION)
+ scctx->isc_tx_tso_segments_max = max(1,
+ scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION);
/*
* Protect the stack against modern hardware
@@ -3482,7 +3589,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max;
if (scctx->isc_rss_table_size == 0)
scctx->isc_rss_table_size = 64;
- scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;;
+ scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
/*
** Now setup MSI or MSI/X, should
** return us the number of supported
@@ -3520,7 +3627,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
MPASS(msix == 1);
rid = 1;
}
- if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx, &rid, "irq0")) != 0) {
+ if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) {
device_printf(dev, "iflib_legacy_setup failed %d\n", err);
goto fail_intr_free;
}
@@ -3536,6 +3643,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct
}
*ctxp = ctx;
+ if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
iflib_add_device_sysctl_post(ctx);
return (0);
fail_detach:
@@ -3599,7 +3707,7 @@ iflib_device_deregister(if_ctx_t ctx)
led_destroy(ctx->ifc_led_dev);
/* XXX drain any dependent tasks */
tqg = qgroup_if_io_tqg;
- for (txq = ctx->ifc_txqs, i = 0, rxq = ctx->ifc_rxqs; i < NTXQSETS(ctx); i++, txq++) {
+ for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) {
callout_drain(&txq->ift_timer);
callout_drain(&txq->ift_db_check);
if (txq->ift_task.gt_uniq != NULL)
@@ -3616,6 +3724,7 @@ iflib_device_deregister(if_ctx_t ctx)
taskqgroup_detach(tqg, &ctx->ifc_vflr_task);
IFDI_DETACH(ctx);
+ device_set_softc(ctx->ifc_dev, NULL);
if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) {
pci_release_msi(dev);
}
@@ -3633,6 +3742,9 @@ iflib_device_deregister(if_ctx_t ctx)
iflib_tx_structures_free(ctx);
iflib_rx_structures_free(ctx);
+ if (ctx->ifc_flags & IFC_SC_ALLOCATED)
+ free(ctx->ifc_softc, M_IFLIB);
+ free(ctx, M_IFLIB);
return (0);
}
@@ -3782,7 +3894,13 @@ _iflib_assert(if_shared_ctx_t sctx)
MPASS(sctx->isc_txrx->ift_rxd_pkt_get);
MPASS(sctx->isc_txrx->ift_rxd_refill);
MPASS(sctx->isc_txrx->ift_rxd_flush);
- MPASS(sctx->isc_nrxd);
+
+ MPASS(sctx->isc_nrxd_min[0]);
+ MPASS(sctx->isc_nrxd_max[0]);
+ MPASS(sctx->isc_nrxd_default[0]);
+ MPASS(sctx->isc_ntxd_min[0]);
+ MPASS(sctx->isc_ntxd_max[0]);
+ MPASS(sctx->isc_ntxd_default[0]);
}
static int
@@ -3796,7 +3914,6 @@ iflib_register(if_ctx_t ctx)
_iflib_assert(sctx);
CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev));
- MPASS(ctx->ifc_flags == 0);
ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER);
if (ifp == NULL) {
@@ -3818,7 +3935,6 @@ iflib_register(if_ctx_t ctx)
if_setioctlfn(ifp, iflib_if_ioctl);
if_settransmitfn(ifp, iflib_if_transmit);
if_setqflushfn(ifp, iflib_if_qflush);
- if_setgetcounterfn(ifp, iflib_if_get_counter);
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
if_setcapabilities(ifp, 0);
@@ -3842,16 +3958,17 @@ static int
iflib_queues_alloc(if_ctx_t ctx)
{
if_shared_ctx_t sctx = ctx->ifc_sctx;
+ if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
device_t dev = ctx->ifc_dev;
- int nrxqsets = ctx->ifc_softc_ctx.isc_nrxqsets;
- int ntxqsets = ctx->ifc_softc_ctx.isc_ntxqsets;
+ int nrxqsets = scctx->isc_nrxqsets;
+ int ntxqsets = scctx->isc_ntxqsets;
iflib_txq_t txq;
iflib_rxq_t rxq;
iflib_fl_t fl = NULL;
- int i, j, cpu, err, txconf, rxconf, fl_ifdi_offset;
+ int i, j, cpu, err, txconf, rxconf;
iflib_dma_info_t ifdip;
- uint32_t *rxqsizes = sctx->isc_rxqsizes;
- uint32_t *txqsizes = sctx->isc_txqsizes;
+ uint32_t *rxqsizes = scctx->isc_rxqsizes;
+ uint32_t *txqsizes = scctx->isc_txqsizes;
uint8_t nrxqs = sctx->isc_nrxqs;
uint8_t ntxqs = sctx->isc_ntxqs;
int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1;
@@ -3860,10 +3977,11 @@ iflib_queues_alloc(if_ctx_t ctx)
struct ifmp_ring **brscp;
int nbuf_rings = 1; /* XXX determine dynamically */
- KASSERT(ntxqs > 0, ("number of queues must be at least 1"));
- KASSERT(nrxqs > 0, ("number of queues must be at least 1"));
+ KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1"));
+ KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1"));
brscp = NULL;
+ txq = NULL;
rxq = NULL;
/* Allocate the TX ring struct memory */
@@ -3891,8 +4009,6 @@ iflib_queues_alloc(if_ctx_t ctx)
ctx->ifc_txqs = txq;
ctx->ifc_rxqs = rxq;
- txq = NULL;
- rxq = NULL;
/*
* XXX handle allocation failure
@@ -3916,6 +4032,11 @@ iflib_queues_alloc(if_ctx_t ctx)
}
txq->ift_ctx = ctx;
txq->ift_id = i;
+ if (sctx->isc_flags & IFLIB_HAS_TXCQ) {
+ txq->ift_br_offset = 1;
+ } else {
+ txq->ift_br_offset = 0;
+ }
/* XXX fix this */
txq->ift_timer.c_cpu = cpu;
txq->ift_db_check.c_cpu = cpu;
@@ -3970,10 +4091,10 @@ iflib_queues_alloc(if_ctx_t ctx)
}
rxq->ifr_ctx = ctx;
rxq->ifr_id = i;
- if (sctx->isc_flags & IFLIB_HAS_CQ) {
- fl_ifdi_offset = 1;
+ if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
+ rxq->ifr_fl_offset = 1;
} else {
- fl_ifdi_offset = 0;
+ rxq->ifr_fl_offset = 0;
}
rxq->ifr_nfl = nfree_lists;
if (!(fl =
@@ -3986,7 +4107,8 @@ iflib_queues_alloc(if_ctx_t ctx)
for (j = 0; j < nfree_lists; j++) {
rxq->ifr_fl[j].ifl_rxq = rxq;
rxq->ifr_fl[j].ifl_id = j;
- rxq->ifr_fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + fl_ifdi_offset];
+ rxq->ifr_fl[j].ifl_ifdi =
+ &rxq->ifr_ifdi[j + rxq->ifr_fl_offset];
}
/* Allocate receive buffers for the ring*/
if (iflib_rxsd_alloc(rxq)) {
@@ -4106,12 +4228,13 @@ iflib_rx_structures_setup(if_ctx_t ctx)
for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) {
#if defined(INET6) || defined(INET)
tcp_lro_free(&rxq->ifr_lc);
- if ((err = tcp_lro_init(&rxq->ifr_lc)) != 0) {
+ if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp,
+ TCP_LRO_ENTRIES, min(1024,
+ ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) {
device_printf(ctx->ifc_dev, "LRO Initialization failed!\n");
goto fail;
}
rxq->ifr_lro_enabled = TRUE;
- rxq->ifr_lc.ifp = ctx->ifc_ifp;
#endif
IFDI_RXQ_SETUP(ctx, rxq->ifr_id);
}
@@ -4142,7 +4265,7 @@ iflib_rx_structures_free(if_ctx_t ctx)
{
iflib_rxq_t rxq = ctx->ifc_rxqs;
- for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, rxq++) {
+ for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) {
iflib_rx_sds_free(rxq);
}
}
@@ -4193,7 +4316,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
struct taskqgroup *tqg;
iflib_filter_info_t info;
cpuset_t cpus;
- task_fn_t *fn;
+ gtask_fn_t *fn;
int tqrid, err;
void *q;
@@ -4254,7 +4377,7 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void
{
struct grouptask *gtask;
struct taskqgroup *tqg;
- task_fn_t *fn;
+ gtask_fn_t *fn;
void *q;
switch (type) {
@@ -4310,7 +4433,7 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *
iflib_filter_info_t info;
struct grouptask *gtask;
struct taskqgroup *tqg;
- task_fn_t *fn;
+ gtask_fn_t *fn;
int tqrid;
void *q;
int err;
@@ -4385,7 +4508,7 @@ iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name)
}
void
-iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, task_fn_t *fn,
+iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn,
char *name)
{
@@ -4394,14 +4517,21 @@ iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, task_fn_t *fn,
}
void
-iflib_link_state_change(if_ctx_t ctx, int link_state)
+iflib_config_gtask_deinit(struct grouptask *gtask)
+{
+
+ taskqgroup_detach(qgroup_if_config_tqg, gtask);
+}
+
+void
+iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate)
{
if_t ifp = ctx->ifc_ifp;
iflib_txq_t txq = ctx->ifc_txqs;
-#if 0
+
if_setbaudrate(ifp, baudrate);
-#endif
+
/* If link down, disable watchdog */
if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) {
for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++)
@@ -4431,10 +4561,11 @@ iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq)
}
static int
-iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx)
+iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget)
{
- return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx));
+ return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx,
+ budget));
}
void
@@ -4468,8 +4599,9 @@ iflib_msix_init(if_ctx_t ctx)
int iflib_num_tx_queues, iflib_num_rx_queues;
int err, admincnt, bar;
- iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs;
- iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs;
+ iflib_num_tx_queues = scctx->isc_ntxqsets;
+ iflib_num_rx_queues = scctx->isc_nrxqsets;
+
bar = ctx->ifc_softc_ctx.isc_msix_bar;
admincnt = sctx->isc_admin_intrcnt;
/* Override by tuneable */
@@ -4549,18 +4681,31 @@ iflib_msix_init(if_ctx_t ctx)
if (queues > rss_getnumbuckets())
queues = rss_getnumbuckets();
#endif
- if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queues)
- queues = rx_queues = iflib_num_rx_queues;
+ if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt)
+ rx_queues = iflib_num_rx_queues;
else
rx_queues = queues;
+ /*
+ * We want this to be all logical CPUs by default
+ */
if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues)
tx_queues = iflib_num_tx_queues;
else
- tx_queues = queues;
+ tx_queues = mp_ncpus;
+
+ if (ctx->ifc_sysctl_qs_eq_override == 0) {
+#ifdef INVARIANTS
+ if (tx_queues != rx_queues)
+ device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n",
+ min(rx_queues, tx_queues), min(rx_queues, tx_queues));
+#endif
+ tx_queues = min(rx_queues, tx_queues);
+ rx_queues = min(rx_queues, tx_queues);
+ }
device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues);
- vectors = queues + admincnt;
+ vectors = rx_queues + admincnt;
if ((err = pci_alloc_msix(dev, &vectors)) == 0) {
device_printf(dev,
"Using MSIX interrupts with %d vectors\n", vectors);
@@ -4568,6 +4713,7 @@ iflib_msix_init(if_ctx_t ctx)
scctx->isc_nrxqsets = rx_queues;
scctx->isc_ntxqsets = tx_queues;
scctx->isc_intr = IFLIB_INTR_MSIX;
+
return (vectors);
} else {
device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err);
@@ -4617,7 +4763,58 @@ mp_ring_state_handler(SYSCTL_HANDLER_ARGS)
return(rc);
}
+enum iflib_ndesc_handler {
+ IFLIB_NTXD_HANDLER,
+ IFLIB_NRXD_HANDLER,
+};
+static int
+mp_ndesc_handler(SYSCTL_HANDLER_ARGS)
+{
+ if_ctx_t ctx = (void *)arg1;
+ enum iflib_ndesc_handler type = arg2;
+ char buf[256] = {0};
+ uint16_t *ndesc;
+ char *p, *next;
+ int nqs, rc, i;
+
+ MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER);
+
+ nqs = 8;
+ switch(type) {
+ case IFLIB_NTXD_HANDLER:
+ ndesc = ctx->ifc_sysctl_ntxds;
+ if (ctx->ifc_sctx)
+ nqs = ctx->ifc_sctx->isc_ntxqs;
+ break;
+ case IFLIB_NRXD_HANDLER:
+ ndesc = ctx->ifc_sysctl_nrxds;
+ if (ctx->ifc_sctx)
+ nqs = ctx->ifc_sctx->isc_nrxqs;
+ break;
+ }
+ if (nqs == 0)
+ nqs = 8;
+
+ for (i=0; i<8; i++) {
+ if (i >= nqs)
+ break;
+ if (i)
+ strcat(buf, ",");
+ sprintf(strchr(buf, 0), "%d", ndesc[i]);
+ }
+
+ rc = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+ if (rc || req->newptr == NULL)
+ return rc;
+
+ for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p;
+ i++, p = strsep(&next, " ,")) {
+ ndesc[i] = strtoul(p, NULL, 10);
+ }
+
+ return(rc);
+}
#define NAME_BUFLEN 32
static void
@@ -4634,19 +4831,29 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx)
CTLFLAG_RD, NULL, "IFLIB fields");
oid_list = SYSCTL_CHILDREN(node);
+ SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version",
+ CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0,
+ "driver version");
+
SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs",
CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0,
"# of txqs to use, 0 => use default #");
SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs",
- CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0,
- "# of txqs to use, 0 => use default #");
- SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxds",
- CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxds, 0,
- "# of tx descriptors to use, 0 => use default #");
- SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxds",
- CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxds, 0,
- "# of rx descriptors to use, 0 => use default #");
-
+ CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0,
+ "# of rxqs to use, 0 => use default #");
+ SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable",
+ CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0,
+ "permit #txq != #rxq");
+
+ /* XXX change for per-queue sizes */
+ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
+ CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER,
+ mp_ndesc_handler, "A",
+ "list of # of tx descriptors to use, 0 = use default #");
+ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds",
+ CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER,
+ mp_ndesc_handler, "A",
+ "list of # of rx descriptors to use, 0 = use default #");
}
static void
@@ -4700,7 +4907,7 @@ iflib_add_device_sysctl_post(if_ctx_t ctx)
&txq->ift_mbuf_defrag_failed, "# of times m_defrag failed");
SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail",
CTLFLAG_RD,
- &txq->ift_mbuf_defrag_failed, "# of times no descriptors were available");
+ &txq->ift_no_desc_avail, "# of times no descriptors were available");
SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed",
CTLFLAG_RD,
&txq->ift_map_failed, "# of times dma map failed");
@@ -4763,7 +4970,7 @@ iflib_add_device_sysctl_post(if_ctx_t ctx)
queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
CTLFLAG_RD, NULL, "Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
- if (sctx->isc_flags & IFLIB_HAS_CQ) {
+ if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx",
CTLFLAG_RD,
&rxq->ifr_cq_pidx, 1, "Producer Index");
diff --git a/sys/net/iflib.h b/sys/net/iflib.h
index c301b91..cf4a786 100644
--- a/sys/net/iflib.h
+++ b/sys/net/iflib.h
@@ -35,6 +35,7 @@
#include <machine/bus.h>
#include <sys/bus_dma.h>
#include <sys/nv.h>
+#include <sys/gtaskqueue.h>
/*
@@ -63,12 +64,14 @@ typedef struct if_int_delay_info *if_int_delay_info_t;
typedef struct if_rxd_frag {
uint8_t irf_flid;
uint16_t irf_idx;
+ uint16_t irf_len;
} *if_rxd_frag_t;
typedef struct if_rxd_info {
/* set by iflib */
uint16_t iri_qsidx; /* qset index */
uint16_t iri_vtag; /* vlan tag - if flag set */
+ /* XXX redundant with the new irf_len field */
uint16_t iri_len; /* packet length */
uint16_t iri_cidx; /* consumer index of cq */
struct ifnet *iri_ifp; /* some drivers >1 interface per softc */
@@ -156,10 +159,11 @@ typedef struct if_txrx {
void (*ift_txd_flush) (void *, uint16_t, uint32_t);
int (*ift_txd_credits_update) (void *, uint16_t, uint32_t, bool);
- int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx);
+ int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx,
+ int budget);
int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri);
void (*ift_rxd_refill) (void * , uint16_t qsidx, uint8_t flidx, uint32_t pidx,
- uint64_t *paddrs, caddr_t *vaddrs, uint16_t count);
+ uint64_t *paddrs, caddr_t *vaddrs, uint16_t count, uint16_t buf_size);
void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, uint32_t pidx);
int (*ift_legacy_intr) (void *);
} *if_txrx_t;
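The new budget argument on ift_rxd_available lets iflib bound how much completed work a driver reports per pass; the netmap path passes INT_MAX while the deferred rx task passes its remaining budget. A hedged sketch of a driver honoring it, with the ring layout hypothetical:

    static int
    hypo_rxd_available(void *arg, uint16_t qsidx, uint32_t cidx, int budget)
    {
            struct hypo_softc *sc = arg;
            struct hypo_rx_ring *rxr = &sc->rx_rings[qsidx]; /* hypothetical */
            int cnt = 0;

            /* count completed descriptors, but never report more than budget */
            while (cnt < budget && hypo_rxd_done(rxr, cidx)) {
                    cidx = (cidx + 1) & (rxr->ndesc - 1); /* ndesc is a power of 2 */
                    cnt++;
            }
            return (cnt);
    }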
@@ -170,11 +174,20 @@ typedef struct if_softc_ctx {
int isc_ntxqsets;
int isc_msix_bar; /* can be model specific - initialize in attach_pre */
int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */
+ int isc_ntxd[8];
+ int isc_nrxd[8];
+
+ uint32_t isc_txqsizes[8];
+ uint32_t isc_rxqsizes[8];
+ int isc_max_txqsets;
+ int isc_max_rxqsets;
int isc_tx_tso_segments_max;
int isc_tx_tso_size_max;
int isc_tx_tso_segsize_max;
int isc_rss_table_size;
int isc_rss_table_mask;
+ int isc_nrxqsets_max;
+ int isc_ntxqsets_max;
iflib_intr_mode_t isc_intr;
uint16_t isc_max_frame_size; /* set at init time by driver */
@@ -188,8 +201,6 @@ struct if_shared_ctx {
int isc_magic;
if_txrx_t isc_txrx;
driver_t *isc_driver;
- int isc_ntxd;
- int isc_nrxd;
int isc_nfl;
int isc_flags;
bus_size_t isc_q_align;
@@ -199,14 +210,11 @@ struct if_shared_ctx {
bus_size_t isc_rx_maxsegsize;
int isc_rx_nsegments;
int isc_rx_process_limit;
-
-
- uint32_t isc_txqsizes[8];
int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */
- uint32_t isc_rxqsizes[8];
int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */
int isc_admin_intrcnt; /* # of admin/link interrupts */
+
int isc_tx_reclaim_thresh;
/* fields necessary for probe */
@@ -215,6 +223,12 @@ struct if_shared_ctx {
/* optional function to transform the read values to match the table*/
void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id,
uint16_t *subdevice_id, uint16_t *rev_id);
+ int isc_nrxd_min[8];
+ int isc_nrxd_default[8];
+ int isc_nrxd_max[8];
+ int isc_ntxd_min[8];
+ int isc_ntxd_default[8];
+ int isc_ntxd_max[8];
};
typedef struct iflib_dma_info {
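Descriptor counts moved from single isc_ntxd/isc_nrxd values to per-queue arrays with driver-published min/default/max bounds, which iflib_device_register clamps overrides against. A hedged initialization template; the numeric values are illustrative and IFLIB_MAGIC is assumed to be the existing magic constant:

    static struct if_shared_ctx hypo_sctx_init = {
            .isc_magic = IFLIB_MAGIC,       /* assumed existing constant */
            .isc_ntxqs = 1,
            .isc_nrxqs = 1,
            .isc_ntxd_min = {64},
            .isc_ntxd_default = {1024},
            .isc_ntxd_max = {4096},
            .isc_nrxd_min = {64},
            .isc_nrxd_default = {1024},
            .isc_nrxd_max = {4096},
            /* remaining fields elided */
    };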
@@ -240,9 +254,9 @@ typedef enum {
/*
- * Interface has a separate command queue
+ * Interface has a separate command queue for RX
*/
-#define IFLIB_HAS_CQ 0x1
+#define IFLIB_HAS_RXCQ 0x1
/*
* Driver has already allocated vectors
*/
@@ -252,6 +266,10 @@ typedef enum {
* Interface is a virtual function
*/
#define IFLIB_IS_VF 0x4
+/*
+ * Interface has a separate command queue for TX
+ */
+#define IFLIB_HAS_TXCQ 0x8
/*
@@ -308,7 +326,10 @@ void iflib_irq_free(if_ctx_t ctx, if_irq_t irq);
void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name);
void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask,
- task_fn_t *fn, char *name);
+ gtask_fn_t *fn, char *name);
+
+void iflib_config_gtask_deinit(struct grouptask *gtask);
+
void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid);
@@ -317,7 +338,7 @@ void iflib_admin_intr_deferred(if_ctx_t ctx);
void iflib_iov_intr_deferred(if_ctx_t ctx);
-void iflib_link_state_change(if_ctx_t ctx, int linkstate);
+void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate);
int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags);
void iflib_dma_free(iflib_dma_info_t dma);
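iflib_link_state_change now carries the baudrate so if_setbaudrate can be called inside iflib (the previously #if 0'd call is enabled by this patch). A sketch of a driver's admin-status path under the new signature; IF_Gbps is the usual net/if_var.h helper, the hypo_ names are hypothetical, and iflib_get_softc is assumed:

    static void
    hypo_if_update_admin_status(if_ctx_t ctx)
    {
            struct hypo_softc *sc = iflib_get_softc(ctx);

            if (sc->link_up)
                    iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10));
            else
                    iflib_link_state_change(ctx, LINK_STATE_DOWN, 0);
    }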
diff --git a/sys/net/netisr.c b/sys/net/netisr.c
index 318e1a3..931d86c 100644
--- a/sys/net/netisr.c
+++ b/sys/net/netisr.c
@@ -272,10 +272,7 @@ u_int
netisr_get_cpuid(u_int cpunumber)
{
- KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber,
- nws_count));
-
- return (nws_array[cpunumber]);
+ return (nws_array[cpunumber % nws_count]);
}
/*
@@ -810,10 +807,12 @@ netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
* dispatch. In the queued case, fall back on the SOURCE
* policy.
*/
- if (*cpuidp != NETISR_CPUID_NONE)
+ if (*cpuidp != NETISR_CPUID_NONE) {
+ *cpuidp = netisr_get_cpuid(*cpuidp);
return (m);
+ }
if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
- *cpuidp = curcpu;
+ *cpuidp = netisr_get_cpuid(curcpu);
return (m);
}
policy = NETISR_POLICY_SOURCE;
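netisr_get_cpuid now folds its argument modulo nws_count instead of asserting on out-of-range values, so both the pre-set cpuid path and the hybrid-dispatch curcpu path above can pass raw hints safely. Illustratively:

    /* any non-negative hint maps onto an active netisr worker */
    u_int cpuid = netisr_get_cpuid(curcpu);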
diff --git a/sys/net80211/ieee80211_ioctl.c b/sys/net80211/ieee80211_ioctl.c
index b65f23a..5d323c0 100644
--- a/sys/net80211/ieee80211_ioctl.c
+++ b/sys/net80211/ieee80211_ioctl.c
@@ -3394,10 +3394,12 @@ ieee80211_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
* Check if the MAC address was changed
* via SIOCSIFLLADDR ioctl.
*/
+ if_addr_rlock(ifp);
if ((ifp->if_flags & IFF_UP) == 0 &&
!IEEE80211_ADDR_EQ(vap->iv_myaddr, IF_LLADDR(ifp)))
IEEE80211_ADDR_COPY(vap->iv_myaddr,
IF_LLADDR(ifp));
+ if_addr_runlock(ifp);
}
break;
case SIOCADDMULTI:
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index 621b25e..88c0895 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -12639,7 +12639,10 @@ sctp_lower_sosend(struct socket *so,
}
SCTP_INP_RUNLOCK(inp);
} else if (sinfo_assoc_id) {
- stcb = sctp_findassociation_ep_asocid(inp, sinfo_assoc_id, 0);
+ stcb = sctp_findassociation_ep_asocid(inp, sinfo_assoc_id, 1);
+ if (stcb != NULL) {
+ hold_tcblock = 1;
+ }
} else if (addr) {
/*-
* Since we did not use findep we must
@@ -13404,6 +13407,10 @@ skip_preblock:
}
}
SCTP_TCB_SEND_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ goto out_unlocked;
+ }
if (sp) {
if (sp->msg_is_complete == 0) {
strm->last_msg_incomplete = 1;
diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c
index fc2b9bb..77dd634 100644
--- a/sys/netinet/sctp_usrreq.c
+++ b/sys/netinet/sctp_usrreq.c
@@ -1506,11 +1506,6 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
}
SCTP_TCB_UNLOCK(stcb);
- if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
- stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
- /* Set the connected flag so we can queue data */
- soisconnecting(so);
- }
out_now:
if (creat_lock_on) {
SCTP_ASOC_CREATE_UNLOCK(inp);
diff --git a/sys/netinet/sctputil.c b/sys/netinet/sctputil.c
index 148b047..4d5d497 100644
--- a/sys/netinet/sctputil.c
+++ b/sys/netinet/sctputil.c
@@ -1280,6 +1280,7 @@ sctp_iterator_work(struct sctp_iterator *it)
SCTP_INP_INFO_RLOCK();
SCTP_ITERATOR_LOCK();
+ sctp_it_ctl.cur_it = it;
if (it->inp) {
SCTP_INP_RLOCK(it->inp);
SCTP_INP_DECR_REF(it->inp);
@@ -1287,6 +1288,7 @@ sctp_iterator_work(struct sctp_iterator *it)
if (it->inp == NULL) {
/* iterator is complete */
done_with_iterator:
+ sctp_it_ctl.cur_it = NULL;
SCTP_ITERATOR_UNLOCK();
SCTP_INP_INFO_RUNLOCK();
if (it->function_atend != NULL) {
@@ -1427,13 +1429,11 @@ sctp_iterator_worker(void)
sctp_it_ctl.iterator_running = 1;
TAILQ_FOREACH_SAFE(it, &sctp_it_ctl.iteratorhead, sctp_nxt_itr, nit) {
- sctp_it_ctl.cur_it = it;
/* now lets work on this one */
TAILQ_REMOVE(&sctp_it_ctl.iteratorhead, it, sctp_nxt_itr);
SCTP_IPI_ITERATOR_WQ_UNLOCK();
CURVNET_SET(it->vn);
sctp_iterator_work(it);
- sctp_it_ctl.cur_it = NULL;
CURVNET_RESTORE();
SCTP_IPI_ITERATOR_WQ_LOCK();
/* sa_ignore FREED_MEMORY */
@@ -6356,7 +6356,7 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
struct sctp_tcb *stcb = NULL;
unsigned int incr, at, i;
- at = incr = 0;
+ at = 0;
sa = addr;
*error = *num_v6 = *num_v4 = 0;
/* account and validate addresses */
@@ -6364,6 +6364,7 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
switch (sa->sa_family) {
#ifdef INET
case AF_INET:
+ incr = (unsigned int)sizeof(struct sockaddr_in);
if (sa->sa_len != incr) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
*error = EINVAL;
@@ -6371,7 +6372,6 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
return (NULL);
}
(*num_v4) += 1;
- incr = (unsigned int)sizeof(struct sockaddr_in);
break;
#endif
#ifdef INET6
@@ -6387,6 +6387,7 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
*bad_addr = 1;
return (NULL);
}
+ incr = (unsigned int)sizeof(struct sockaddr_in6);
if (sa->sa_len != incr) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
*error = EINVAL;
@@ -6394,7 +6395,6 @@ sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
return (NULL);
}
(*num_v6) += 1;
- incr = (unsigned int)sizeof(struct sockaddr_in6);
break;
}
#endif
diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c
index b44fb9e..0904e86 100644
--- a/sys/netinet6/udp6_usrreq.c
+++ b/sys/netinet6/udp6_usrreq.c
@@ -898,7 +898,7 @@ udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
UDPSTAT_INC(udps_opackets);
- error = ip6_output(m, optp, &inp->inp_route6, flags,
+ error = ip6_output(m, optp, NULL, flags,
inp->in6p_moptions, NULL, inp);
break;
case AF_INET:
diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c
index 52dfe7c..43ff56a 100644
--- a/sys/netpfil/ipfw/ip_fw_sockopt.c
+++ b/sys/netpfil/ipfw/ip_fw_sockopt.c
@@ -1414,8 +1414,10 @@ manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3,
if (rh->range.head.length != sizeof(ipfw_range_tlv))
return (1);
- if (rh->range.set >= IPFW_MAX_SETS ||
- rh->range.new_set >= IPFW_MAX_SETS)
+ /* enable_sets() expects bitmasks. */
+ if (op3->opcode != IP_FW_SET_ENABLE &&
+ (rh->range.set >= IPFW_MAX_SETS ||
+ rh->range.new_set >= IPFW_MAX_SETS))
return (EINVAL);
ret = 0;
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index da7d2c1..17797fa 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -2764,8 +2764,8 @@ pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
switch (af) {
#ifdef INET
case AF_INET:
- if ((a->addr32[0] < b->addr32[0]) ||
- (a->addr32[0] > e->addr32[0]))
+ if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
+ (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
return (0);
break;
#endif /* INET */
@@ -2775,15 +2775,15 @@ pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
/* check a >= b */
for (i = 0; i < 4; ++i)
- if (a->addr32[i] > b->addr32[i])
+ if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
break;
- else if (a->addr32[i] < b->addr32[i])
+ else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
return (0);
/* check a <= e */
for (i = 0; i < 4; ++i)
- if (a->addr32[i] < e->addr32[i])
+ if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
break;
- else if (a->addr32[i] > e->addr32[i])
+ else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
return (0);
break;
}
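The ntohl() calls matter because pf_addr.addr32[] holds network-order words, and on a little-endian host a raw integer comparison orders addresses by their byte-swapped values. A worked example of the failure mode (a sketch, not from the source):

    #include <arpa/inet.h>
    #include <stdint.h>

    /* range [10.0.0.1, 10.0.1.0], candidate 10.0.0.2, little-endian host */
    uint32_t b = htonl(0x0a000001); /* in memory: 0a 00 00 01 -> word 0x0100000a */
    uint32_t e = htonl(0x0a000100); /* in memory: 0a 00 01 00 -> word 0x0001000a */
    uint32_t a = htonl(0x0a000002); /* in memory: 0a 00 00 02 -> word 0x0200000a */

    /* raw words: a (0x0200000a) > e (0x0001000a), so 10.0.0.2 is wrongly
     * excluded; after ntohl(): 0x0a000002 <= 0x0a000100, correctly included */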
diff --git a/sys/sys/_task.h b/sys/sys/_task.h
index ce89781..d3be719 100644
--- a/sys/sys/_task.h
+++ b/sys/sys/_task.h
@@ -42,6 +42,7 @@
* (q) taskqueue lock
*/
typedef void task_fn_t(void *context, int pending);
+typedef void gtask_fn_t(void *context);
struct task {
STAILQ_ENTRY(task) ta_link; /* (q) link for queue */
@@ -51,8 +52,16 @@ struct task {
void *ta_context; /* (c) argument for handler */
};
+struct gtask {
+ STAILQ_ENTRY(gtask) ta_link; /* (q) link for queue */
+ uint16_t ta_flags; /* (q) state flags */
+ u_short ta_priority; /* (c) Priority */
+ gtask_fn_t *ta_func; /* (c) task handler */
+ void *ta_context; /* (c) argument for handler */
+};
+
struct grouptask {
- struct task gt_task;
+ struct gtask gt_task;
void *gt_taskqueue;
LIST_ENTRY(grouptask) gt_list;
void *gt_uniq;
diff --git a/sys/sys/gtaskqueue.h b/sys/sys/gtaskqueue.h
new file mode 100644
index 0000000..88d4b54
--- /dev/null
+++ b/sys/sys/gtaskqueue.h
@@ -0,0 +1,125 @@
+/*-
+ * Copyright (c) 2014 Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2016 Matthew Macy <mmacy@nextbsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_GTASKQUEUE_H_
+#define _SYS_GTASKQUEUE_H_
+#include <sys/taskqueue.h>
+
+#ifndef _KERNEL
+#error "no user-servicable parts inside"
+#endif
+
+struct gtaskqueue;
+typedef void (*gtaskqueue_enqueue_fn)(void *context);
+
+/*
+ * Taskqueue groups. Manages dynamic thread groups and irq binding for
+ * device and other tasks.
+ */
+
+void gtaskqueue_block(struct gtaskqueue *queue);
+void gtaskqueue_unblock(struct gtaskqueue *queue);
+
+int gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask);
+void gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *task);
+void gtaskqueue_drain_all(struct gtaskqueue *queue);
+
+int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *task);
+void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *grptask,
+ void *uniq, int irq, char *name);
+int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask,
+ void *uniq, int cpu, int irq, char *name);
+void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
+struct taskqgroup *taskqgroup_create(char *name);
+void taskqgroup_destroy(struct taskqgroup *qgroup);
+int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride);
+
+#define TASK_ENQUEUED 0x1
+#define TASK_SKIP_WAKEUP 0x2
+
+
+#define GTASK_INIT(task, flags, priority, func, context) do { \
+ (task)->ta_flags = flags; \
+ (task)->ta_priority = (priority); \
+ (task)->ta_func = (func); \
+ (task)->ta_context = (context); \
+} while (0)
+
+#define GROUPTASK_INIT(gtask, priority, func, context) \
+ GTASK_INIT(&(gtask)->gt_task, TASK_SKIP_WAKEUP, priority, func, context)
+
+#define GROUPTASK_ENQUEUE(gtask) \
+ grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task)
+
+#define TASKQGROUP_DECLARE(name) \
+extern struct taskqgroup *qgroup_##name
+
+
+#ifdef EARLY_AP_STARTUP
+#define TASKQGROUP_DEFINE(name, cnt, stride) \
+ \
+struct taskqgroup *qgroup_##name; \
+ \
+static void \
+taskqgroup_define_##name(void *arg) \
+{ \
+ qgroup_##name = taskqgroup_create(#name); \
+ taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \
+} \
+ \
+SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \
+ taskqgroup_define_##name, NULL)
+#else
+#define TASKQGROUP_DEFINE(name, cnt, stride) \
+ \
+struct taskqgroup *qgroup_##name; \
+ \
+static void \
+taskqgroup_define_##name(void *arg) \
+{ \
+ qgroup_##name = taskqgroup_create(#name); \
+} \
+ \
+SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \
+ taskqgroup_define_##name, NULL); \
+ \
+static void \
+taskqgroup_adjust_##name(void *arg) \
+{ \
+ taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \
+} \
+ \
+SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \
+ taskqgroup_adjust_##name, NULL); \
+ \
+struct __hack
+#endif
+TASKQGROUP_DECLARE(net);
+
+#endif /* !_SYS_GTASKQUEUE_H_ */
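
The interface above replaces the grouptask API deleted from sys/sys/taskqueue.h further down, with GTASK_INIT() growing an explicit flags argument and the queue type changing from struct taskqueue to struct gtaskqueue. A minimal consumer sketch under stated assumptions: my_softc, my_rx_task, the irq value, and the "my rx" label are illustrative, not part of this commit; qgroup_net itself is declared by the header.

    #include <sys/param.h>
    #include <sys/gtaskqueue.h>

    struct my_softc {
            struct grouptask rx_gtask;
            int              irq;
    };

    /* Runs in a taskqgroup thread; context is the GROUPTASK_INIT() argument. */
    static void
    my_rx_task(void *context)
    {
            struct my_softc *sc = context;

            /* ... drain the receive ring for sc ... */
    }

    static void
    my_attach(struct my_softc *sc)
    {
            GROUPTASK_INIT(&sc->rx_gtask, 0, my_rx_task, sc);
            taskqgroup_attach(qgroup_net, &sc->rx_gtask, sc, sc->irq, "my rx");
    }

An interrupt filter would then defer its heavy lifting with GROUPTASK_ENQUEUE(&sc->rx_gtask), and teardown would undo the binding with taskqgroup_detach(qgroup_net, &sc->rx_gtask).
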
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 0343508..ee25c7a 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1100500 /* Master, propagated to newvers */
+#define __FreeBSD_version 1100501 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index e25a64b..153b9f9 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -422,6 +422,7 @@ do { \
#define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */
#define TDB_BORN 0x00000200 /* New LWP indicator for ptrace() */
#define TDB_EXIT 0x00000400 /* Exiting LWP indicator for ptrace() */
+#define TDB_VFORK 0x00000800 /* vfork indicator for ptrace() */
#define TDB_FSTP 0x00001000 /* The thread is PT_ATTACH leader */
/*
@@ -1004,7 +1005,7 @@ void proc_linkup(struct proc *p, struct thread *td);
struct proc *proc_realparent(struct proc *child);
void proc_reap(struct thread *td, struct proc *p, int *status, int options);
void proc_reparent(struct proc *child, struct proc *newparent);
-void proc_set_traced(struct proc *p);
+void proc_set_traced(struct proc *p, bool stop);
struct pstats *pstats_alloc(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
void pstats_free(struct pstats *ps);
diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h
index 0cf25e8..e2045c8 100644
--- a/sys/sys/ptrace.h
+++ b/sys/sys/ptrace.h
@@ -89,6 +89,7 @@
#define PTRACE_SYSCALL (PTRACE_SCE | PTRACE_SCX)
#define PTRACE_FORK 0x0008
#define PTRACE_LWP 0x0010
+#define PTRACE_VFORK 0x0020
#define PTRACE_DEFAULT (PTRACE_EXEC)
@@ -124,6 +125,8 @@ struct ptrace_lwpinfo {
#define PL_FLAG_CHILD 0x80 /* I am from child */
#define PL_FLAG_BORN 0x100 /* new LWP */
#define PL_FLAG_EXITED 0x200 /* exiting LWP */
+#define PL_FLAG_VFORKED 0x400 /* new child via vfork */
+#define PL_FLAG_VFORK_DONE 0x800 /* vfork parent has resumed */
sigset_t pl_sigmask; /* LWP signal mask */
sigset_t pl_siglist; /* LWP pending signal */
struct __siginfo pl_siginfo; /* siginfo for signal */
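
A debugger consumes the two new pl_flags bits through the existing PT_LWPINFO request. A hedged fragment of the tracer side (the surrounding waitpid() stop loop and the event-mask setup that enables vfork reporting are assumed, not shown):

    #include <sys/types.h>
    #include <sys/ptrace.h>
    #include <stdio.h>

    /* Called after waitpid() reports that the tracee stopped. */
    static void
    check_vfork_events(pid_t pid)
    {
            struct ptrace_lwpinfo pl;

            if (ptrace(PT_LWPINFO, pid, (caddr_t)&pl, sizeof(pl)) == -1)
                    return;
            if (pl.pl_flags & PL_FLAG_VFORKED)
                    printf("stop is in a new child created via vfork()\n");
            if (pl.pl_flags & PL_FLAG_VFORK_DONE)
                    printf("vfork() parent has resumed\n");
    }
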
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index f47ba2d..caffd7b 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -74,7 +74,7 @@ extern int vm_guest; /* Running as virtual machine guest? */
* Keep in sync with vm_guest_sysctl_names[].
*/
enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV,
- VM_GUEST_VMWARE, VM_LAST };
+ VM_GUEST_VMWARE, VM_GUEST_KVM, VM_LAST };
#if defined(WITNESS) || defined(INVARIANT_SUPPORT)
void kassert_panic(const char *fmt, ...) __printflike(1, 2);
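
The comment above the enum requires vm_guest_sysctl_names[] to stay in step; the one-line sys/kern/subr_param.c change in this commit's diffstat is presumably the matching entry. Reconstructed rather than quoted from the commit, it would read:

    /* sys/kern/subr_param.c: order must match enum VM_GUEST in systm.h. */
    static const char *const vm_guest_sysctl_names[] = {
            "none",
            "generic",
            "xen",
            "hv",
            "vmware",
            "kvm",
            NULL
    };
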
diff --git a/sys/sys/taskqueue.h b/sys/sys/taskqueue.h
index c986ffb..a6c6655 100644
--- a/sys/sys/taskqueue.h
+++ b/sys/sys/taskqueue.h
@@ -204,78 +204,4 @@ struct taskqueue *taskqueue_create_fast(const char *name, int mflags,
taskqueue_enqueue_fn enqueue,
void *context);
-/*
- * Taskqueue groups. Manages dynamic thread groups and irq binding for
- * device and other tasks.
- */
-int grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task);
-void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
- void *uniq, int irq, char *name);
-int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
- void *uniq, int cpu, int irq, char *name);
-void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
-struct taskqgroup *taskqgroup_create(char *name);
-void taskqgroup_destroy(struct taskqgroup *qgroup);
-int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride);
-
-#define TASK_SKIP_WAKEUP 0x1
-
-#define GTASK_INIT(task, priority, func, context) do { \
- (task)->ta_pending = 0; \
- (task)->ta_priority = (priority); \
- (task)->ta_func = (func); \
- (task)->ta_context = (context); \
-} while (0)
-
-#define GROUPTASK_INIT(gtask, priority, func, context) \
- GTASK_INIT(&(gtask)->gt_task, priority, func, context)
-
-#define GROUPTASK_ENQUEUE(gtask) \
- grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task)
-
-#define TASKQGROUP_DECLARE(name) \
-extern struct taskqgroup *qgroup_##name
-
-#ifdef EARLY_AP_STARTUP
-#define TASKQGROUP_DEFINE(name, cnt, stride) \
- \
-struct taskqgroup *qgroup_##name; \
- \
-static void \
-taskqgroup_define_##name(void *arg) \
-{ \
- qgroup_##name = taskqgroup_create(#name); \
- taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \
-} \
- \
-SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \
- taskqgroup_define_##name, NULL)
-#else
-#define TASKQGROUP_DEFINE(name, cnt, stride) \
- \
-struct taskqgroup *qgroup_##name; \
- \
-static void \
-taskqgroup_define_##name(void *arg) \
-{ \
- qgroup_##name = taskqgroup_create(#name); \
-} \
- \
-SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \
- taskqgroup_define_##name, NULL); \
- \
-static void \
-taskqgroup_adjust_##name(void *arg) \
-{ \
- taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \
-} \
- \
-SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \
- taskqgroup_adjust_##name, NULL); \
- \
-struct __hack
-#endif
-
-TASKQGROUP_DECLARE(net);
-
#endif /* !_SYS_TASKQUEUE_H_ */
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 8551085..04e0ae9 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -255,6 +255,8 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[1].in_lbn;
bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
@@ -309,7 +311,7 @@ retry:
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -325,6 +327,8 @@ retry:
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[i].in_lbn;
nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
@@ -386,7 +390,7 @@ retry:
flags | IO_BUFLOCKED, cred, &newb);
if (error) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -401,6 +405,8 @@ retry:
goto fail;
}
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = lbn;
nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
@@ -478,10 +484,16 @@ fail:
* We shall not leave the freed blocks on the vnode
* buffer object lists.
*/
- bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
if (bp != NULL) {
- bp->b_flags |= (B_INVAL | B_RELBUF);
- bp->b_flags &= ~B_ASYNC;
+ KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
+ ("mismatch1 l %jd %jd b %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
+ (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp)));
+ bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
+ bp->b_flags &= ~(B_ASYNC | B_CACHE);
brelse(bp);
}
deallocated += fs->fs_bsize;
@@ -524,6 +536,18 @@ fail:
* cleared, free the blocks.
*/
for (blkp = allociblk; blkp < allocblk; blkp++) {
+#ifdef INVARIANTS
+ if (blkp == allociblk)
+ lbns_remfree = lbns;
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
+ if (bp != NULL) {
+ panic("zombie1 %jd %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp));
+ }
+ lbns_remfree++;
+#endif
ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
ip->i_number, vp->v_type, NULL);
}
@@ -818,6 +842,8 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[1].in_lbn;
bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
@@ -873,7 +899,7 @@ retry:
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -889,6 +915,8 @@ retry:
}
pref = newb + fs->fs_frag;
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = indirs[i].in_lbn;
nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
@@ -951,7 +979,7 @@ retry:
flags | IO_BUFLOCKED, cred, &newb);
if (error) {
brelse(bp);
- if (++reclaimed == 1) {
+ if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
@@ -966,6 +994,8 @@ retry:
goto fail;
}
nb = newb;
+ MPASS(allocblk < allociblk + nitems(allociblk));
+ MPASS(lbns_remfree < lbns + nitems(lbns));
*allocblk++ = nb;
*lbns_remfree++ = lbn;
nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
@@ -1049,10 +1079,16 @@ fail:
* We shall not leave the freed blocks on the vnode
* buffer object lists.
*/
- bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
if (bp != NULL) {
- bp->b_flags |= (B_INVAL | B_RELBUF);
- bp->b_flags &= ~B_ASYNC;
+ KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
+ ("mismatch2 l %jd %jd b %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
+ (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp)));
+ bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
+ bp->b_flags &= ~(B_ASYNC | B_CACHE);
brelse(bp);
}
deallocated += fs->fs_bsize;
@@ -1095,6 +1131,18 @@ fail:
* cleared, free the blocks.
*/
for (blkp = allociblk; blkp < allocblk; blkp++) {
+#ifdef INVARIANTS
+ if (blkp == allociblk)
+ lbns_remfree = lbns;
+ bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
+ GB_NOCREAT | GB_UNMAPPED);
+ if (bp != NULL) {
+ panic("zombie2 %jd %ju %ju",
+ (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
+ (uintmax_t)fsbtodb(fs, *blkp));
+ }
+ lbns_remfree++;
+#endif
ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
ip->i_number, vp->v_type, NULL);
}
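
Each MPASS pair added in this file asserts the same invariant: allocblk and lbns_remfree are cursors over two parallel fixed-size arrays (allociblk and lbns) that advance in lockstep and must never run off the end. Reduced to a userland sketch, with assert() standing in for MPASS, an arbitrary array size of 4, and the names kept from ffs_balloc.c only for illustration:

    #include <assert.h>
    #include <stdint.h>

    #define nitems(x) (sizeof((x)) / sizeof((x)[0]))

    static int64_t allociblk[4], *allocblk = allociblk; /* blocks allocated */
    static int64_t lbns[4], *lbns_remfree = lbns;       /* matching logical blocks */

    static void
    record_alloc(int64_t nb, int64_t lbn)
    {
            /* Catch a cursor overrun before it scribbles past the arrays. */
            assert(allocblk < allociblk + nitems(allociblk));
            assert(lbns_remfree < lbns + nitems(lbns));
            *allocblk++ = nb;
            *lbns_remfree++ = lbn;
    }

The DOINGSOFTDEP() guard is a separate fix: the ENOSPC retry path now calls softdep_request_cleanup() only on filesystems actually running soft updates, where such a cleanup can free blocks.
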
diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c
index 48e07ea..152866f 100644
--- a/sys/x86/x86/identcpu.c
+++ b/sys/x86/x86/identcpu.c
@@ -1300,6 +1300,8 @@ identify_hypervisor(void)
vm_guest = VM_GUEST_VMWARE;
else if (strcmp(hv_vendor, "Microsoft Hv") == 0)
vm_guest = VM_GUEST_HV;
+ else if (strcmp(hv_vendor, "KVMKVMKVM") == 0)
+ vm_guest = VM_GUEST_KVM;
}
return;
}
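
identify_hypervisor() matches the 12-byte vendor string that CPUID leaf 0x40000000 returns in EBX, ECX, and EDX. A userland approximation of the same probe, assuming an x86 target with GCC-style inline assembly and that the hypervisor-present bit (CPUID.1:ECX bit 31) has already been confirmed:

    #include <stdio.h>
    #include <string.h>

    static void
    cpuid(unsigned int leaf, unsigned int regs[4])
    {
            __asm__ __volatile__("cpuid"
                : "=a" (regs[0]), "=b" (regs[1]),
                  "=c" (regs[2]), "=d" (regs[3])
                : "a" (leaf));
    }

    int
    main(void)
    {
            unsigned int regs[4];
            char hv_vendor[13];

            cpuid(0x40000000, regs);
            memcpy(hv_vendor + 0, &regs[1], 4);  /* EBX */
            memcpy(hv_vendor + 4, &regs[2], 4);  /* ECX */
            memcpy(hv_vendor + 8, &regs[3], 4);  /* EDX */
            hv_vendor[12] = '\0';
            printf("hypervisor vendor: \"%s\"\n", hv_vendor);
            return (0);
    }

On a KVM guest this prints "KVMKVMKVM", the string the new identcpu.c case tests for.
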
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index 203e9d0..cd774df 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -499,8 +499,7 @@ native_lapic_init(vm_paddr_t addr)
ver = lapic_read32(LAPIC_VERSION);
if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) {
lapic_eoi_suppression = 1;
- if (vm_guest == VM_GUEST_VM &&
- !strcmp(hv_vendor, "KVMKVMKVM")) {
+ if (vm_guest == VM_GUEST_KVM) {
if (bootverbose)
printf(
"KVM -- disabling lapic eoi suppression\n");