author		sbruno <sbruno@FreeBSD.org>	2015-06-17 18:50:57 +0000
committer	sbruno <sbruno@FreeBSD.org>	2015-06-17 18:50:57 +0000
commit		d2f2bb87e8f9590ff5d8eb0745530c9f78bad751 (patch)
tree		088c72f10f4fa1342bdd688750a34d0bc527ee29 /sys/dev/e1000/if_em.c
parent		6f160602884a965fff58f52f3747e205b403df7e (diff)
MFC r284179, r283959
Implement multiqueue (max 2 tx/rx queues) for the 82574L chipset.
Change default tuning parameters to handle this new configuration
if EM_MULTIQUEUE is set in the kernel configuration. Off by default.

See r283959 changelog for the scope of these changes.

Relnotes:	Yes
Sponsored by:	Limelight Networks
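
A hedged usage sketch (EM_MULTIQUEUE and hw.em.num_queues are the knobs added by this change; the values shown are illustrative, not defaults): a kernel built with the option plus a loader tunable along these lines would enable both queues on an 82574:

	options 	EM_MULTIQUEUE	# kernel configuration file; off by default

	# /boot/loader.conf
	hw.em.enable_msix=1	# already the default; multiqueue requires MSI-X
	hw.em.num_queues=2	# 0 = autoconfigure; the 82574 supports at most 2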
Diffstat (limited to 'sys/dev/e1000/if_em.c')
-rw-r--r--	sys/dev/e1000/if_em.c	630
1 file changed, 441 insertions(+), 189 deletions(-)
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index 1afc6b8..9ed3f36 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -32,6 +32,8 @@
******************************************************************************/
/*$FreeBSD$*/
+#include "opt_em.h"
+#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -41,6 +43,10 @@
#include <sys/param.h>
#include <sys/systm.h>
+#ifdef DDB
+#include <sys/types.h>
+#include <ddb/ddb.h>
+#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
@@ -52,6 +58,7 @@
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
+#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
@@ -207,7 +214,7 @@ static int em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int em_mq_start(struct ifnet *, struct mbuf *);
static int em_mq_start_locked(struct ifnet *,
- struct tx_ring *, struct mbuf *);
+ struct tx_ring *);
static void em_qflush(struct ifnet *);
#else
static void em_start(struct ifnet *);
@@ -297,6 +304,10 @@ static void em_handle_tx(void *context, int pending);
static void em_handle_rx(void *context, int pending);
static void em_handle_link(void *context, int pending);
+#ifdef EM_MULTIQUEUE
+static void em_enable_vectors_82574(struct adapter *);
+#endif
+
static void em_set_sysctl_value(struct adapter *, const char *,
const char *, int *, int);
static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
@@ -395,6 +406,20 @@ TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
"Enable MSI-X interrupts");
+#ifdef EM_MULTIQUEUE
+static int em_num_queues = 1;
+TUNABLE_INT("hw.em.num_queues", &em_num_queues);
+SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
+ "82574 only: Number of queues to configure, 0 indicates autoconfigure");
+#endif
+
+/*
+** Global variable to store last used CPU when binding queues
+** to CPUs in em_allocate_msix. Starts at CPU_FIRST and increments when a
+** queue is bound to a cpu.
+*/
+static int em_last_bind_cpu = -1;
+
/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
@@ -429,10 +454,10 @@ static int
em_probe(device_t dev)
{
char adapter_name[60];
- u16 pci_vendor_id = 0;
- u16 pci_device_id = 0;
- u16 pci_subvendor_id = 0;
- u16 pci_subdevice_id = 0;
+ uint16_t pci_vendor_id = 0;
+ uint16_t pci_device_id = 0;
+ uint16_t pci_subvendor_id = 0;
+ uint16_t pci_subdevice_id = 0;
em_vendor_info_t *ent;
INIT_DEBUGOUT("em_probe: begin");
@@ -559,6 +584,11 @@ em_attach(device_t dev)
goto err_pci;
}
+ /*
+ * Setup MSI/X or MSI if PCI Express
+ */
+ adapter->msix = em_setup_msix(adapter);
+
e1000_get_bus_info(hw);
/* Set up some sysctls for the tunable interrupt delays */
@@ -890,7 +920,7 @@ em_resume(device_t dev)
EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
em_start_locked(ifp, txr);
@@ -904,7 +934,70 @@ em_resume(device_t dev)
}
-#ifdef EM_MULTIQUEUE
+#ifndef EM_MULTIQUEUE
+static void
+em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
+{
+ struct adapter *adapter = ifp->if_softc;
+ struct mbuf *m_head;
+
+ EM_TX_LOCK_ASSERT(txr);
+
+ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
+ IFF_DRV_RUNNING)
+ return;
+
+ if (!adapter->link_active)
+ return;
+
+ while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+ /* Call cleanup if number of TX descriptors low */
+ if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
+ em_txeof(txr);
+ if (txr->tx_avail < EM_MAX_SCATTER) {
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ break;
+ }
+ IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
+ if (m_head == NULL)
+ break;
+ /*
+ * Encapsulation can modify our pointer, and or make it
+ * NULL on failure. In that event, we can't requeue.
+ */
+ if (em_xmit(txr, &m_head)) {
+ if (m_head == NULL)
+ break;
+ IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ break;
+ }
+
+ /* Mark the queue as having work */
+ if (txr->busy == EM_TX_IDLE)
+ txr->busy = EM_TX_BUSY;
+
+ /* Send a copy of the frame to the BPF listener */
+ ETHER_BPF_MTAP(ifp, m_head);
+
+ }
+
+ return;
+}
+
+static void
+em_start(struct ifnet *ifp)
+{
+ struct adapter *adapter = ifp->if_softc;
+ struct tx_ring *txr = adapter->tx_rings;
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ EM_TX_LOCK(txr);
+ em_start_locked(ifp, txr);
+ EM_TX_UNLOCK(txr);
+ }
+ return;
+}
+#else /* EM_MULTIQUEUE */
/*********************************************************************
* Multiqueue Transmit routines
*
@@ -913,8 +1006,38 @@ em_resume(device_t dev)
* than do an immediate send. It is this that is an advantage
* in this driver, rather than also having multiple tx queues.
**********************************************************************/
+/*
+** Multiqueue capable stack interface
+*/
static int
-em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
+em_mq_start(struct ifnet *ifp, struct mbuf *m)
+{
+ struct adapter *adapter = ifp->if_softc;
+ struct tx_ring *txr = adapter->tx_rings;
+ unsigned int i, error;
+
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ i = m->m_pkthdr.flowid % adapter->num_queues;
+ else
+ i = curcpu % adapter->num_queues;
+
+ txr = &adapter->tx_rings[i];
+
+ error = drbr_enqueue(ifp, txr->br, m);
+ if (error)
+ return (error);
+
+ if (EM_TX_TRYLOCK(txr)) {
+ em_mq_start_locked(ifp, txr);
+ EM_TX_UNLOCK(txr);
+ } else
+ taskqueue_enqueue(txr->tq, &txr->tx_task);
+
+ return (0);
+}
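
A hedged reading of em_mq_start above, with assumed values for illustration:

	/*
	 * Sketch, assuming adapter->num_queues == 2: a flow the stack has
	 * hashed to m->m_pkthdr.flowid == 7 always lands on tx_rings[1]
	 * (7 % 2), while unhashed traffic falls back to curcpu % 2.
	 * Pinning a flow to one ring keeps its packets ordered, and the
	 * unconditional drbr_enqueue() before the trylock means a
	 * contended ring is drained later by the tx_task taskqueue
	 * instead of blocking the caller.
	 */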
+
+static int
+em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
struct mbuf *next;
@@ -922,25 +1045,23 @@ em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING || adapter->link_active == 0) {
- if (m != NULL)
- err = drbr_enqueue(ifp, txr->br, m);
- return (err);
+ return (ENETDOWN);
}
- enq = 0;
- if (m != NULL) {
- err = drbr_enqueue(ifp, txr->br, m);
- if (err)
- return (err);
- }
-
/* Process the queue */
while ((next = drbr_peek(ifp, txr->br)) != NULL) {
if ((err = em_xmit(txr, &next)) != 0) {
- if (next == NULL)
+ if (next == NULL) {
+ /* It was freed, move forward */
drbr_advance(ifp, txr->br);
- else
+ } else {
+ /*
+ * Still have one left, it may not be
+ * the same since the transmit function
+ * may have changed it.
+ */
drbr_putback(ifp, txr->br, next);
+ }
break;
}
drbr_advance(ifp, txr->br);
@@ -959,31 +1080,13 @@ em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
if (txr->tx_avail < EM_MAX_SCATTER)
em_txeof(txr);
- if (txr->tx_avail < EM_MAX_SCATTER)
+ if (txr->tx_avail < EM_MAX_SCATTER) {
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ }
return (err);
}
/*
-** Multiqueue capable stack interface
-*/
-static int
-em_mq_start(struct ifnet *ifp, struct mbuf *m)
-{
- struct adapter *adapter = ifp->if_softc;
- struct tx_ring *txr = adapter->tx_rings;
- int error;
-
- if (EM_TX_TRYLOCK(txr)) {
- error = em_mq_start_locked(ifp, txr, m);
- EM_TX_UNLOCK(txr);
- } else
- error = drbr_enqueue(ifp, txr->br, m);
-
- return (error);
-}
-
-/*
** Flush all ring buffers
*/
static void
@@ -1001,69 +1104,6 @@ em_qflush(struct ifnet *ifp)
}
if_qflush(ifp);
}
-#else /* !EM_MULTIQUEUE */
-
-static void
-em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
-{
- struct adapter *adapter = ifp->if_softc;
- struct mbuf *m_head;
-
- EM_TX_LOCK_ASSERT(txr);
-
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING)
- return;
-
- if (!adapter->link_active)
- return;
-
- while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
- /* Call cleanup if number of TX descriptors low */
- if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
- em_txeof(txr);
- if (txr->tx_avail < EM_MAX_SCATTER) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- break;
- }
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
- if (m_head == NULL)
- break;
- /*
- * Encapsulation can modify our pointer, and or make it
- * NULL on failure. In that event, we can't requeue.
- */
- if (em_xmit(txr, &m_head)) {
- if (m_head == NULL)
- break;
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
- break;
- }
-
- /* Mark the queue as having work */
- if (txr->busy == EM_TX_IDLE)
- txr->busy = EM_TX_BUSY;
-
- /* Send a copy of the frame to the BPF listener */
- ETHER_BPF_MTAP(ifp, m_head);
- }
-
- return;
-}
-
-static void
-em_start(struct ifnet *ifp)
-{
- struct adapter *adapter = ifp->if_softc;
- struct tx_ring *txr = adapter->tx_rings;
-
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- EM_TX_LOCK(txr);
- em_start_locked(ifp, txr);
- EM_TX_UNLOCK(txr);
- }
- return;
-}
#endif /* EM_MULTIQUEUE */
/*********************************************************************
@@ -1460,7 +1500,7 @@ em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
em_start_locked(ifp, txr);
@@ -1527,14 +1567,14 @@ em_handle_que(void *context, int pending)
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
-
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
+
EM_TX_LOCK(txr);
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
em_start_locked(ifp, txr);
@@ -1568,11 +1608,12 @@ em_msix_tx(void *arg)
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
em_start_locked(ifp, txr);
#endif
+
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
EM_TX_UNLOCK(txr);
@@ -1598,9 +1639,10 @@ em_msix_rx(void *arg)
more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
if (more)
taskqueue_enqueue(rxr->tq, &rxr->rx_task);
- else
+ else {
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
+ }
return;
}
@@ -1627,6 +1669,16 @@ em_msix_link(void *arg)
} else
E1000_WRITE_REG(&adapter->hw, E1000_IMS,
EM_MSIX_LINK | E1000_IMS_LSC);
+ /*
+	** Because we must read the ICR for this interrupt
+	** it may clear other causes using autoclear, for
+	** this reason we simply create a soft interrupt
+	** for all these vectors.
+ */
+ if (reg_icr) {
+ E1000_WRITE_REG(&adapter->hw,
+ E1000_ICS, adapter->ims);
+ }
return;
}
@@ -1640,9 +1692,10 @@ em_handle_rx(void *context, int pending)
more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
if (more)
taskqueue_enqueue(rxr->tq, &rxr->rx_task);
- else
+ else {
/* Reenable this interrupt */
E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
+ }
}
static void
@@ -1656,7 +1709,7 @@ em_handle_tx(void *context, int pending)
em_txeof(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
em_start_locked(ifp, txr);
@@ -1686,7 +1739,7 @@ em_handle_link(void *context, int pending)
EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
- em_mq_start_locked(ifp, txr, NULL);
+ em_mq_start_locked(ifp, txr);
#else
if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
em_start_locked(ifp, txr);
@@ -2269,7 +2322,7 @@ em_local_timer(void *arg)
struct ifnet *ifp = adapter->ifp;
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
- u32 trigger;
+ u32 trigger = 0;
EM_CORE_LOCK_ASSERT(adapter);
@@ -2282,9 +2335,11 @@ em_local_timer(void *arg)
e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
/* Mask to use in the irq trigger */
- if (adapter->msix_mem)
- trigger = rxr->ims;
- else
+ if (adapter->msix_mem) {
+ for (int i = 0; i < adapter->num_queues; i++, rxr++)
+ trigger |= rxr->ims;
+ rxr = adapter->rx_rings;
+ } else
trigger = E1000_ICS_RXDMT0;
/*
@@ -2293,7 +2348,6 @@ em_local_timer(void *arg)
** and the HUNG state will be static if set.
*/
for (int i = 0; i < adapter->num_queues; i++, txr++) {
- /* Last cycle a queue was declared hung */
if (txr->busy == EM_TX_HUNG)
goto hung;
if (txr->busy >= EM_TX_MAXTRIES)
@@ -2311,14 +2365,9 @@ em_local_timer(void *arg)
return;
hung:
/* Looks like we're hung */
- device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
- device_printf(adapter->dev,
- "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
- E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
- E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
- device_printf(adapter->dev,"TX(%d) desc avail = %d,"
- "Next TX to Clean = %d\n",
- txr->me, txr->tx_avail, txr->next_to_clean);
+ device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
+ txr->me);
+ em_print_debug_info(adapter);
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
adapter->watchdog_events++;
em_init_locked(adapter);
@@ -2370,7 +2419,7 @@ em_update_link_status(struct adapter *adapter)
(hw->mac.type == e1000_82572))) {
int tarc0;
tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
- tarc0 &= ~SPEED_MODE_BIT;
+ tarc0 &= ~TARC_SPEED_MODE_BIT;
E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
}
if (bootverbose)
@@ -2486,14 +2535,6 @@ em_allocate_pci_resources(struct adapter *adapter)
rman_get_bushandle(adapter->memory);
adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
- /* Default to a single queue */
- adapter->num_queues = 1;
-
- /*
- * Setup MSI/X or MSI if PCI Express
- */
- adapter->msix = em_setup_msix(adapter);
-
adapter->hw.back = &adapter->osdep;
return (0);
@@ -2568,13 +2609,14 @@ em_allocate_msix(struct adapter *adapter)
struct tx_ring *txr = adapter->tx_rings;
struct rx_ring *rxr = adapter->rx_rings;
int error, rid, vector = 0;
+ int cpu_id = 0;
/* Make sure all interrupts are disabled */
E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* First set up ring resources */
- for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
/* RX ring */
rid = vector + 1;
@@ -2594,14 +2636,20 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
+ bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
#endif
- rxr->msix = vector++; /* NOTE increment vector for TX */
+ rxr->msix = vector;
+
+ if (em_last_bind_cpu < 0)
+ em_last_bind_cpu = CPU_FIRST();
+ cpu_id = em_last_bind_cpu;
+ bus_bind_intr(dev, rxr->res, cpu_id);
+
TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
taskqueue_thread_enqueue, &rxr->tq);
- taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
- device_get_nameunit(adapter->dev));
+ taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
+ device_get_nameunit(adapter->dev), cpu_id);
/*
** Set the bit to enable interrupt
** in E1000_IMS -- bits 20 and 21
@@ -2609,8 +2657,13 @@ em_allocate_msix(struct adapter *adapter)
** NOTHING to do with the MSIX vector
*/
rxr->ims = 1 << (20 + i);
+ adapter->ims |= rxr->ims;
adapter->ivars |= (8 | rxr->msix) << (i * 4);
+ em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
+ }
+
+ for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
/* TX ring */
rid = vector + 1;
txr->res = bus_alloc_resource_any(dev,
@@ -2628,14 +2681,20 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
+ bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
#endif
- txr->msix = vector++; /* Increment vector for next pass */
+ txr->msix = vector;
+
+ if (em_last_bind_cpu < 0)
+ em_last_bind_cpu = CPU_FIRST();
+ cpu_id = em_last_bind_cpu;
+ bus_bind_intr(dev, txr->res, cpu_id);
+
TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
taskqueue_thread_enqueue, &txr->tq);
- taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
- device_get_nameunit(adapter->dev));
+ taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
+ device_get_nameunit(adapter->dev), cpu_id);
/*
** Set the bit to enable interrupt
** in E1000_IMS -- bits 22 and 23
@@ -2643,13 +2702,16 @@ em_allocate_msix(struct adapter *adapter)
** NOTHING to do with the MSIX vector
*/
txr->ims = 1 << (22 + i);
+ adapter->ims |= txr->ims;
adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
+
+ em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
}
/* Link interrupt */
- ++rid;
+ rid = vector + 1;
adapter->res = bus_alloc_resource_any(dev,
- SYS_RES_IRQ, &rid, RF_ACTIVE);
+ SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
if (!adapter->res) {
device_printf(dev,"Unable to allocate "
"bus resource: Link interrupt [%d]\n", rid);
@@ -2665,7 +2727,7 @@ em_allocate_msix(struct adapter *adapter)
return (error);
}
#if __FreeBSD_version >= 800504
- bus_describe_intr(dev, adapter->res, adapter->tag, "link");
+ bus_describe_intr(dev, adapter->res, adapter->tag, "link");
#endif
adapter->linkvec = vector;
adapter->ivars |= (8 | vector) << 16;
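
A sketch of the resulting 82574 IVAR packing for the two-queue case, inferred from the shifts in this function (each 4-bit field holds a vector number, with bit 3 acting as the valid bit):

	uint32_t ivars = 0;
	ivars |= (8 | 0) << 0;	/* RX queue 0 -> MSI-X vector 0 */
	ivars |= (8 | 1) << 4;	/* RX queue 1 -> MSI-X vector 1 */
	ivars |= (8 | 2) << 8;	/* TX queue 0 -> MSI-X vector 2 */
	ivars |= (8 | 3) << 12;	/* TX queue 1 -> MSI-X vector 3 */
	ivars |= (8 | 4) << 16;	/* link       -> MSI-X vector 4 */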
@@ -2689,9 +2751,8 @@ em_free_pci_resources(struct adapter *adapter)
*/
for (int i = 0; i < adapter->num_queues; i++) {
txr = &adapter->tx_rings[i];
- rxr = &adapter->rx_rings[i];
/* an early abort? */
- if ((txr == NULL) || (rxr == NULL))
+ if (txr == NULL)
break;
rid = txr->msix +1;
if (txr->tag != NULL) {
@@ -2701,6 +2762,11 @@ em_free_pci_resources(struct adapter *adapter)
if (txr->res != NULL)
bus_release_resource(dev, SYS_RES_IRQ,
rid, txr->res);
+
+ rxr = &adapter->rx_rings[i];
+ /* an early abort? */
+ if (rxr == NULL)
+ break;
rid = rxr->msix +1;
if (rxr->tag != NULL) {
bus_teardown_intr(dev, rxr->res, rxr->tag);
@@ -2750,14 +2816,19 @@ em_setup_msix(struct adapter *adapter)
device_t dev = adapter->dev;
int val;
+ /* Nearly always going to use one queue */
+ adapter->num_queues = 1;
+
/*
- ** Setup MSI/X for Hartwell: tests have shown
- ** use of two queues to be unstable, and to
- ** provide no great gain anyway, so we simply
- ** seperate the interrupts and use a single queue.
+ ** Try using MSI-X for Hartwell adapters
*/
if ((adapter->hw.mac.type == e1000_82574) &&
(em_enable_msix == TRUE)) {
+#ifdef EM_MULTIQUEUE
+ adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
+ if (adapter->num_queues > 1)
+ em_enable_vectors_82574(adapter);
+#endif
/* Map the MSIX BAR */
int rid = PCIR_BAR(EM_MSIX_BAR);
adapter->msix_mem = bus_alloc_resource_any(dev,
@@ -2769,16 +2840,34 @@ em_setup_msix(struct adapter *adapter)
goto msi;
}
val = pci_msix_count(dev);
- /* We only need/want 3 vectors */
- if (val >= 3)
- val = 3;
- else {
- device_printf(adapter->dev,
- "MSIX: insufficient vectors, using MSI\n");
- goto msi;
+
+#ifdef EM_MULTIQUEUE
+ /* We need 5 vectors in the multiqueue case */
+ if (adapter->num_queues > 1 ) {
+ if (val >= 5)
+ val = 5;
+ else {
+ adapter->num_queues = 1;
+ device_printf(adapter->dev,
+ "Insufficient MSIX vectors for >1 queue, "
+ "using single queue...\n");
+ goto msix_one;
+ }
+ } else {
+msix_one:
+#endif
+ if (val >= 3)
+ val = 3;
+ else {
+ device_printf(adapter->dev,
+ "Insufficient MSIX vectors, using MSI\n");
+ goto msi;
+ }
+#ifdef EM_MULTIQUEUE
}
+#endif
- if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
+ if ((pci_alloc_msix(dev, &val) == 0)) {
device_printf(adapter->dev,
"Using MSIX interrupts "
"with %d vectors\n", val);
@@ -2799,7 +2888,7 @@ msi:
}
val = 1;
if (pci_alloc_msi(dev, &val) == 0) {
- device_printf(adapter->dev,"Using an MSI interrupt\n");
+ device_printf(adapter->dev, "Using an MSI interrupt\n");
return (val);
}
/* Should only happen due to manual configuration */
@@ -3441,7 +3530,7 @@ em_initialize_transmit_unit(struct adapter *adapter)
{
struct tx_ring *txr = adapter->tx_rings;
struct e1000_hw *hw = &adapter->hw;
- u32 tctl, tarc, tipg = 0;
+ u32 tctl, txdctl = 0, tarc, tipg = 0;
INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
@@ -3463,6 +3552,15 @@ em_initialize_transmit_unit(struct adapter *adapter)
E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
txr->busy = EM_TX_IDLE;
+ txdctl = 0; /* clear txdctl */
+ txdctl |= 0x1f; /* PTHRESH */
+ txdctl |= 1 << 8; /* HTHRESH */
+ txdctl |= 1 << 16;/* WTHRESH */
+ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
+ txdctl |= E1000_TXDCTL_GRAN;
+ txdctl |= 1 << 25; /* LWTHRESH */
+
+ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
}
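
A hedged arithmetic check of the TXDCTL value assembled above, assuming E1000_TXDCTL_GRAN is the descriptor-granularity bit (1 << 24) as in the e1000 headers:

	/*
	 * 0x1f | 1 << 8 | 1 << 16 | 1 << 22 | (1 << 24) | 1 << 25
	 *   == 0x0000001f  PTHRESH = 31
	 *    | 0x00000100  HTHRESH = 1
	 *    | 0x00010000  WTHRESH = 1
	 *    | 0x00400000  reserved bit 22
	 *    | 0x01000000  GRAN (count in descriptors, not cache lines)
	 *    | 0x02000000  LWTHRESH = 1
	 *   == 0x0341011f
	 */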
/* Set the default values for the Tx Inter Packet Gap timer */
@@ -3493,15 +3591,25 @@ em_initialize_transmit_unit(struct adapter *adapter)
if ((adapter->hw.mac.type == e1000_82571) ||
(adapter->hw.mac.type == e1000_82572)) {
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
- tarc |= SPEED_MODE_BIT;
+ tarc |= TARC_SPEED_MODE_BIT;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
} else if (adapter->hw.mac.type == e1000_80003es2lan) {
+ /* errata: program both queues to unweighted RR */
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
tarc |= 1;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
tarc |= 1;
E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
+ } else if (adapter->hw.mac.type == e1000_82574) {
+ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
+ tarc |= TARC_ERRATA_BIT;
+ if ( adapter->num_queues > 1) {
+ tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
+ } else
+ E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
}
adapter->txd_cmd = E1000_TXD_CMD_IFCS;
@@ -3932,8 +4040,9 @@ em_txeof(struct tx_ring *txr)
* TX lock which, with a single queue, guarantees
* sanity.
*/
- if (txr->tx_avail >= EM_MAX_SCATTER)
+ if (txr->tx_avail >= EM_MAX_SCATTER) {
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ }
/* Disable hang detection if all clean */
if (txr->tx_avail == adapter->num_tx_desc)
@@ -4305,6 +4414,9 @@ em_initialize_receive_unit(struct adapter *adapter)
E1000_WRITE_REG(&adapter->hw, E1000_RADV,
adapter->rx_abs_int_delay.value);
+
+ E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
+ adapter->rx_int_delay.value);
/*
* Set the interrupt throttling rate. Value is calculated
* as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
@@ -4316,20 +4428,65 @@ em_initialize_receive_unit(struct adapter *adapter)
** using the EITR register (82574 only)
*/
if (hw->mac.type == e1000_82574) {
+ u32 rfctl;
for (int i = 0; i < 4; i++)
E1000_WRITE_REG(hw, E1000_EITR_82574(i),
DEFAULT_ITR);
/* Disable accelerated acknowledge */
- E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
+ rfctl = E1000_READ_REG(hw, E1000_RFCTL);
+ rfctl |= E1000_RFCTL_ACK_DIS;
+ E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
}
rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
- if (ifp->if_capenable & IFCAP_RXCSUM)
+ if (ifp->if_capenable & IFCAP_RXCSUM) {
+#ifdef EM_MULTIQUEUE
+ rxcsum |= E1000_RXCSUM_TUOFL |
+ E1000_RXCSUM_IPOFL |
+ E1000_RXCSUM_PCSD;
+#else
rxcsum |= E1000_RXCSUM_TUOFL;
- else
+#endif
+ } else
rxcsum &= ~E1000_RXCSUM_TUOFL;
+
E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
+#ifdef EM_MULTIQUEUE
+ if (adapter->num_queues > 1) {
+ uint32_t rss_key[10];
+ uint32_t reta;
+ int i;
+
+ /*
+ * Configure RSS key
+ */
+ arc4rand(rss_key, sizeof(rss_key), 0);
+ for (i = 0; i < 10; ++i)
+ E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]);
+
+ /*
+ * Configure RSS redirect table in following fashion:
+ * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
+ */
+ reta = 0;
+ for (i = 0; i < 4; ++i) {
+ uint32_t q;
+ q = (i % adapter->num_queues) << 7;
+ reta |= q << (8 * i);
+ }
+ for (i = 0; i < 32; ++i)
+ E1000_WRITE_REG(hw, E1000_RETA(i), reta);
+
+ E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
+ E1000_MRQC_RSS_FIELD_IPV4_TCP |
+ E1000_MRQC_RSS_FIELD_IPV4 |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
+ E1000_MRQC_RSS_FIELD_IPV6_EX |
+ E1000_MRQC_RSS_FIELD_IPV6 |
+ E1000_MRQC_RSS_FIELD_IPV6_TCP);
+ }
+#endif
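
A hedged trace of the redirect-table loop above for the two-queue case:

	/*
	 * With num_queues == 2, q = (i % 2) << 7 yields 0x00, 0x80, 0x00,
	 * 0x80 for i = 0..3, so reta == 0x80008000.  Bit 7 of each byte
	 * selects the RX queue, and writing the same word to all 32 RETA
	 * registers produces a 128-entry table that alternates hash
	 * values between queue 0 and queue 1.
	 */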
/*
** XXX TEMPORARY WORKAROUND: on some systems with 82573
** long latencies are observed, like Lenovo X60. This
@@ -4362,13 +4519,30 @@ em_initialize_receive_unit(struct adapter *adapter)
E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
}
- /* Set PTHRESH for improved jumbo performance */
+ /*
+ * Set PTHRESH for improved jumbo performance
+ * According to 10.2.5.11 of Intel 82574 Datasheet,
+ * RXDCTL(1) is written whenever RXDCTL(0) is written.
+ * Only write to RXDCTL(1) if there is a need for different
+ * settings.
+ */
if (((adapter->hw.mac.type == e1000_ich9lan) ||
(adapter->hw.mac.type == e1000_pch2lan) ||
(adapter->hw.mac.type == e1000_ich10lan)) &&
(ifp->if_mtu > ETHERMTU)) {
u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
+ } else if ((adapter->hw.mac.type == e1000_82574) &&
+ (ifp->if_mtu > ETHERMTU)) {
+ for (int i = 0; i < adapter->num_queues; i++) {
+ u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
+
+ rxdctl |= 0x20; /* PTHRESH */
+ rxdctl |= 4 << 8; /* HTHRESH */
+ rxdctl |= 4 << 16;/* WTHRESH */
+ rxdctl |= 1 << 24; /* Switch to granularity */
+ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
+ }
}
if (adapter->hw.mac.type >= e1000_pch2lan) {
@@ -4435,6 +4609,11 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
EM_RX_LOCK(rxr);
+ /* Sync the ring */
+ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+
#ifdef DEV_NETMAP
if (netmap_rx_irq(ifp, rxr->me, &processed)) {
EM_RX_UNLOCK(rxr);
@@ -4447,9 +4626,6 @@ em_rxeof(struct rx_ring *rxr, int count, int *done)
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
- bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
-
cur = &rxr->rx_base[i];
status = cur->status;
mp = sendmp = NULL;
@@ -4515,6 +4691,10 @@ skip:
rxr->fmp = rxr->lmp = NULL;
}
next_desc:
+ /* Sync the ring */
+ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
/* Zero out the receive descriptors status. */
cur->status = 0;
++rxdone; /* cumulative for POLL */
@@ -5327,10 +5507,10 @@ em_add_hw_stats(struct adapter *adapter)
CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
"Flow Control Low Watermark");
- for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
- snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
+ for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
- CTLFLAG_RD, NULL, "Queue Name");
+ CTLFLAG_RD, NULL, "TX Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
@@ -5349,7 +5529,12 @@ em_add_hw_stats(struct adapter *adapter)
SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
CTLFLAG_RD, &txr->no_desc_avail,
"Queue No Descriptor Available");
-
+
+ snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
+ queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+ CTLFLAG_RD, NULL, "RX Queue Name");
+ queue_list = SYSCTL_CHILDREN(queue_node);
+
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
CTLTYPE_UINT | CTLFLAG_RD, adapter,
E1000_RDH(rxr->me),
@@ -5783,19 +5968,86 @@ em_print_debug_info(struct adapter *adapter)
else
printf("and ACTIVE\n");
- device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
- E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
- E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
- device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
- E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
- E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
- device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
- device_printf(dev, "TX descriptors avail = %d\n",
- txr->tx_avail);
- device_printf(dev, "Tx Descriptors avail failure = %ld\n",
- txr->no_desc_avail);
- device_printf(dev, "RX discarded packets = %ld\n",
- rxr->rx_discarded);
- device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
- device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
+ for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
+ device_printf(dev, "TX Queue %d ------\n", i);
+ device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
+ E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
+ E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
+ device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
+ device_printf(dev, "TX descriptors avail = %d\n",
+ txr->tx_avail);
+ device_printf(dev, "Tx Descriptors avail failure = %ld\n",
+ txr->no_desc_avail);
+ device_printf(dev, "RX Queue %d ------\n", i);
+ device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
+ E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
+ E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
+ device_printf(dev, "RX discarded packets = %ld\n",
+ rxr->rx_discarded);
+ device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
+ device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
+ }
+}
+
+#ifdef EM_MULTIQUEUE
+/*
+ * 82574 only:
+ * Write a new value to the EEPROM increasing the number of MSIX
+ * vectors from 3 to 5, for proper multiqueue support.
+ */
+static void
+em_enable_vectors_82574(struct adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ device_t dev = adapter->dev;
+ u16 edata;
+
+ e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
+ printf("Current cap: %#06x\n", edata);
+ if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
+ device_printf(dev, "Writing to eeprom: increasing "
+ "reported MSIX vectors from 3 to 5...\n");
+ edata &= ~(EM_NVM_MSIX_N_MASK);
+ edata |= 4 << EM_NVM_MSIX_N_SHIFT;
+ e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
+ e1000_update_nvm_checksum(hw);
+ device_printf(dev, "Writing to eeprom: done\n");
+ }
+}
+#endif
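
A hedged reading of the check in em_enable_vectors_82574() above, assuming the NVM field uses the PCIe-style table-size encoding (advertised vectors = field value + 1):

	/* Stock field value 2 -> 3 vectors; the 4 written here -> 5
	 * vectors (2 RX + 2 TX + link).  Restated as an expression: */
	u16 vectors = ((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) + 1;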
+
+#ifdef DDB
+DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
+{
+ devclass_t dc;
+ int max_em;
+
+ dc = devclass_find("em");
+ max_em = devclass_get_maxunit(dc);
+
+ for (int index = 0; index < (max_em - 1); index++) {
+ device_t dev;
+ dev = devclass_get_device(dc, index);
+ if (device_get_driver(dev) == &em_driver) {
+ struct adapter *adapter = device_get_softc(dev);
+ em_init_locked(adapter);
+ }
+ }
+}
+DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
+{
+ devclass_t dc;
+ int max_em;
+
+ dc = devclass_find("em");
+ max_em = devclass_get_maxunit(dc);
+
+ for (int index = 0; index < (max_em - 1); index++) {
+ device_t dev;
+ dev = devclass_get_device(dc, index);
+ if (device_get_driver(dev) == &em_driver)
+ em_print_debug_info(device_get_softc(dev));
+ }
+
}
+#endif
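
With DDB compiled in, the new commands would be run from the kernel debugger prompt, e.g.:

	db> em_dump_queue
	db> em_reset_dev

One hedged observation: devclass_get_maxunit() returns one past the highest allocated unit number, so the "index < (max_em - 1)" bound in both loops appears to skip the highest-numbered em device; "index < max_em" would cover them all.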