path: root/sys/dev/e1000/if_igb.c
author    sbruno <sbruno@FreeBSD.org>  2012-01-04 02:01:27 +0000
committer sbruno <sbruno@FreeBSD.org>  2012-01-04 02:01:27 +0000
commit    39d07b056ff7ed4a2cd65691be23d3c93398b881 (patch)
tree      5207a81db7d21800f965bb648d0ff2001f7331ca /sys/dev/e1000/if_igb.c
parent    d08191b4175ebda3e5ac2fabbe62e2bdf139a201 (diff)
parent    caa4548474a54b104b7a14a1625ef6c39b848dd3 (diff)
IFC to head to catch up the bhyve branch
Approved by: grehan@
Diffstat (limited to 'sys/dev/e1000/if_igb.c')
-rw-r--r--  sys/dev/e1000/if_igb.c | 593
1 file changed, 365 insertions(+), 228 deletions(-)
diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c
index 4ae4204..1380c3b 100644
--- a/sys/dev/e1000/if_igb.c
+++ b/sys/dev/e1000/if_igb.c
@@ -100,7 +100,7 @@ int igb_display_debug_stats = 0;
/*********************************************************************
* Driver version:
*********************************************************************/
-char igb_driver_version[] = "version - 2.2.5";
+char igb_driver_version[] = "version - 2.3.1";
/*********************************************************************
@@ -171,15 +171,13 @@ static int igb_detach(device_t);
static int igb_shutdown(device_t);
static int igb_suspend(device_t);
static int igb_resume(device_t);
+static void igb_start(struct ifnet *);
+static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int igb_mq_start(struct ifnet *, struct mbuf *);
static int igb_mq_start_locked(struct ifnet *,
struct tx_ring *, struct mbuf *);
static void igb_qflush(struct ifnet *);
-static void igb_deferred_mq_start(void *, int);
-#else
-static void igb_start(struct ifnet *);
-static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int igb_ioctl(struct ifnet *, u_long, caddr_t);
static void igb_init(void *);
@@ -225,8 +223,9 @@ static __inline void igb_rx_input(struct rx_ring *,
static bool igb_rxeof(struct igb_queue *, int, int *);
static void igb_rx_checksum(u32, struct mbuf *, u32);
-static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
-static bool igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
+static bool igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
+static bool igb_tso_setup(struct tx_ring *, struct mbuf *, int,
+ struct ip *, struct tcphdr *);
static void igb_set_promisc(struct adapter *);
static void igb_disable_promisc(struct adapter *);
static void igb_set_multi(struct adapter *);
@@ -348,7 +347,7 @@ SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
** into the header and thus use no cluster. It's
** a very workload-dependent feature.
*/
-static bool igb_header_split = FALSE;
+static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
"Enable receive mbuf header split");
@@ -369,6 +368,9 @@ SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
&igb_rx_process_limit, 0,
"Maximum number of received packets to process at a time, -1 means unlimited");
+#ifdef DEV_NETMAP /* see ixgbe.c for details */
+#include <dev/netmap/if_igb_netmap.h>
+#endif /* DEV_NETMAP */
/*********************************************************************
* Device identification routine
*
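The DEV_NETMAP hooks this commit sprinkles through the driver compile only
when the kernel is built with netmap support; on a FreeBSD kernel of this
era that presumably means a configuration fragment along these lines
(spelling assumed, check the NOTES file):

    device  netmap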
@@ -611,16 +613,6 @@ igb_attach(device_t dev)
goto err_late;
}
- /*
- ** Configure Interrupts
- */
- if ((adapter->msix > 1) && (igb_enable_msix))
- error = igb_allocate_msix(adapter);
- else /* MSI or Legacy */
- error = igb_allocate_legacy(adapter);
- if (error)
- goto err_late;
-
/* Setup OS specific network interface */
if (igb_setup_interface(dev, adapter) != 0)
goto err_late;
@@ -659,11 +651,25 @@ igb_attach(device_t dev)
igb_add_hw_stats(adapter);
/* Tell the stack that the interface is not active */
- adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
adapter->led_dev = led_create(igb_led_func, adapter,
device_get_nameunit(dev));
+ /*
+ ** Configure Interrupts
+ */
+ if ((adapter->msix > 1) && (igb_enable_msix))
+ error = igb_allocate_msix(adapter);
+ else /* MSI or Legacy */
+ error = igb_allocate_legacy(adapter);
+ if (error)
+ goto err_late;
+
+#ifdef DEV_NETMAP
+ igb_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
INIT_DEBUGOUT("igb_attach: end");
return (0);
@@ -673,10 +679,10 @@ err_late:
igb_free_transmit_structures(adapter);
igb_free_receive_structures(adapter);
igb_release_hw_control(adapter);
- if (adapter->ifp != NULL)
- if_free(adapter->ifp);
err_pci:
igb_free_pci_resources(adapter);
+ if (adapter->ifp != NULL)
+ if_free(adapter->ifp);
free(adapter->mta, M_DEVBUF);
IGB_CORE_LOCK_DESTROY(adapter);
@@ -707,8 +713,6 @@ igb_detach(device_t dev)
return (EBUSY);
}
- ether_ifdetach(adapter->ifp);
-
if (adapter->led_dev != NULL)
led_destroy(adapter->led_dev);
@@ -740,8 +744,13 @@ igb_detach(device_t dev)
if (adapter->vlan_detach != NULL)
EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
+ ether_ifdetach(adapter->ifp);
+
callout_drain(&adapter->timer);
+#ifdef DEV_NETMAP
+ netmap_detach(adapter->ifp);
+#endif /* DEV_NETMAP */
igb_free_pci_resources(adapter);
bus_generic_detach(dev);
if_free(ifp);
@@ -799,27 +808,14 @@ igb_resume(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
struct ifnet *ifp = adapter->ifp;
-#if __FreeBSD_version >= 800000
- struct tx_ring *txr = adapter->tx_rings;
-#endif
IGB_CORE_LOCK(adapter);
igb_init_locked(adapter);
igb_init_manageability(adapter);
if ((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
-#if __FreeBSD_version < 800000
+ (ifp->if_drv_flags & IFF_DRV_RUNNING))
igb_start(ifp);
-#else
- for (int i = 0; i < adapter->num_queues; i++, txr++) {
- IGB_TX_LOCK(txr);
- if (!drbr_empty(ifp, txr->br))
- igb_mq_start_locked(ifp, txr, NULL);
- IGB_TX_UNLOCK(txr);
- }
-#endif
- }
IGB_CORE_UNLOCK(adapter);
@@ -827,7 +823,6 @@ igb_resume(device_t dev)
}
-#if __FreeBSD_version < 800000
/*********************************************************************
* Transmit entry point
*
@@ -858,7 +853,7 @@ igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
if (txr->tx_avail <= IGB_MAX_SCATTER) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ txr->queue_status |= IGB_QUEUE_DEPLETED;
break;
}
IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
@@ -869,10 +864,10 @@ igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
* NULL on failure. In that event, we can't requeue.
*/
if (igb_xmit(txr, &m_head)) {
- if (m_head == NULL)
- break;
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ if (m_head != NULL)
+ IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ if (txr->tx_avail <= IGB_MAX_SCATTER)
+ txr->queue_status |= IGB_QUEUE_DEPLETED;
break;
}
@@ -881,7 +876,7 @@ igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
/* Set watchdog on */
txr->watchdog_time = ticks;
- txr->queue_status = IGB_QUEUE_WORKING;
+ txr->queue_status |= IGB_QUEUE_WORKING;
}
}
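Throughout these transmit hunks, txr->queue_status stops being a
single-valued state and becomes a bit field (note the new |=), so WORKING,
DEPLETED, HUNG, and IDLE can be raised and cleared independently rather than
overwriting one another. A sketch of that discipline, with illustrative flag
values and a plain int status (the real IGB_QUEUE_* constants live in the
driver header):

    #define EX_QUEUE_WORKING   0x0002
    #define EX_QUEUE_DEPLETED  0x0008

    status |= EX_QUEUE_WORKING;               /* raise one condition */
    status &= ~EX_QUEUE_DEPLETED;             /* clear one, keep the rest */
    depleted = (status & EX_QUEUE_DEPLETED);  /* test non-destructively */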
@@ -904,7 +899,7 @@ igb_start(struct ifnet *ifp)
return;
}
-#else /* __FreeBSD_version >= 800000 */
+#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
@@ -915,21 +910,25 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m)
struct adapter *adapter = ifp->if_softc;
struct igb_queue *que;
struct tx_ring *txr;
- int i = 0, err = 0;
+ int i, err = 0;
+ bool moveable = TRUE;
/* Which queue to use */
- if ((m->m_flags & M_FLOWID) != 0)
+ if ((m->m_flags & M_FLOWID) != 0) {
i = m->m_pkthdr.flowid % adapter->num_queues;
+ moveable = FALSE;
+ } else
+ i = curcpu % adapter->num_queues;
txr = &adapter->tx_rings[i];
que = &adapter->queues[i];
-
- if (IGB_TX_TRYLOCK(txr)) {
+ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
+ IGB_TX_TRYLOCK(txr)) {
err = igb_mq_start_locked(ifp, txr, m);
IGB_TX_UNLOCK(txr);
} else {
err = drbr_enqueue(ifp, txr->br, m);
- taskqueue_enqueue(que->tq, &txr->txq_task);
+ taskqueue_enqueue(que->tq, &que->que_task);
}
return (err);
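igb_mq_start() now keeps flows sticky when the stack supplies an M_FLOWID
hash and otherwise spreads untagged traffic across queues by submitting CPU.
Condensed into a hypothetical helper (kernel context assumed for curcpu and
struct mbuf):

    static inline int
    ex_pick_txq(struct mbuf *m, int nqueues)
    {
        if (m->m_flags & M_FLOWID)                  /* stack supplied a hash */
            return (m->m_pkthdr.flowid % nqueues);  /* keep flow affinity */
        return (curcpu % nqueues);                  /* else spread by CPU */
    }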
@@ -944,8 +943,9 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
IGB_TX_LOCK_ASSERT(txr);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING || adapter->link_active == 0) {
+ if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
+ (txr->queue_status == IGB_QUEUE_DEPLETED) ||
+ adapter->link_active == 0) {
if (m != NULL)
err = drbr_enqueue(ifp, txr->br, m);
return (err);
@@ -973,39 +973,21 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
- if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
- igb_txeof(txr);
- if (txr->tx_avail <= IGB_MAX_SCATTER) {
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- break;
- }
next = drbr_dequeue(ifp, txr->br);
}
if (enq > 0) {
/* Set the watchdog */
- txr->queue_status = IGB_QUEUE_WORKING;
+ txr->queue_status |= IGB_QUEUE_WORKING;
txr->watchdog_time = ticks;
}
+ if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
+ igb_txeof(txr);
+ if (txr->tx_avail <= IGB_MAX_SCATTER)
+ txr->queue_status |= IGB_QUEUE_DEPLETED;
return (err);
}
/*
- * Called from a taskqueue to drain queued transmit packets.
- */
-static void
-igb_deferred_mq_start(void *arg, int pending)
-{
- struct tx_ring *txr = arg;
- struct adapter *adapter = txr->adapter;
- struct ifnet *ifp = adapter->ifp;
-
- IGB_TX_LOCK(txr);
- if (!drbr_empty(ifp, txr->br))
- igb_mq_start_locked(ifp, txr, NULL);
- IGB_TX_UNLOCK(txr);
-}
-
-/*
** Flush all ring buffers
*/
static void
@@ -1023,7 +1005,7 @@ igb_qflush(struct ifnet *ifp)
}
if_qflush(ifp);
}
-#endif /* __FreeBSD_version < 800000 */
+#endif /* __FreeBSD_version >= 800000 */
/*********************************************************************
* Ioctl entry point
@@ -1129,11 +1111,6 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
}
break;
case SIOCSIFMEDIA:
- /*
- ** As the speed/duplex settings are being
- ** changed, we need to reset the PHY.
- */
- adapter->hw.phy.reset_disable = FALSE;
/* Check SOL/IDER usage */
IGB_CORE_LOCK(adapter);
if (e1000_check_reset_block(&adapter->hw)) {
@@ -1324,10 +1301,8 @@ igb_init_locked(struct adapter *adapter)
}
/* Set Energy Efficient Ethernet */
- e1000_set_eee_i350(&adapter->hw);
- /* Don't reset the phy next time init gets called */
- adapter->hw.phy.reset_disable = TRUE;
+ e1000_set_eee_i350(&adapter->hw);
}
static void
@@ -1352,18 +1327,21 @@ igb_handle_que(void *context, int pending)
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
bool more;
- more = igb_rxeof(que, -1, NULL);
+ more = igb_rxeof(que, adapter->rx_process_limit, NULL);
IGB_TX_LOCK(txr);
if (igb_txeof(txr))
more = TRUE;
#if __FreeBSD_version >= 800000
- if (!drbr_empty(ifp, txr->br))
+ /* Process the stack queue only if not depleted */
+ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
+ !drbr_empty(ifp, txr->br))
igb_mq_start_locked(ifp, txr, NULL);
#else
igb_start_locked(txr, ifp);
#endif
IGB_TX_UNLOCK(txr);
+ /* Do we need another? */
if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
taskqueue_enqueue(que->tq, &que->que_task);
return;
@@ -1494,7 +1472,7 @@ igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
/*********************************************************************
*
- * MSIX TX Interrupt Service routine
+ * MSIX Que Interrupt Service routine
*
**********************************************************************/
static void
@@ -1569,8 +1547,7 @@ igb_msix_que(void *arg)
no_calc:
/* Schedule a clean task if needed */
- if (more_tx || more_rx ||
- (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE))
+ if (more_tx || more_rx)
taskqueue_enqueue(que->tq, &que->que_task);
else
/* Reenable this interrupt */
@@ -1718,10 +1695,8 @@ igb_media_change(struct ifnet *ifp)
/*********************************************************************
*
* This routine maps the mbufs to Advanced TX descriptors.
- * used by the 82575 adapter.
*
**********************************************************************/
-
static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
@@ -1730,22 +1705,123 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
bus_dmamap_t map;
struct igb_tx_buffer *tx_buffer, *tx_buffer_mapped;
union e1000_adv_tx_desc *txd = NULL;
- struct mbuf *m_head;
- u32 olinfo_status = 0, cmd_type_len = 0;
- int nsegs, i, j, error, first, last = 0;
- u32 hdrlen = 0;
-
- m_head = *m_headp;
-
+ struct mbuf *m_head = *m_headp;
+ struct ether_vlan_header *eh = NULL;
+ struct ip *ip = NULL;
+ struct tcphdr *th = NULL;
+ u32 hdrlen, cmd_type_len, olinfo_status = 0;
+ int ehdrlen, poff;
+ int nsegs, i, first, last = 0;
+ int error, do_tso, remap = 1;
/* Set basic descriptor constants */
- cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
+ cmd_type_len = E1000_ADVTXD_DTYP_DATA;
cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
if (m_head->m_flags & M_VLANTAG)
cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
+retry:
+ m_head = *m_headp;
+ do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
+ hdrlen = ehdrlen = poff = 0;
+
/*
- * Map the packet for DMA.
+ * Intel recommends entire IP/TCP header length reside in a single
+ * buffer. If multiple descriptors are used to describe the IP and
+ * TCP header, each descriptor should describe one or more
+ * complete headers; descriptors referencing only parts of headers
+ * are not supported. If all layer headers are not coalesced into
+ * a single buffer, each buffer should not cross a 4KB boundary,
+ * or be larger than the maximum read request size.
+ * The controller also requires modifying the IP/TCP header to make
+ * TSO work, so we first get a writable mbuf chain, then coalesce the
+ * ethernet/IP/TCP headers into a single buffer to meet the
+ * controller's requirement. This also simplifies IP/TCP/UDP checksum
+ * offloading, which has similar restrictions.
+ */
+ if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
+ if (do_tso || (m_head->m_next != NULL &&
+ m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
+ if (M_WRITABLE(*m_headp) == 0) {
+ m_head = m_dup(*m_headp, M_DONTWAIT);
+ m_freem(*m_headp);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ *m_headp = m_head;
+ }
+ }
+ /*
+ * Assume IPv4, we don't have TSO/checksum offload support
+ * for IPv6 yet.
+ */
+ ehdrlen = sizeof(struct ether_header);
+ m_head = m_pullup(m_head, ehdrlen);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ eh = mtod(m_head, struct ether_vlan_header *);
+ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ ehdrlen = sizeof(struct ether_vlan_header);
+ m_head = m_pullup(m_head, ehdrlen);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ }
+ m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
+ poff = ehdrlen + (ip->ip_hl << 2);
+ if (do_tso) {
+ m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ /*
+ * The pseudo TCP checksum does not include the TCP payload
+ * length, so the driver should recompute the checksum here
+ * to match what the hardware expects to see. This follows
+ * Microsoft's Large Send specification.
+ */
+ th = (struct tcphdr *)(mtod(m_head, char *) + poff);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(IPPROTO_TCP));
+ /* Keep track of the full header length */
+ hdrlen = poff + (th->th_off << 2);
+ } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
+ m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ th = (struct tcphdr *)(mtod(m_head, char *) + poff);
+ m_head = m_pullup(m_head, poff + (th->th_off << 2));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
+ th = (struct tcphdr *)(mtod(m_head, char *) + poff);
+ } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
+ m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
+ }
+ *m_headp = m_head;
+ }
+
+ /*
+ * Map the packet for DMA
*
* Capture the first descriptor index,
* this descriptor will have the index
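The pseudo-header seeding in the TSO path above is subtle: the hardware
replays the checksum per segment, so the seed must omit the TCP length term,
whereas an ordinary TCP checksum offload seed includes it. A hedged contrast
using FreeBSD's in_pseudo(), with ip, th, and tcp_len (TCP header plus
payload length) assumed from the surrounding code:

    /* TSO seed: no length term, per Microsoft's LSO convention. */
    th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
        htons(IPPROTO_TCP));

    /* Plain TCP checksum offload seed: the length is folded in. */
    th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
        htons(IPPROTO_TCP + tcp_len));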
@@ -1760,7 +1836,16 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
*m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
- if (error == EFBIG) {
+ /*
+ * There are two types of errors we can (try) to handle:
+ * - EFBIG means the mbuf chain was too long and bus_dma ran
+ * out of segments. Defragment the mbuf chain and try again.
+ * - ENOMEM means bus_dma could not obtain enough bounce buffers
+ * at this point in time. Defer sending and try again later.
+ * All other errors, in particular EINVAL, are fatal and prevent the
+ * mbuf chain from ever going through. Drop it and report error.
+ */
+ if (error == EFBIG && remap) {
struct mbuf *m;
m = m_defrag(*m_headp, M_DONTWAIT);
@@ -1772,19 +1857,9 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
}
*m_headp = m;
- /* Try it again */
- error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
- *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
-
- if (error == ENOMEM) {
- adapter->no_tx_dma_setup++;
- return (error);
- } else if (error != 0) {
- adapter->no_tx_dma_setup++;
- m_freem(*m_headp);
- *m_headp = NULL;
- return (error);
- }
+ /* Try it again, but only once */
+ remap = 0;
+ goto retry;
} else if (error == ENOMEM) {
adapter->no_tx_dma_setup++;
return (error);
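The rework above collapses the duplicated DMA load into a single goto-based
path that defragments at most once, as the new comment spells out. A
standalone sketch of the pattern, with tag, map, segs, and nsegs as in the
surrounding function:

    int error, remap = 1;
retry:
    error = bus_dmamap_load_mbuf_sg(tag, map, *m_headp,
        segs, &nsegs, BUS_DMA_NOWAIT);
    if (error == EFBIG && remap) {
        struct mbuf *m = m_defrag(*m_headp, M_DONTWAIT);
        if (m == NULL) {            /* defrag failed: drop the chain */
            m_freem(*m_headp);
            *m_headp = NULL;
            return (ENOBUFS);
        }
        *m_headp = m;
        remap = 0;                  /* defragment only once */
        goto retry;
    } else if (error == ENOMEM) {
        return (error);             /* transient: caller retries later */
    } else if (error != 0) {
        m_freem(*m_headp);          /* fatal, e.g. EINVAL: drop */
        *m_headp = NULL;
        return (error);
    }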
@@ -1795,29 +1870,35 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
return (error);
}
- /* Check again to be sure we have enough descriptors */
- if (nsegs > (txr->tx_avail - 2)) {
+ /*
+ ** Make sure we don't overrun the ring,
+ ** we need nsegs descriptors and one for
+ ** the context descriptor used for the
+ ** offloads.
+ */
+ if ((nsegs + 1) > (txr->tx_avail - 2)) {
txr->no_desc_avail++;
bus_dmamap_unload(txr->txtag, map);
return (ENOBUFS);
}
m_head = *m_headp;
- /*
- * Set up the context descriptor:
- * used when any hardware offload is done.
- * This includes CSUM, VLAN, and TSO. It
- * will use the first descriptor.
+ /* Do hardware assists:
+ * Set up the context descriptor, used
+ * when any hardware offload is done.
+ * This includes CSUM, VLAN, and TSO.
+ * It will use the first descriptor.
*/
- if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
- if (igb_tso_setup(txr, m_head, &hdrlen)) {
+
+ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
+ if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
} else
- return (ENXIO);
+ return (ENXIO);
} else if (igb_tx_ctx_setup(txr, m_head))
- olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+ olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
/* Calculate payload length */
olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
@@ -1829,7 +1910,7 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
/* Set up our transmit descriptors */
i = txr->next_avail_desc;
- for (j = 0; j < nsegs; j++) {
+ for (int j = 0; j < nsegs; j++) {
bus_size_t seg_len;
bus_addr_t seg_addr;
@@ -1850,8 +1931,14 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
txr->next_avail_desc = i;
txr->tx_avail -= nsegs;
-
tx_buffer->m_head = m_head;
+
+ /*
+ ** Here we swap the map so the last descriptor,
+ ** which gets the completion interrupt, has the
+ ** real map, and the first descriptor gets the
+ ** unused map from this descriptor.
+ */
tx_buffer_mapped->map = tx_buffer->map;
tx_buffer->map = map;
bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
@@ -1869,6 +1956,7 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
*/
tx_buffer = &txr->tx_buffers[first];
tx_buffer->next_eop = last;
+ /* Update the watchdog time early and often */
txr->watchdog_time = ticks;
/*
@@ -1881,9 +1969,7 @@ igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
++txr->tx_packets;
return (0);
-
}
-
static void
igb_set_promisc(struct adapter *adapter)
{
@@ -1990,7 +2076,10 @@ igb_local_timer(void *arg)
{
struct adapter *adapter = arg;
device_t dev = adapter->dev;
+ struct ifnet *ifp = adapter->ifp;
struct tx_ring *txr = adapter->tx_rings;
+ struct igb_queue *que = adapter->queues;
+ int hung = 0, busy = 0;
IGB_CORE_LOCK_ASSERT(adapter);
@@ -1998,22 +2087,29 @@ igb_local_timer(void *arg)
igb_update_link_status(adapter);
igb_update_stats_counters(adapter);
- /*
- ** If flow control has paused us since last checking
- ** it invalidates the watchdog timing, so don't run it.
- */
- if (adapter->pause_frames) {
- adapter->pause_frames = 0;
- goto out;
- }
-
/*
- ** Watchdog: check for time since any descriptor was cleaned
+ ** Check the TX queues status
+ ** - central locked handling of OACTIVE
+ ** - watchdog only if all queues show hung
*/
- for (int i = 0; i < adapter->num_queues; i++, txr++)
- if (txr->queue_status == IGB_QUEUE_HUNG)
- goto timeout;
-out:
+ for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
+ if ((txr->queue_status & IGB_QUEUE_HUNG) &&
+ (adapter->pause_frames == 0))
+ ++hung;
+ if (txr->queue_status & IGB_QUEUE_DEPLETED)
+ ++busy;
+ if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
+ taskqueue_enqueue(que->tq, &que->que_task);
+ }
+ if (hung == adapter->num_queues)
+ goto timeout;
+ if (busy == adapter->num_queues)
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
+ (busy < adapter->num_queues))
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+
+ adapter->pause_frames = 0;
callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
#ifndef DEVICE_POLLING
/* Schedule all queue interrupts - deadlock protection */
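igb_local_timer() becomes the single place that reconciles per-queue state
with the interface: a queue counts as hung only when pause frames cannot
explain the stall, and IFF_DRV_OACTIVE is asserted only when every queue is
depleted. A distilled sketch with hypothetical names (q[], nqueues,
pause_frames, flags):

    int hung = 0, busy = 0;

    for (int i = 0; i < nqueues; i++) {
        if ((q[i].status & EX_QUEUE_HUNG) && pause_frames == 0)
            hung++;                 /* stall not explained by pause */
        if (q[i].status & EX_QUEUE_DEPLETED)
            busy++;
    }
    if (hung == nqueues)
        watchdog_reinit();          /* all queues wedged: reset */
    else if (busy == nqueues)
        flags |= IFF_DRV_OACTIVE;   /* stack must stop feeding us */
    else
        flags &= ~IFF_DRV_OACTIVE;  /* some queue still has room */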
@@ -2106,7 +2202,7 @@ igb_update_link_status(struct adapter *adapter)
adapter->link_active = 0;
/* This can sleep */
if_link_state_change(ifp, LINK_STATE_DOWN);
- /* Turn off watchdogs */
+ /* Reset queue state */
for (int i = 0; i < adapter->num_queues; i++, txr++)
txr->queue_status = IGB_QUEUE_IDLE;
}
@@ -2135,9 +2231,10 @@ igb_stop(void *arg)
callout_stop(&adapter->timer);
/* Tell the stack that the interface is no longer active */
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- /* Unarm watchdog timer. */
+ /* Disarm watchdog timer. */
for (int i = 0; i < adapter->num_queues; i++, txr++) {
IGB_TX_LOCK(txr);
txr->queue_status = IGB_QUEUE_IDLE;
@@ -2232,7 +2329,6 @@ igb_allocate_legacy(struct adapter *adapter)
{
device_t dev = adapter->dev;
struct igb_queue *que = adapter->queues;
- struct tx_ring *txr = adapter->tx_rings;
int error, rid = 0;
/* Turn off all interrupts */
@@ -2251,10 +2347,6 @@ igb_allocate_legacy(struct adapter *adapter)
return (ENXIO);
}
-#if __FreeBSD_version >= 800000
- TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
-#endif
-
/*
* Try allocating a fast interrupt and the associated deferred
* processing contexts.
@@ -2292,6 +2384,9 @@ igb_allocate_msix(struct adapter *adapter)
struct igb_queue *que = adapter->queues;
int error, rid, vector = 0;
+ /* Be sure to start with all interrupts disabled */
+ E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
+ E1000_WRITE_FLUSH(&adapter->hw);
for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
rid = vector +1;
@@ -2325,13 +2420,9 @@ igb_allocate_msix(struct adapter *adapter)
*/
if (adapter->num_queues > 1)
bus_bind_intr(dev, que->res, i);
-#if __FreeBSD_version >= 800000
- TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
- que->txr);
-#endif
/* Make tasklet for deferred handling */
TASK_INIT(&que->que_task, 0, igb_handle_que, que);
- que->tq = taskqueue_create("igb_que", M_NOWAIT,
+ que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
taskqueue_thread_enqueue, &que->tq);
taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
device_get_nameunit(adapter->dev));
@@ -2538,24 +2629,13 @@ igb_free_pci_resources(struct adapter *adapter)
else
(adapter->msix != 0) ? (rid = 1):(rid = 0);
- que = adapter->queues;
if (adapter->tag != NULL) {
- taskqueue_drain(que->tq, &adapter->link_task);
bus_teardown_intr(dev, adapter->res, adapter->tag);
adapter->tag = NULL;
}
if (adapter->res != NULL)
bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
- for (int i = 0; i < adapter->num_queues; i++, que++) {
- if (que->tq != NULL) {
-#if __FreeBSD_version >= 800000
- taskqueue_drain(que->tq, &que->txr->txq_task);
-#endif
- taskqueue_drain(que->tq, &que->que_task);
- taskqueue_free(que->tq);
- }
- }
mem:
if (adapter->msix)
pci_release_msi(dev);
@@ -2686,7 +2766,6 @@ igb_reset(struct adapter *adapter)
pba = E1000_READ_REG(hw, E1000_RXPBS);
pba = e1000_rxpbs_adjust_82580(pba);
break;
- pba = E1000_PBA_35K;
default:
break;
}
@@ -2741,12 +2820,10 @@ igb_reset(struct adapter *adapter)
fc->pause_time = IGB_FC_PAUSE_TIME;
fc->send_xon = TRUE;
- if (fc->requested_mode)
- fc->current_mode = fc->requested_mode;
+ if (adapter->fc)
+ fc->requested_mode = adapter->fc;
else
- fc->current_mode = e1000_fc_full;
-
- adapter->fc = fc->current_mode;
+ fc->requested_mode = e1000_fc_default;
/* Issue a global reset */
e1000_reset_hw(hw);
@@ -2827,11 +2904,10 @@ igb_setup_interface(device_t dev, struct adapter *adapter)
ifp->if_softc = adapter;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = igb_ioctl;
+ ifp->if_start = igb_start;
#if __FreeBSD_version >= 800000
ifp->if_transmit = igb_mq_start;
ifp->if_qflush = igb_qflush;
-#else
- ifp->if_start = igb_start;
#endif
IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
@@ -3212,9 +3288,16 @@ igb_setup_transmit_ring(struct tx_ring *txr)
struct adapter *adapter = txr->adapter;
struct igb_tx_buffer *txbuf;
int i;
+#ifdef DEV_NETMAP
+ struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_slot *slot;
+#endif /* DEV_NETMAP */
/* Clear the old descriptor contents */
IGB_TX_LOCK(txr);
+#ifdef DEV_NETMAP
+ slot = netmap_reset(na, NR_TX, txr->me, 0);
+#endif /* DEV_NETMAP */
bzero((void *)txr->tx_base,
(sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
/* Reset indices */
@@ -3231,6 +3314,17 @@ igb_setup_transmit_ring(struct tx_ring *txr)
m_freem(txbuf->m_head);
txbuf->m_head = NULL;
}
+#ifdef DEV_NETMAP
+ if (slot) {
+ /* slot si is mapped to the i-th NIC-ring entry */
+ int si = i + na->tx_rings[txr->me].nkr_hwofs;
+
+ if (si < 0)
+ si += na->num_tx_desc;
+ netmap_load_map(txr->txtag, txbuf->map,
+ NMB(slot + si), na->buff_size);
+ }
+#endif /* DEV_NETMAP */
/* clear the watch index */
txbuf->next_eop = -1;
}
@@ -3396,8 +3490,9 @@ igb_free_transmit_buffers(struct tx_ring *txr)
* Setup work for hardware segmentation offload (TSO)
*
**********************************************************************/
-static boolean_t
-igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
+static bool
+igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
+ struct ip *ip, struct tcphdr *th)
{
struct adapter *adapter = txr->adapter;
struct e1000_adv_tx_context_desc *TXD;
@@ -3405,45 +3500,15 @@ igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
u32 mss_l4len_idx = 0;
u16 vtag = 0;
- int ctxd, ehdrlen, ip_hlen, tcp_hlen;
- struct ether_vlan_header *eh;
- struct ip *ip;
- struct tcphdr *th;
-
-
- /*
- * Determine where frame payload starts.
- * Jump over vlan headers if already present
- */
- eh = mtod(mp, struct ether_vlan_header *);
- if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
- ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
- else
- ehdrlen = ETHER_HDR_LEN;
-
- /* Ensure we have at least the IP+TCP header in the first mbuf. */
- if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
- return FALSE;
+ int ctxd, ip_hlen, tcp_hlen;
- /* Only supports IPV4 for now */
ctxd = txr->next_avail_desc;
tx_buffer = &txr->tx_buffers[ctxd];
TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
- ip = (struct ip *)(mp->m_data + ehdrlen);
- if (ip->ip_p != IPPROTO_TCP)
- return FALSE; /* 0 */
ip->ip_sum = 0;
ip_hlen = ip->ip_hl << 2;
- th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
- th->th_sum = in_pseudo(ip->ip_src.s_addr,
- ip->ip_dst.s_addr, htons(IPPROTO_TCP));
tcp_hlen = th->th_off << 2;
- /*
- * Calculate header length, this is used
- * in the transmit desc in igb_xmit
- */
- *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
/* VLAN MACLEN IPLEN */
if (mp->m_flags & M_VLANTAG) {
@@ -3626,6 +3691,19 @@ igb_txeof(struct tx_ring *txr)
IGB_TX_LOCK_ASSERT(txr);
+#ifdef DEV_NETMAP
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(ifp);
+
+ selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
+ IGB_TX_UNLOCK(txr);
+ IGB_CORE_LOCK(adapter);
+ selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+ IGB_CORE_UNLOCK(adapter);
+ IGB_TX_LOCK(txr);
+ return FALSE;
+ }
+#endif /* DEV_NETMAP */
if (txr->tx_avail == adapter->num_tx_desc) {
txr->queue_status = IGB_QUEUE_IDLE;
return FALSE;
@@ -3705,20 +3783,20 @@ igb_txeof(struct tx_ring *txr)
** for too long indicates a hang.
*/
if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
- txr->queue_status = IGB_QUEUE_HUNG;
-
+ txr->queue_status |= IGB_QUEUE_HUNG;
/*
- * If we have a minimum free, clear IFF_DRV_OACTIVE
- * to tell the stack that it is OK to send packets.
+ * If we have a minimum free,
+ * clear the depleted state bit
*/
- if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- /* All clean, turn off the watchdog */
- if (txr->tx_avail == adapter->num_tx_desc) {
- txr->queue_status = IGB_QUEUE_IDLE;
- return (FALSE);
- }
+ if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
+ txr->queue_status &= ~IGB_QUEUE_DEPLETED;
+
+ /* All clean, turn off the watchdog */
+ if (txr->tx_avail == adapter->num_tx_desc) {
+ txr->queue_status = IGB_QUEUE_IDLE;
+ return (FALSE);
}
+
return (TRUE);
}
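After this change igb_txeof() composes state bits instead of overwriting
them: a queue is suspected hung only when a full watchdog interval elapses
with nothing reclaimed, DEPLETED clears as soon as a threshold of
descriptors frees up, and IDLE replaces everything once the ring is fully
drained. Condensed sketch (names hypothetical):

    if (!processed && (ticks - watchdog_time) > EX_WATCHDOG_TICKS)
        status |= EX_QUEUE_HUNG;        /* nothing reclaimed in time */
    if (tx_avail >= EX_QUEUE_THRESHOLD)
        status &= ~EX_QUEUE_DEPLETED;   /* room again: accept work */
    if (tx_avail == num_tx_desc)
        status = EX_QUEUE_IDLE;         /* drained: watchdog off */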
@@ -3949,6 +4027,10 @@ igb_setup_receive_ring(struct rx_ring *rxr)
bus_dma_segment_t pseg[1], hseg[1];
struct lro_ctrl *lro = &rxr->lro;
int rsize, nsegs, error = 0;
+#ifdef DEV_NETMAP
+ struct netmap_adapter *na = NA(rxr->adapter->ifp);
+ struct netmap_slot *slot;
+#endif /* DEV_NETMAP */
adapter = rxr->adapter;
dev = adapter->dev;
@@ -3956,6 +4038,9 @@ igb_setup_receive_ring(struct rx_ring *rxr)
/* Clear the ring contents */
IGB_RX_LOCK(rxr);
+#ifdef DEV_NETMAP
+ slot = netmap_reset(na, NR_RX, rxr->me, 0);
+#endif /* DEV_NETMAP */
rsize = roundup2(adapter->num_rx_desc *
sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
bzero((void *)rxr->rx_base, rsize);
@@ -3974,6 +4059,22 @@ igb_setup_receive_ring(struct rx_ring *rxr)
struct mbuf *mh, *mp;
rxbuf = &rxr->rx_buffers[j];
+#ifdef DEV_NETMAP
+ if (slot) {
+ /* slot sj is mapped to the j-th NIC-ring entry */
+ int sj = j + na->rx_rings[rxr->me].nkr_hwofs;
+ void *addr;
+
+ if (sj < 0)
+ sj += na->num_rx_desc;
+ addr = NMB(slot + sj);
+ netmap_load_map(rxr->ptag,
+ rxbuf->pmap, addr, na->buff_size);
+ /* Update descriptor */
+ rxr->rx_base[j].read.pkt_addr = htole64(vtophys(addr));
+ continue;
+ }
+#endif /* DEV_NETMAP */
if (rxr->hdr_split == FALSE)
goto skip_head;
@@ -4258,6 +4359,26 @@ igb_initialize_receive_units(struct adapter *adapter)
for (int i = 0; i < adapter->num_queues; i++) {
rxr = &adapter->rx_rings[i];
E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
+#ifdef DEV_NETMAP
+ /*
+ * An init() while a netmap client is active must
+ * preserve the rx buffers passed to userspace.
+ * In this driver it means we adjust RDT to
+ * something different from next_to_refresh
+ * (which is not used in netmap mode).
+ */
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_kring *kring = &na->rx_rings[i];
+ int t = rxr->next_to_refresh - kring->nr_hwavail;
+
+ if (t >= adapter->num_rx_desc)
+ t -= adapter->num_rx_desc;
+ else if (t < 0)
+ t += adapter->num_rx_desc;
+ E1000_WRITE_REG(hw, E1000_RDT(i), t);
+ } else
+#endif /* DEV_NETMAP */
E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
}
return;
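The RDT computation above is plain ring-modular arithmetic: next_to_refresh
minus the buffers netmap still owns, normalized back into [0, num_rx_desc).
As a standalone helper (hypothetical name; valid when the input is within
one ring length of range):

    static inline int
    ex_ring_wrap(int idx, int ring_size)
    {
        if (idx >= ring_size)
            idx -= ring_size;   /* stepped past the end */
        else if (idx < 0)
            idx += ring_size;   /* stepped before the start */
        return (idx);
    }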
@@ -4436,6 +4557,19 @@ igb_rxeof(struct igb_queue *que, int count, int *done)
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+#ifdef DEV_NETMAP
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(ifp);
+
+ selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
+ IGB_RX_UNLOCK(rxr);
+ IGB_CORE_LOCK(adapter);
+ selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
+ IGB_CORE_UNLOCK(adapter);
+ return (0);
+ }
+#endif /* DEV_NETMAP */
+
/* Main clean loop */
for (i = rxr->next_to_check; count != 0;) {
struct mbuf *sendmp, *mh, *mp;
@@ -4762,8 +4896,8 @@ igb_setup_vlan_hw_support(struct adapter *adapter)
e1000_vfta_set_vf(hw,
adapter->shadow_vfta[i], TRUE);
else
- E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
- i, adapter->shadow_vfta[i]);
+ e1000_write_vfta(hw,
+ i, adapter->shadow_vfta[i]);
}
}
@@ -5629,23 +5763,26 @@ igb_set_sysctl_value(struct adapter *adapter, const char *name,
static int
igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
- int error;
- struct adapter *adapter = (struct adapter *) arg1;
+ int error;
+ static int input = 3; /* default is full */
+ struct adapter *adapter = (struct adapter *) arg1;
- error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
+ error = sysctl_handle_int(oidp, &input, 0, req);
if ((error) || (req->newptr == NULL))
return (error);
- switch (adapter->fc) {
+ switch (input) {
case e1000_fc_rx_pause:
case e1000_fc_tx_pause:
case e1000_fc_full:
- adapter->hw.fc.requested_mode = adapter->fc;
- break;
case e1000_fc_none:
+ adapter->hw.fc.requested_mode = input;
+ adapter->fc = input;
+ break;
default:
- adapter->hw.fc.requested_mode = e1000_fc_none;
+ /* Do nothing */
+ return (error);
}
adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
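The handler now accepts only the four flow-control modes and rejects
anything else without touching the current setting. The values mirror the
e1000 shared code's enum e1000_fc_mode; a sketch with illustrative names:

    enum ex_fc_mode {
        EX_FC_NONE     = 0,   /* no pause frames */
        EX_FC_RX_PAUSE = 1,   /* honor received pause frames */
        EX_FC_TX_PAUSE = 2,   /* transmit pause frames on congestion */
        EX_FC_FULL     = 3,   /* both directions; the handler default */
    };

Writing 3 to the sysctl node therefore requests full flow control, matching
the handler's initial value.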