author     luigi <luigi@FreeBSD.org>    2011-12-05 12:06:53 +0000
committer  luigi <luigi@FreeBSD.org>    2011-12-05 12:06:53 +0000
commit     298ffde66596a12229df0898bab02aa491f70535 (patch)
tree       a556ee936d46b8051dced8959c9019517fa2c641
parent     07781bc7da6c5ba99f27277e39b9a1a47d52ce0e (diff)
1. Fix the handling of link reset while in netmap mode.
   A link reset is now completely transparent for the netmap client:
   even if the NIC resets its own ring (e.g. restarting from 0), the
   client will not see any change in the current rx/tx positions,
   because the driver keeps track of the offset between the two.

2. Make the device-specific code more uniform across different drivers.
   There were some inconsistencies in the implementation of the netmap
   support routines; drivers have now been aligned to a common code
   structure.

3. Import netmap support for ixgbe. This is implemented as a very
   small patch for ixgbe.c (233 lines, 11 chunks, mostly comments: in
   total the patch has only 54 lines of new code), as most of the code
   is in an external file, sys/dev/netmap/ixgbe_netmap.h, following
   some initial comments from Jack Vogel about making changes less
   intrusive. (Note: I have emailed Jack multiple times asking if he
   had comments on this structure of the code; I got no reply, so I
   assume he is fine with it.)

Support for other drivers (em, lem, re, igb) will come later.

"ixgbe" is now the reference driver for netmap support. Both the
external file (sys/dev/netmap/ixgbe_netmap.h) and the device-specific
patches (in sys/dev/ixgbe/ixgbe.c) are heavily commented and should
serve as a reference for other device drivers.

Tested on i386 and amd64 with the pkt-gen program in
tools/tools/netmap: the sender does 14.88 Mpps at 1050 MHz and
14.2 Mpps at 900 MHz on an i7-860 with 4 cores and an 82599 card.
Haven't yet tried more aggressive optimizations such as adding
'prefetch' instructions in the time-critical parts of the code.
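[Editorial note: the offset mechanism described in item 1 reduces to a
single modular translation between NIC and netmap ring indexes. The
sketch below is illustrative only -- the helper name and standalone
form are hypothetical, not part of the commit; the drivers inline this
computation at each use site.]

	/*
	 * Sketch: translate a NIC ring index into the corresponding
	 * netmap ring index, given the offset (nkr_hwofs) recorded
	 * when the NIC ring was reset. Assumes |nkr_hwofs| is less
	 * than ring_size.
	 */
	static inline int
	nm_nic_to_netmap_idx(int nic_idx, int nkr_hwofs, int ring_size)
	{
		int j = nic_idx + nkr_hwofs;

		if (j < 0)		/* nkr_hwofs can be negative in some drivers */
			j += ring_size;
		else if (j >= ring_size)
			j -= ring_size;
		return j;
	}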
-rw-r--r--  sys/dev/ixgbe/ixgbe.c           148
-rw-r--r--  sys/dev/netmap/if_em_netmap.h   167
-rw-r--r--  sys/dev/netmap/if_igb_netmap.h  137
-rw-r--r--  sys/dev/netmap/if_lem_netmap.h  241
-rw-r--r--  sys/dev/netmap/if_re_netmap.h   147
-rw-r--r--  sys/dev/netmap/ixgbe_netmap.h   340
-rw-r--r--  sys/dev/netmap/netmap.c         211
-rw-r--r--  sys/dev/netmap/netmap_kern.h     18
-rw-r--r--  sys/net/netmap.h                  9
-rw-r--r--  tools/tools/netmap/pkt-gen.c     17
10 files changed, 837 insertions, 598 deletions
diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c
index 9a709af..ae76e9f 100644
--- a/sys/dev/ixgbe/ixgbe.c
+++ b/sys/dev/ixgbe/ixgbe.c
@@ -313,6 +313,18 @@ static int atr_sample_rate = 20;
static int fdir_pballoc = 1;
#endif
+#ifdef DEV_NETMAP
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap.h .
+ *
+ * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
+ * that extend the standard driver.
+ */
+#include <dev/netmap/ixgbe_netmap.h>
+#endif /* DEV_NETMAP */
+
/*********************************************************************
* Device identification routine
*
@@ -578,6 +590,9 @@ ixgbe_attach(device_t dev)
ixgbe_add_hw_stats(adapter);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
INIT_DEBUGOUT("ixgbe_attach: end");
return (0);
err_late:
@@ -652,6 +667,9 @@ ixgbe_detach(device_t dev)
ether_ifdetach(adapter->ifp);
callout_drain(&adapter->timer);
+#ifdef DEV_NETMAP
+ netmap_detach(adapter->ifp);
+#endif /* DEV_NETMAP */
ixgbe_free_pci_resources(adapter);
bus_generic_detach(dev);
if_free(adapter->ifp);
@@ -2813,9 +2831,20 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr)
struct adapter *adapter = txr->adapter;
struct ixgbe_tx_buf *txbuf;
int i;
+#ifdef DEV_NETMAP
+ struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_slot *slot;
+#endif /* DEV_NETMAP */
/* Clear the old ring contents */
IXGBE_TX_LOCK(txr);
+#ifdef DEV_NETMAP
+ /*
+ * (under lock): if in netmap mode, do some consistency
+ * checks and set slot to entry 0 of the netmap ring.
+ */
+ slot = netmap_reset(na, NR_TX, txr->me, 0);
+#endif /* DEV_NETMAP */
bzero((void *)txr->tx_base,
(sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
/* Reset indices */
@@ -2832,6 +2861,26 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr)
m_freem(txbuf->m_head);
txbuf->m_head = NULL;
}
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, set the map for the packet buffer.
+ * NOTE: Some drivers (not this one) also need to set
+ * the physical buffer address in the NIC ring.
+ * Slots in the netmap ring (indexed by "si") are
+ * kring->nkr_hwofs positions "ahead" wrt the
+ * corresponding slot in the NIC ring. In some drivers
+ * (not here) nkr_hwofs can be negative. When computing
+ * si = i + kring->nkr_hwofs make sure to handle wraparounds.
+ */
+ if (slot) {
+ int si = i + na->tx_rings[txr->me].nkr_hwofs;
+
+ if (si >= na->num_tx_desc)
+ si -= na->num_tx_desc;
+ netmap_load_map(txr->txtag, txbuf->map,
+ NMB(slot + si), na->buff_size);
+ }
+#endif /* DEV_NETMAP */
/* Clear the EOP index */
txbuf->eop_index = -1;
}
@@ -3310,6 +3359,29 @@ ixgbe_txeof(struct tx_ring *txr)
mtx_assert(&txr->tx_mtx, MA_OWNED);
+#ifdef DEV_NETMAP
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(ifp);
+
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ * When the driver has separate locks, we need to
+ * release and re-acquire txlock to avoid deadlocks.
+ * XXX see if we can find a better way.
+ */
+ selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
+ IXGBE_TX_UNLOCK(txr);
+ IXGBE_CORE_LOCK(adapter);
+ selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+ IXGBE_CORE_UNLOCK(adapter);
+ IXGBE_TX_LOCK(txr);
+ return FALSE;
+ }
+#endif /* DEV_NETMAP */
+
if (txr->tx_avail == adapter->num_tx_desc) {
txr->queue_status = IXGBE_QUEUE_IDLE;
return FALSE;
@@ -3698,6 +3770,10 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr)
bus_dma_segment_t pseg[1], hseg[1];
struct lro_ctrl *lro = &rxr->lro;
int rsize, nsegs, error = 0;
+#ifdef DEV_NETMAP
+ struct netmap_adapter *na = NA(rxr->adapter->ifp);
+ struct netmap_slot *slot;
+#endif /* DEV_NETMAP */
adapter = rxr->adapter;
ifp = adapter->ifp;
@@ -3705,6 +3781,10 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr)
/* Clear the ring contents */
IXGBE_RX_LOCK(rxr);
+#ifdef DEV_NETMAP
+ /* same as in ixgbe_setup_transmit_ring() */
+ slot = netmap_reset(na, NR_RX, rxr->me, 0);
+#endif /* DEV_NETMAP */
rsize = roundup2(adapter->num_rx_desc *
sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
bzero((void *)rxr->rx_base, rsize);
@@ -3721,6 +3801,29 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr)
struct mbuf *mh, *mp;
rxbuf = &rxr->rx_buffers[j];
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, fill the map and set the buffer
+ * address in the NIC ring, considering the offset
+ * between the netmap and NIC rings (see comment in
+ * ixgbe_setup_transmit_ring() ). No need to allocate
+ * an mbuf, so end the block with a continue;
+ */
+ if (slot) {
+ int sj = j + na->rx_rings[rxr->me].nkr_hwofs;
+ void *addr;
+
+ if (sj >= na->num_rx_desc)
+ sj -= na->num_rx_desc;
+ addr = NMB(slot + sj);
+ netmap_load_map(rxr->ptag,
+ rxbuf->pmap, addr, na->buff_size);
+ /* Update descriptor */
+ rxr->rx_base[j].read.pkt_addr =
+ htole64(vtophys(addr));
+ continue;
+ }
+#endif /* DEV_NETMAP */
/*
** Don't allocate mbufs if not
** doing header split, its wasteful
@@ -3913,6 +4016,35 @@ ixgbe_initialize_receive_units(struct adapter *adapter)
/* Setup the HW Rx Head and Tail Descriptor Pointers */
IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, we must preserve the buffers made
+ * available to userspace before the if_init()
+ * (this is true by default on the TX side, because
+ * init makes all buffers available to userspace).
+ *
+ * netmap_reset() and the device specific routines
+ * (e.g. ixgbe_setup_receive_rings()) map these
+ * buffers at the end of the NIC ring, so here we
+ * must set the RDT (tail) register to make sure
+ * they are not overwritten.
+ *
+ * In this driver the NIC ring starts at RDH = 0,
+ * RDT points to the first 'busy' slot, so RDT = 0
+ * means the whole ring is available, and
+ * RDT = (num_rx_desc - X) means X slots are available.
+ * Computations are done modulo the ring size.
+ */
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_kring *kring = &na->rx_rings[i];
+ int t = na->num_rx_desc - kring->nr_hwavail;
+
+ if (t >= na->num_rx_desc)
+ t -= adapter->num_rx_desc;
+ IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
+ } else
+#endif /* DEV_NETMAP */
IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
}
@@ -4148,6 +4280,22 @@ ixgbe_rxeof(struct ix_queue *que, int count)
IXGBE_RX_LOCK(rxr);
+#ifdef DEV_NETMAP
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ /*
+ * Same as the txeof routine, only wakeup clients
+ * and make sure there are no deadlocks.
+ */
+ struct netmap_adapter *na = NA(ifp);
+
+ selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
+ IXGBE_RX_UNLOCK(rxr);
+ IXGBE_CORE_LOCK(adapter);
+ selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
+ IXGBE_CORE_UNLOCK(adapter);
+ return (FALSE);
+ }
+#endif /* DEV_NETMAP */
for (i = rxr->next_to_check; count != 0;) {
struct mbuf *sendmp, *mh, *mp;
u32 rsc, ptype;
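[Editorial note: the RDT computation in the netmap branch of
ixgbe_initialize_receive_units() above can be read as the standalone
sketch below (hypothetical helper, not part of the patch). With
RDH = 0, writing RDT = num_rx_desc - nr_hwavail protects exactly the
nr_hwavail slots still owned by userspace, the subtraction being taken
modulo the ring size.]

	/*
	 * Sketch: value to write into the RX tail register so that
	 * the nr_hwavail buffers still owned by userspace are not
	 * overwritten by the NIC. Assumes 0 <= nr_hwavail <= num_rx_desc.
	 */
	static inline int
	nm_rx_tail(int num_rx_desc, int nr_hwavail)
	{
		int t = num_rx_desc - nr_hwavail;

		if (t >= num_rx_desc)	/* nr_hwavail == 0: RDT = 0, whole ring free */
			t -= num_rx_desc;
		return t;		/* caller writes this into IXGBE_RDT(i) */
	}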
diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h
index 0e220e7..681a652 100644
--- a/sys/dev/netmap/if_em_netmap.h
+++ b/sys/dev/netmap/if_em_netmap.h
@@ -9,7 +9,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,9 +25,12 @@
/*
* $FreeBSD$
- * $Id: if_em_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: if_em_netmap.h 9802 2011-12-02 18:42:37Z luigi $
*
* netmap changes for if_em.
+ *
+ * For structure and details on the individual functions please see
+ * ixgbe_netmap.h
*/
#include <net/netmap.h>
@@ -58,12 +61,7 @@ em_netmap_attach(struct adapter *adapter)
na.nm_rxsync = em_netmap_rxsync;
na.nm_lock = em_netmap_lock_wrapper;
na.nm_register = em_netmap_reg;
- /*
- * adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode
- * we allocate the buffers on the first register. So we must
- * disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set.
- */
- na.buff_size = MCLBYTES;
+ na.buff_size = NETMAP_BUF_SIZE;
netmap_attach(&na, adapter->num_queues);
}
@@ -100,6 +98,7 @@ em_netmap_lock_wrapper(void *_a, int what, u_int queueid)
}
+// XXX do we need to block/unblock the tasks ?
static void
em_netmap_block_tasks(struct adapter *adapter)
{
@@ -162,9 +161,6 @@ em_netmap_reg(struct ifnet *ifp, int onoff)
if (onoff) {
ifp->if_capenable |= IFCAP_NETMAP;
- /* save if_transmit for later restore.
- * XXX also if_start and if_qflush ?
- */
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_start;
@@ -179,15 +175,13 @@ fail:
ifp->if_transmit = na->if_transmit;
ifp->if_capenable &= ~IFCAP_NETMAP;
em_init_locked(adapter); /* also enable intr */
-
}
em_netmap_unblock_tasks(adapter);
return (error);
}
/*
- * Reconcile hardware and user view of the transmit ring, see
- * ixgbe.c for details.
+ * Reconcile hardware and user view of the transmit ring.
*/
static int
em_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -197,13 +191,13 @@ em_netmap_txsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
/* generate an interrupt approximately every half ring */
int report_frequency = kring->nkr_num_slots >> 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -211,35 +205,20 @@ em_netmap_txsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
- /* record completed transmissions TODO
- *
- * instead of using TDH, we could read the transmitted status bit.
+ /* check for new packets to send.
+ * j indexes the netmap ring, l indexes the nic ring, and
+ * j = kring->nr_hwcur, l = E1000_TDT (not tracked),
+ * j == (l + kring->nkr_hwofs) % ring_size
*/
- j = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
- if (j >= kring->nkr_num_slots) { /* XXX can happen */
- D("TDH wrap %d", j);
- j -= kring->nkr_num_slots;
- }
- int delta = j - txr->next_to_clean;
- if (delta) {
- /* new transmissions were completed, increment
- ring->nr_hwavail. */
- if (delta < 0)
- delta += kring->nkr_num_slots;
- txr->next_to_clean = j;
- kring->nr_hwavail += delta;
- }
-
- /* update avail to what the hardware knows */
- ring->avail = kring->nr_hwavail;
-
j = kring->nr_hwcur;
if (j != k) { /* we have packets to send */
- n = 0;
+ l = j - kring->nkr_hwofs;
+ if (l < 0)
+ l += lim + 1;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct e1000_tx_desc *curr = &txr->tx_base[j];
- struct em_buffer *txbuf = &txr->tx_buffers[j];
+ struct e1000_tx_desc *curr = &txr->tx_base[l];
+ struct em_buffer *txbuf = &txr->tx_buffers[l];
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
E1000_TXD_CMD_RS : 0;
@@ -254,42 +233,61 @@ em_netmap_txsync(void *a, u_int ring_nr, int do_lock)
slot->flags &= ~NS_REPORT;
curr->upper.data = 0;
curr->lower.data =
- htole32(
- adapter->txd_cmd |
- (E1000_TXD_CMD_EOP | flags) |
- slot->len);
+ htole32(adapter->txd_cmd | len |
+ (E1000_TXD_CMD_EOP | flags) );
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(vtophys(addr));
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map,
- addr, na->buff_size);
+ addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
- kring->nr_hwcur = ring->cur;
+ kring->nr_hwcur = k;
/* decrease avail by number of sent packets */
- ring->avail -= n;
- kring->nr_hwavail = ring->avail;
+ kring->nr_hwavail -= n;
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me),
- ring->cur);
+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
}
+
+ if (n == 0 || kring->nr_hwavail < 1) {
+ int delta;
+
+ /* record completed transmissions using TDH. */
+ l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+ if (l >= kring->nkr_num_slots) { /* XXX can happen */
+ D("TDH wrap %d", l);
+ l -= kring->nkr_num_slots;
+ }
+ delta = l - txr->next_to_clean;
+ if (delta) {
+ /* some completed, increment hwavail. */
+ if (delta < 0)
+ delta += kring->nkr_num_slots;
+ txr->next_to_clean = l;
+ kring->nr_hwavail += delta;
+ }
+ }
+ /* update avail to what the hardware knows */
+ ring->avail = kring->nr_hwavail;
+
if (do_lock)
EM_TX_UNLOCK(txr);
return 0;
}
/*
- * Reconcile kernel and user view of the receive ring, see ixgbe.c
+ * Reconcile kernel and user view of the receive ring.
*/
static int
em_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -299,10 +297,10 @@ em_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -311,36 +309,52 @@ em_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- /* acknowledge all the received packets. */
- j = rxr->next_to_check;
+ /* import newly received packets into the netmap ring.
+ * j is an index in the netmap ring, l in the NIC ring, and
+ * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
+ * l = rxr->next_to_check;
+ * and
+ * j == (l + kring->nkr_hwofs) % ring_size
+ */
+ l = rxr->next_to_check;
+ j = l + kring->nkr_hwofs;
+ /* here nkr_hwofs can be negative so must check for j < 0 */
+ if (j < 0)
+ j += lim + 1;
+ else if (j > lim)
+ j -= lim + 1;
for (n = 0; ; n++) {
- struct e1000_rx_desc *curr = &rxr->rx_base[j];
+ struct e1000_rx_desc *curr = &rxr->rx_base[l];
if ((curr->status & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[j].len = le16toh(curr->length);
- bus_dmamap_sync(rxr->tag, rxr->rx_buffers[j].map,
+ bus_dmamap_sync(rxr->tag, rxr->rx_buffers[l].map,
BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
+ /* make sure next_to_refresh follows next_to_check */
+ rxr->next_to_refresh = l; // XXX
+ l = (l == lim) ? 0 : l + 1;
}
if (n) {
- rxr->next_to_check = j;
+ rxr->next_to_check = l;
kring->nr_hwavail += n;
}
- /* skip past packets that userspace has already processed:
- * making them available for reception.
- * advance nr_hwcur and issue a bus_dmamap_sync on the
- * buffers so it is safe to write to them.
- * Also increase nr_hwavail
- */
+ /* skip past packets that userspace has already processed */
j = kring->nr_hwcur;
if (j != k) { /* userspace has read some packets. */
n = 0;
+ l = j - kring->nkr_hwofs; /* NIC ring index */
+ /* here nkr_hwofs can be negative so check for l > lim */
+ if (l < 0)
+ l += lim + 1;
+ else if (l > lim)
+ l -= lim + 1;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct e1000_rx_desc *curr = &rxr->rx_base[j];
- struct em_buffer *rxbuf = &rxr->rx_buffers[j];
+ struct e1000_rx_desc *curr = &rxr->rx_base[l];
+ struct em_buffer *rxbuf = &rxr->rx_buffers[l];
void *addr = NMB(slot);
if (addr == netmap_buffer_base) { /* bad buf */
@@ -352,28 +366,29 @@ em_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
curr->status = 0;
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(vtophys(addr));
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(rxr->rxtag, rxbuf->map,
- addr, na->buff_size);
+ addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(rxr->rxtag, rxbuf->map,
- BUS_DMASYNC_PREREAD);
+ BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = ring->cur;
+ kring->nr_hwcur = k;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
- * so move j back by one unit
+ * so move l back by one unit
*/
- j = (j == 0) ? lim : j - 1;
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), j);
+ l = (l == 0) ? lim : l - 1;
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
}
/* tell userspace that there are new packets */
ring->avail = kring->nr_hwavail ;
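[Editorial note: the TDH-based reclaim that em_netmap_txsync() now
performs lazily (only when nothing was sent or fewer than one slot is
left) reduces to the sketch below; the helper is hypothetical, the
real code inlines it after reading E1000_TDH.]

	/*
	 * Sketch: reclaim completed transmissions. The advance of TDH
	 * since the last scan, modulo the ring size, is the number of
	 * newly completed descriptors.
	 */
	static inline int
	nm_tx_reclaim(int tdh, int *next_to_clean, int ring_size)
	{
		int delta = tdh - *next_to_clean;

		if (delta < 0)		/* TDH wrapped around the end of the ring */
			delta += ring_size;
		*next_to_clean = tdh;
		return delta;		/* caller adds this to kring->nr_hwavail */
	}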
diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h
index 0c14706..c048ec4 100644
--- a/sys/dev/netmap/if_igb_netmap.h
+++ b/sys/dev/netmap/if_igb_netmap.h
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: if_igb_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: if_igb_netmap.h 9802 2011-12-02 18:42:37Z luigi $
*
* netmap modifications for igb
* contributed by Ahmed Kooli
@@ -58,12 +58,7 @@ igb_netmap_attach(struct adapter *adapter)
na.nm_rxsync = igb_netmap_rxsync;
na.nm_lock = igb_netmap_lock_wrapper;
na.nm_register = igb_netmap_reg;
- /*
- * adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode
- * we allocate the buffers on the first register. So we must
- * disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set.
- */
- na.buff_size = MCLBYTES;
+ na.buff_size = NETMAP_BUF_SIZE;
netmap_attach(&na, adapter->num_queues);
}
@@ -111,7 +106,7 @@ igb_netmap_reg(struct ifnet *ifp, int onoff)
struct netmap_adapter *na = NA(ifp);
int error = 0;
- if (!na)
+ if (na == NULL)
return EINVAL;
igb_disable_intr(adapter);
@@ -144,21 +139,6 @@ fail:
/*
* Reconcile kernel and user view of the transmit ring.
- *
- * Userspace has filled tx slots up to cur (excluded).
- * The last unused slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available
- * (using the special value -1 to indicate idle transmit ring).
- * The function must first update avail to what the kernel
- * knows, subtract the newly used slots (cur - nr_hwcur)
- * from both avail and nr_hwavail, and set nr_hwcur = cur
- * issuing a dmamap_sync on all slots.
- *
- * Check parameters in the struct netmap_ring.
- * We don't use avail, only check for bogus values.
- * Make sure cur is valid, and same goes for buffer indexes and lengths.
- * To avoid races, read the values once, and never use those from
- * the ring afterwards.
*/
static int
igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -168,54 +148,40 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
/* generate an interrupt approximately every half ring */
int report_frequency = kring->nkr_num_slots >> 1;
- k = ring->cur; /* ring is not protected by any lock */
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ k = ring->cur;
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
IGB_TX_LOCK(txr);
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
- BUS_DMASYNC_POSTREAD);
-
- /* record completed transmissions. TODO
- *
- * Instead of reading from the TDH register, we could and try to check
- * the status bit of descriptor packets.
- */
- j = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
- if (j >= kring->nkr_num_slots) /* XXX can it happen ? */
- j -= kring->nkr_num_slots;
- int delta = j - txr->next_to_clean;
- if (delta) {
- /* new tx were completed */
- if (delta < 0)
- delta += kring->nkr_num_slots;
- txr->next_to_clean = j;
- kring->nr_hwavail += delta;
- }
+ BUS_DMASYNC_POSTREAD);
/* update avail to what the hardware knows */
ring->avail = kring->nr_hwavail;
- j = kring->nr_hwcur;
+ j = kring->nr_hwcur; /* netmap ring index */
if (j != k) { /* we have new packets to send */
u32 olinfo_status = 0;
- n = 0;
+ int n = 0;
+ l = j - kring->nkr_hwofs; /* NIC ring index */
+ if (l < 0)
+ l += lim + 1;
/* 82575 needs the queue index added */
if (adapter->hw.mac.type == e1000_82575)
olinfo_status |= txr->me << 4;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct igb_tx_buffer *txbuf = &txr->tx_buffers[j];
+ struct igb_tx_buffer *txbuf = &txr->tx_buffers[l];
union e1000_adv_tx_desc *curr =
- (union e1000_adv_tx_desc *)&txr->tx_base[j];
+ (union e1000_adv_tx_desc *)&txr->tx_base[l];
void *addr = NMB(slot);
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
@@ -229,6 +195,7 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
}
slot->flags &= ~NS_REPORT;
+ // XXX do we need to set the address ?
curr->read.buffer_addr = htole64(vtophys(addr));
curr->read.olinfo_status =
htole32(olinfo_status |
@@ -239,7 +206,7 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
E1000_ADVTXD_DCMD_DEXT |
E1000_ADVTXD_DCMD_EOP | flags);
if (slot->flags & NS_BUF_CHANGED) {
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map,
addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
@@ -248,22 +215,40 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwcur = k;
/* decrease avail by number of sent packets */
- ring->avail -= n;
- kring->nr_hwavail = ring->avail;
+ kring->nr_hwavail -= n;
+ ring->avail = kring->nr_hwavail;
- /* Set the watchdog */
+ /* Set the watchdog XXX ? */
txr->queue_status = IGB_QUEUE_WORKING;
txr->watchdog_time = ticks;
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), k);
+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
+ }
+ if (n == 0 || kring->nr_hwavail < 1) {
+ int delta;
+
+ /* record completed transmission using TDH */
+ l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+ if (l >= kring->nkr_num_slots) /* XXX can it happen ? */
+ l -= kring->nkr_num_slots;
+ delta = l - txr->next_to_clean;
+ if (delta) {
+ /* new tx were completed */
+ if (delta < 0)
+ delta += kring->nkr_num_slots;
+ txr->next_to_clean = l;
+ kring->nr_hwavail += delta;
+ ring->avail = kring->nr_hwavail;
+ }
}
if (do_lock)
IGB_TX_UNLOCK(txr);
@@ -273,15 +258,6 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
/*
* Reconcile kernel and user view of the receive ring.
- *
- * Userspace has read rx slots up to cur (excluded).
- * The last unread slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available.
- * We must subtract the newly consumed slots (cur - nr_hwcur)
- * from nr_hwavail, clearing the descriptors for the next
- * read, tell the hardware that they are available,
- * and set nr_hwcur = cur and avail = nr_hwavail.
- * issuing a dmamap_sync on all slots.
*/
static int
igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -291,10 +267,10 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
- k = ring->cur; /* ring is not protected by any lock */
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ k = ring->cur;
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -304,9 +280,12 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- j = rxr->next_to_check;
+ l = rxr->next_to_check;
+ j = l + kring->nkr_hwofs;
+ if (j > lim)
+ j -= lim + 1;
for (n = 0; ; n++) {
- union e1000_adv_rx_desc *curr = &rxr->rx_base[j];
+ union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & E1000_RXD_STAT_DD) == 0)
@@ -314,15 +293,13 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
ring->slot[j].len = le16toh(curr->wb.upper.length);
bus_dmamap_sync(rxr->ptag,
- rxr->rx_buffers[j].pmap, BUS_DMASYNC_POSTREAD);
+ rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
}
if (n) {
- rxr->next_to_check = j;
+ rxr->next_to_check = l;
kring->nr_hwavail += n;
- if (kring->nr_hwavail >= lim - 10) {
- ND("rx ring %d almost full %d", ring_nr, kring->nr_hwavail);
- }
}
/* skip past packets that userspace has already processed,
@@ -332,12 +309,15 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
* Also increase nr_hwavail
*/
j = kring->nr_hwcur;
+ l = kring->nr_hwcur - kring->nkr_hwofs;
+ if (l < 0)
+ l += lim + 1;
if (j != k) { /* userspace has read some packets. */
n = 0;
while (j != k) {
struct netmap_slot *slot = ring->slot + j;
- union e1000_adv_rx_desc *curr = &rxr->rx_base[j];
- struct igb_rx_buf *rxbuf = rxr->rx_buffers + j;
+ union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
+ struct igb_rx_buf *rxbuf = rxr->rx_buffers + l;
void *addr = NMB(slot);
if (addr == netmap_buffer_base) { /* bad buf */
@@ -358,6 +338,7 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwavail -= n;
@@ -365,10 +346,10 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* IMPORTANT: we must leave one free slot in the ring,
- * so move j back by one unit
+ * so move l back by one unit
*/
- j = (j == 0) ? lim : j - 1;
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), j);
+ l = (l == 0) ? lim : l - 1;
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
}
/* tell userspace that there are new packets */
ring->avail = kring->nr_hwavail ;
diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h
index a8f3498..ae64cd6 100644
--- a/sys/dev/netmap/if_lem_netmap.h
+++ b/sys/dev/netmap/if_lem_netmap.h
@@ -25,9 +25,12 @@
/*
* $FreeBSD$
- * $Id: if_lem_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: if_lem_netmap.h 9802 2011-12-02 18:42:37Z luigi $
*
* netmap support for if_lem.c
+ *
+ * For structure and details on the individual functions please see
+ * ixgbe_netmap.h
*/
#include <net/netmap.h>
@@ -59,7 +62,7 @@ lem_netmap_attach(struct adapter *adapter)
na.nm_rxsync = lem_netmap_rxsync;
na.nm_lock = lem_netmap_lock_wrapper;
na.nm_register = lem_netmap_reg;
- na.buff_size = MCLBYTES;
+ na.buff_size = NETMAP_BUF_SIZE;
netmap_attach(&na, 1);
}
@@ -94,7 +97,61 @@ lem_netmap_lock_wrapper(void *_a, int what, u_int ringid)
/*
- * Reconcile kernel and user view of the transmit ring. see ixgbe.c
+ * Register/unregister routine
+ */
+static int
+lem_netmap_reg(struct ifnet *ifp, int onoff)
+{
+ struct adapter *adapter = ifp->if_softc;
+ struct netmap_adapter *na = NA(ifp);
+ int error = 0;
+
+ if (na == NULL)
+ return EINVAL;
+
+ lem_disable_intr(adapter);
+
+ /* Tell the stack that the interface is no longer active */
+ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+ /* lem_netmap_block_tasks(adapter); */
+#ifndef EM_LEGACY_IRQ // XXX do we need this ?
+ taskqueue_block(adapter->tq);
+ taskqueue_drain(adapter->tq, &adapter->rxtx_task);
+ taskqueue_drain(adapter->tq, &adapter->link_task);
+#endif /* !EM_LEGACY_IRQ */
+ if (onoff) {
+ ifp->if_capenable |= IFCAP_NETMAP;
+
+ /* save if_transmit to restore it when exiting.
+ * XXX what about if_start and if_qflush ?
+ */
+ na->if_transmit = ifp->if_transmit;
+ ifp->if_transmit = netmap_start;
+
+ lem_init_locked(adapter);
+ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
+ error = ENOMEM;
+ goto fail;
+ }
+ } else {
+fail:
+ /* restore non-netmap mode */
+ ifp->if_transmit = na->if_transmit;
+ ifp->if_capenable &= ~IFCAP_NETMAP;
+ lem_init_locked(adapter); /* also enables intr */
+ }
+
+#ifndef EM_LEGACY_IRQ
+ taskqueue_unblock(adapter->tq); // XXX do we need this ?
+#endif /* !EM_LEGACY_IRQ */
+
+ return (error);
+}
+
+
+/*
+ * Reconcile kernel and user view of the transmit ring.
*/
static int
lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -103,13 +160,13 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->tx_rings[0];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
/* generate an interrupt approximately every half ring */
int report_frequency = kring->nkr_num_slots >> 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -117,33 +174,18 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
- /* record completed transmissions TODO
- *
- * instead of using TDH, we could read the transmitted status bit.
- */
- j = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
- if (j >= kring->nkr_num_slots) { /* can it happen ? */
- D("bad TDH %d", j);
- j -= kring->nkr_num_slots;
- }
- int delta = j - adapter->next_tx_to_clean;
- if (delta) {
- if (delta < 0)
- delta += kring->nkr_num_slots;
- adapter->next_tx_to_clean = j;
- kring->nr_hwavail += delta;
- }
-
/* update avail to what the hardware knows */
ring->avail = kring->nr_hwavail;
- j = kring->nr_hwcur;
+ j = kring->nr_hwcur; /* points into the netmap ring */
if (j != k) { /* we have new packets to send */
- n = 0;
+ l = j - kring->nkr_hwofs; /* points into the NIC ring */
+ if (l < 0)
+ l += lim + 1;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct e1000_tx_desc *curr = &adapter->tx_desc_base[j];
- struct em_buffer *txbuf = &adapter->tx_buffer_area[j];
+ struct e1000_tx_desc *curr = &adapter->tx_desc_base[l];
+ struct em_buffer *txbuf = &adapter->tx_buffer_area[l];
void *addr = NMB(slot);
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
@@ -156,34 +198,54 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
return netmap_ring_reinit(kring);
}
+ slot->flags &= ~NS_REPORT;
curr->upper.data = 0;
- /* always interrupt. XXX make it conditional */
curr->lower.data =
htole32( adapter->txd_cmd | len |
(E1000_TXD_CMD_EOP | flags) );
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(vtophys(addr));
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(adapter->txtag, txbuf->map,
- addr, na->buff_size);
+ addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(adapter->txtag, txbuf->map,
- BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
- kring->nr_hwcur = ring->cur;
+ kring->nr_hwcur = k;
/* decrease avail by number of sent packets */
- ring->avail -= n;
- kring->nr_hwavail = ring->avail;
+ kring->nr_hwavail -= n;
+ ring->avail = kring->nr_hwavail;
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), ring->cur);
+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), l);
+ }
+
+ if (n == 0 || kring->nr_hwavail < 1) {
+ int delta;
+
+ /* record completed transmissions using TDH */
+ l = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
+ if (l >= kring->nkr_num_slots) { /* can it happen ? */
+ D("bad TDH %d", l);
+ l -= kring->nkr_num_slots;
+ }
+ delta = l - adapter->next_tx_to_clean;
+ if (delta) {
+ if (delta < 0)
+ delta += kring->nkr_num_slots;
+ adapter->next_tx_to_clean = l;
+ kring->nr_hwavail += delta;
+ ring->avail = kring->nr_hwavail;
+ }
}
if (do_lock)
EM_TX_UNLOCK(adapter);
@@ -192,7 +254,7 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
/*
- * Reconcile kernel and user view of the receive ring. see ixgbe.c
+ * Reconcile kernel and user view of the receive ring.
*/
static int
lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -201,10 +263,10 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->rx_rings[0];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -213,40 +275,45 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- /* acknowldge all the received packets. */
- j = adapter->next_rx_desc_to_check;
+ /* import newly received packets into the netmap ring */
+ l = adapter->next_rx_desc_to_check; /* points into the NIC ring */
+ j = l + kring->nkr_hwofs; /* points into the netmap ring */
+ if (j > lim)
+ j -= lim + 1;
for (n = 0; ; n++) {
- struct e1000_rx_desc *curr = &adapter->rx_desc_base[j];
- int len = le16toh(adapter->rx_desc_base[j].length) - 4; // CRC
+ struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
+ int len;
if ((curr->status & E1000_RXD_STAT_DD) == 0)
break;
+ len = le16toh(curr->length) - 4; // CRC
if (len < 0) {
D("bogus pkt size at %d", j);
len = 0;
}
ring->slot[j].len = len;
- bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[j].map,
- BUS_DMASYNC_POSTREAD);
+ bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[l].map,
+ BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
}
if (n) {
- adapter->next_rx_desc_to_check = j;
+ adapter->next_rx_desc_to_check = l;
kring->nr_hwavail += n;
}
- /* skip past packets that userspace has already processed,
- * making them available for reception. We don't need to set
- * the length as it is the same for all slots.
- */
- j = kring->nr_hwcur;
+ /* skip past packets that userspace has already processed */
+ j = kring->nr_hwcur; /* netmap ring index */
if (j != k) { /* userspace has read some packets. */
n = 0;
+ l = j - kring->nkr_hwofs; /* NIC ring index */
+ if (l < 0)
+ l += lim + 1;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct e1000_rx_desc *curr = &adapter->rx_desc_base[j];
- struct em_buffer *rxbuf = &adapter->rx_buffer_area[j];
+ struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
+ struct em_buffer *rxbuf = &adapter->rx_buffer_area[l];
void *addr = NMB(slot);
if (addr == netmap_buffer_base) { /* bad buf */
@@ -254,32 +321,32 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
EM_RX_UNLOCK(adapter);
return netmap_ring_reinit(kring);
}
- curr = &adapter->rx_desc_base[j];
curr->status = 0;
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(vtophys(addr));
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(adapter->rxtag, rxbuf->map,
- addr, na->buff_size);
+ addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(adapter->rxtag, rxbuf->map,
- BUS_DMASYNC_PREREAD);
+ BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = ring->cur;
+ kring->nr_hwcur = k;
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
- * so move j back by one unit
+ * so move l back by one unit
*/
- j = (j == 0) ? lim : j - 1;
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), j);
+ l = (l == 0) ? lim : l - 1;
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), l);
}
/* tell userspace that there are new packets */
@@ -290,55 +357,3 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
}
-/*
- * Register/unregister routine
- */
-static int
-lem_netmap_reg(struct ifnet *ifp, int onoff)
-{
- struct adapter *adapter = ifp->if_softc;
- struct netmap_adapter *na = NA(ifp);
- int error = 0;
-
- if (!na)
- return EINVAL;
-
- lem_disable_intr(adapter);
-
- /* Tell the stack that the interface is no longer active */
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
-
- /* lem_netmap_block_tasks(adapter); */
-#ifndef EM_LEGACY_IRQ
- taskqueue_block(adapter->tq);
- taskqueue_drain(adapter->tq, &adapter->rxtx_task);
- taskqueue_drain(adapter->tq, &adapter->link_task);
-#endif /* !EM_LEGCY_IRQ */
- if (onoff) {
- ifp->if_capenable |= IFCAP_NETMAP;
-
- /* save if_transmit to restore it when exiting.
- * XXX what about if_start and if_qflush ?
- */
- na->if_transmit = ifp->if_transmit;
- ifp->if_transmit = netmap_start;
-
- lem_init_locked(adapter);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
- error = ENOMEM;
- goto fail;
- }
- } else {
-fail:
- /* restore non-netmap mode */
- ifp->if_transmit = na->if_transmit;
- ifp->if_capenable &= ~IFCAP_NETMAP;
- lem_init_locked(adapter); /* also enables intr */
- }
-
-#ifndef EM_LEGACY_IRQ
- taskqueue_unblock(adapter->tq);
-#endif /* !EM_LEGCY_IRQ */
-
- return (error);
-}
diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h
index efccf3a..105660c 100644
--- a/sys/dev/netmap/if_re_netmap.h
+++ b/sys/dev/netmap/if_re_netmap.h
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: if_re_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: if_re_netmap.h 9802 2011-12-02 18:42:37Z luigi $
*
* netmap support for if_re
*/
@@ -56,7 +56,7 @@ re_netmap_attach(struct rl_softc *sc)
na.nm_rxsync = re_netmap_rxsync;
na.nm_lock = re_netmap_lock_wrapper;
na.nm_register = re_netmap_reg;
- na.buff_size = MCLBYTES;
+ na.buff_size = NETMAP_BUF_SIZE;
netmap_attach(&na, 1);
}
@@ -99,7 +99,7 @@ re_netmap_reg(struct ifnet *ifp, int onoff)
struct netmap_adapter *na = NA(ifp);
int error = 0;
- if (!na)
+ if (na == NULL)
return EINVAL;
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
@@ -109,9 +109,8 @@ re_netmap_reg(struct ifnet *ifp, int onoff)
if (onoff) {
ifp->if_capenable |= IFCAP_NETMAP;
- /* save if_transmit and restore it */
+ /* save if_transmit to restore it later */
na->if_transmit = ifp->if_transmit;
- /* XXX if_start and if_qflush ??? */
ifp->if_transmit = netmap_start;
re_init_locked(adapter);
@@ -127,23 +126,12 @@ fail:
ifp->if_capenable &= ~IFCAP_NETMAP;
re_init_locked(adapter); /* also enables intr */
}
- return (error);
-
+ return (error);
}
/*
* Reconcile kernel and user view of the transmit ring.
- *
- * Userspace has filled tx slots up to cur (excluded).
- * The last unused slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available
- * (using the special value -1 to indicate idle transmit ring).
- * The function must first update avail to what the kernel
- * knows (translating the -1 to nkr_num_slots - 1),
- * subtract the newly used slots (cur - nr_hwcur)
- * from both avail and nr_hwavail, and set nr_hwcur = cur
- * issuing a dmamap_sync on all slots.
*/
static int
re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -153,10 +141,10 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -167,17 +155,18 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
sc->rl_ldata.rl_tx_list_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+ /* XXX move after the transmissions */
/* record completed transmissions */
- for (n = 0, j = sc->rl_ldata.rl_tx_considx;
- j != sc->rl_ldata.rl_tx_prodidx;
- n++, j = RL_TX_DESC_NXT(sc, j)) {
+ for (n = 0, l = sc->rl_ldata.rl_tx_considx;
+ l != sc->rl_ldata.rl_tx_prodidx;
+ n++, l = RL_TX_DESC_NXT(sc, l)) {
uint32_t cmdstat =
- le32toh(sc->rl_ldata.rl_tx_list[j].rl_cmdstat);
+ le32toh(sc->rl_ldata.rl_tx_list[l].rl_cmdstat);
if (cmdstat & RL_TDESC_STAT_OWN)
break;
}
if (n > 0) {
- sc->rl_ldata.rl_tx_considx = j;
+ sc->rl_ldata.rl_tx_considx = l;
sc->rl_ldata.rl_tx_free += n;
kring->nr_hwavail += n;
}
@@ -185,13 +174,13 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
/* update avail to what the hardware knows */
ring->avail = kring->nr_hwavail;
- /* we trust prodidx, not hwcur */
- j = kring->nr_hwcur = sc->rl_ldata.rl_tx_prodidx;
+ j = kring->nr_hwcur;
if (j != k) { /* we have new packets to send */
n = 0;
+ l = sc->rl_ldata.rl_tx_prodidx;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[j];
+ struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[l];
int cmd = slot->len | RL_TDESC_CMD_EOF |
RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF ;
void *addr = NMB(slot);
@@ -200,10 +189,11 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
if (do_lock)
RL_UNLOCK(sc);
+ // XXX what about prodidx ?
return netmap_ring_reinit(kring);
}
- if (j == lim) /* mark end of ring */
+ if (l == lim) /* mark end of ring */
cmd |= RL_TDESC_CMD_EOR;
if (slot->flags & NS_BUF_CHANGED) {
@@ -212,17 +202,19 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
/* buffer has changed, unload and reload map */
netmap_reload_map(sc->rl_ldata.rl_tx_mtag,
- txd[j].tx_dmamap, addr, na->buff_size);
+ txd[l].tx_dmamap, addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
slot->flags &= ~NS_REPORT;
desc->rl_cmdstat = htole32(cmd);
bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
- txd[j].tx_dmamap, BUS_DMASYNC_PREWRITE);
+ txd[l].tx_dmamap, BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
- sc->rl_ldata.rl_tx_prodidx = kring->nr_hwcur = ring->cur;
+ sc->rl_ldata.rl_tx_prodidx = l;
+ kring->nr_hwcur = k;
/* decrease avail by number of sent packets */
ring->avail -= n;
@@ -243,15 +235,6 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
/*
* Reconcile kernel and user view of the receive ring.
- *
- * Userspace has read rx slots up to cur (excluded).
- * The last unread slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available.
- * We must subtract the newly consumed slots (cur - nr_hwcur)
- * from nr_hwavail, clearing the descriptors for the next
- * read, tell the hardware that they are available,
- * and set nr_hwcur = cur and avail = nr_hwavail.
- * issuing a dmamap_sync on all slots.
*/
static int
re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -261,10 +244,10 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -280,9 +263,10 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
* cleared (all buffers could have it cleared. The easiest one
* is to limit the amount of data reported up to 'lim'
*/
- j = sc->rl_ldata.rl_rx_prodidx;
+ l = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
+ j = l + kring->nkr_hwofs;
for (n = kring->nr_hwavail; n < lim ; n++) {
- struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[j];
+ struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l];
uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
uint32_t total_len;
@@ -294,11 +278,12 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
kring->ring->slot[j].len = total_len;
/* sync was in re_newbuf() */
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
- rxd[j].rx_dmamap, BUS_DMASYNC_POSTREAD);
- j = RL_RX_DESC_NXT(sc, j);
+ rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD);
+ j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
}
if (n != kring->nr_hwavail) {
- sc->rl_ldata.rl_rx_prodidx = j;
+ sc->rl_ldata.rl_rx_prodidx = l;
sc->rl_ifp->if_ipackets += n - kring->nr_hwavail;
kring->nr_hwavail = n;
}
@@ -312,9 +297,12 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
j = kring->nr_hwcur;
if (j != k) { /* userspace has read some packets. */
n = 0;
+ l = kring->nr_hwcur - kring->nkr_hwofs;
+ if (l < 0)
+ l += lim + 1;
while (j != k) {
struct netmap_slot *slot = ring->slot + j;
- struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[j];
+ struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[l];
int cmd = na->buff_size | RL_RDESC_CMD_OWN;
void *addr = NMB(slot);
@@ -324,7 +312,7 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
return netmap_ring_reinit(kring);
}
- if (j == lim) /* mark end of ring */
+ if (l == lim) /* mark end of ring */
cmd |= RL_RDESC_CMD_EOR;
desc->rl_cmdstat = htole32(cmd);
@@ -334,12 +322,13 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
- rxd[j].rx_dmamap, addr, na->buff_size);
+ rxd[l].rx_dmamap, addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
- rxd[j].rx_dmamap, BUS_DMASYNC_PREREAD);
+ rxd[l].rx_dmamap, BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwavail -= n;
@@ -351,18 +340,22 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
}
/* tell userspace that there are new packets */
- ring->avail = kring->nr_hwavail ;
+ ring->avail = kring->nr_hwavail;
if (do_lock)
RL_UNLOCK(sc);
return 0;
}
+/*
+ * Additional routines to init the tx and rx rings.
+ * In other drivers we do that inline in the main code.
+ */
static void
re_netmap_tx_init(struct rl_softc *sc)
{
struct rl_txdesc *txd;
struct rl_desc *desc;
- int i;
+ int i, n;
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0);
@@ -372,11 +365,20 @@ re_netmap_tx_init(struct rl_softc *sc)
/* in netmap mode, overwrite addresses and maps */
txd = sc->rl_ldata.rl_tx_desc;
desc = sc->rl_ldata.rl_tx_list;
+ n = sc->rl_ldata.rl_tx_desc_cnt;
+
+ /* l points in the netmap ring, i points in the NIC ring */
+ for (i = 0; i < n; i++) {
+ void *addr;
+ uint64_t paddr;
+ struct netmap_kring *kring = &na->tx_rings[0];
+ int l = i + kring->nkr_hwofs;
- for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
- void *addr = NMB(slot+i);
- uint64_t paddr = vtophys(addr);
+ if (l >= n)
+ l -= n;
+ addr = NMB(slot + l);
+ paddr = vtophys(addr);
desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_load_map(sc->rl_ldata.rl_tx_mtag,
@@ -387,26 +389,39 @@ re_netmap_tx_init(struct rl_softc *sc)
static void
re_netmap_rx_init(struct rl_softc *sc)
{
- /* slot is NULL if we are not in netmap mode */
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
struct rl_desc *desc = sc->rl_ldata.rl_rx_list;
uint32_t cmdstat;
- int i;
+ int i, n;
if (!slot)
return;
-
- for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
- void *addr = NMB(slot+i);
- uint64_t paddr = vtophys(addr);
-
+ n = sc->rl_ldata.rl_rx_desc_cnt;
+ for (i = 0; i < n; i++) {
+ void *addr;
+ uint64_t paddr;
+ struct netmap_kring *kring = &na->rx_rings[0];
+ int l = i + kring->nkr_hwofs;
+
+ if (l >= n)
+ l -= n;
+
+ addr = NMB(slot + l);
+ paddr = vtophys(addr);
desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
- cmdstat = slot[i].len = na->buff_size; // XXX
- if (i == sc->rl_ldata.rl_rx_desc_cnt - 1)
+ cmdstat = na->buff_size;
+ if (i == n - 1)
cmdstat |= RL_RDESC_CMD_EOR;
- desc[i].rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
+ /*
+ * userspace knows that hwavail packets were ready before the
+ * reset, so we need to tell the NIC that last hwavail
+ * descriptors of the ring are still owned by the driver.
+ */
+ if (i < n - 1 - kring->nr_hwavail) // XXX + 1 ?
+ cmdstat |= RL_RDESC_CMD_OWN;
+ desc[i].rl_cmdstat = htole32(cmdstat);
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
sc->rl_ldata.rl_rx_desc[i].rx_dmamap,
diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
index a4d5491..6c8b2b6 100644
--- a/sys/dev/netmap/ixgbe_netmap.h
+++ b/sys/dev/netmap/ixgbe_netmap.h
@@ -25,25 +25,48 @@
/*
* $FreeBSD$
- * $Id: ixgbe_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: ixgbe_netmap.h 9802 2011-12-02 18:42:37Z luigi $
*
* netmap modifications for ixgbe
+ *
+ * This file is meant to be a reference on how to implement
+ * netmap support for a network driver.
+ * This file contains code but only static or inline functions
+ * that are used by a single driver. To avoid replication of
+ * code we just #include it near the beginning of the
+ * standard driver.
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
-// #include <vm/vm.h>
-// #include <vm/pmap.h> /* vtophys ? */
+/*
+ * Some drivers may need the following headers. Others
+ * already include them by default
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+ */
+
#include <dev/netmap/netmap_kern.h>
+/*
+ * prototypes for the new API calls that are used by the
+ * *_netmap_attach() routine.
+ */
static int ixgbe_netmap_reg(struct ifnet *, int onoff);
static int ixgbe_netmap_txsync(void *, u_int, int);
static int ixgbe_netmap_rxsync(void *, u_int, int);
static void ixgbe_netmap_lock_wrapper(void *, int, u_int);
-SYSCTL_NODE(_dev, OID_AUTO, ixgbe, CTLFLAG_RW, 0, "ixgbe card");
-
+/*
+ * The attach routine, called near the end of ixgbe_attach(),
+ * fills the parameters for netmap_attach() and calls it.
+ * It cannot fail, in the worst case (such as no memory)
+ * netmap mode will be disabled and the driver will only
+ * operate in standard mode.
+ */
static void
ixgbe_netmap_attach(struct adapter *adapter)
{
@@ -52,7 +75,7 @@ ixgbe_netmap_attach(struct adapter *adapter)
bzero(&na, sizeof(na));
na.ifp = adapter->ifp;
- na.separate_locks = 1;
+ na.separate_locks = 1; /* this card has separate rx/tx locks */
na.num_tx_desc = adapter->num_tx_desc;
na.num_rx_desc = adapter->num_rx_desc;
na.nm_txsync = ixgbe_netmap_txsync;
@@ -60,17 +83,18 @@ ixgbe_netmap_attach(struct adapter *adapter)
na.nm_lock = ixgbe_netmap_lock_wrapper;
na.nm_register = ixgbe_netmap_reg;
/*
+ * XXX where do we put this comment ?
* adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode
* we allocate the buffers on the first register. So we must
* disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set.
*/
- na.buff_size = MCLBYTES;
+ na.buff_size = NETMAP_BUF_SIZE;
netmap_attach(&na, adapter->num_queues);
}
/*
- * wrapper to export locks to the generic code
+ * wrapper to export locks to the generic netmap code.
*/
static void
ixgbe_netmap_lock_wrapper(void *_a, int what, u_int queueid)
@@ -102,8 +126,8 @@ ixgbe_netmap_lock_wrapper(void *_a, int what, u_int queueid)
/*
- * support for netmap register/unregisted. We are already under core lock.
- * only called on the first init or the last unregister.
+ * Netmap register/unregister. We are already under core lock.
+ * Only called on the first register or the last unregister.
*/
static int
ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
@@ -112,7 +136,7 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
struct netmap_adapter *na = NA(ifp);
int error = 0;
- if (!na)
+ if (!na) /* probably, netmap_attach() failed */
return EINVAL;
ixgbe_disable_intr(adapter);
@@ -120,23 +144,28 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
- if (onoff) {
+ if (onoff) { /* enable netmap mode */
ifp->if_capenable |= IFCAP_NETMAP;
- /* save if_transmit to restore it later */
+ /* save if_transmit and replace with our routine */
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_start;
+ /*
+ * reinitialize the adapter, now with netmap flag set,
+ * so the rings will be set accordingly.
+ */
ixgbe_init_locked(adapter);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
error = ENOMEM;
goto fail;
}
- } else {
+ } else { /* reset normal mode (explicit request or netmap failed) */
fail:
/* restore if_transmit */
ifp->if_transmit = na->if_transmit;
ifp->if_capenable &= ~IFCAP_NETMAP;
+ /* initialize the card, this time in standard mode */
ixgbe_init_locked(adapter); /* also enables intr */
}
return (error);
@@ -145,21 +174,23 @@ fail:
/*
* Reconcile kernel and user view of the transmit ring.
+ * This routine might be called frequently so it must be efficient.
+ *
+ * Userspace has filled tx slots up to ring->cur (excluded).
+ * The last unused slot previously known to the kernel was kring->nkr_hwcur,
+ * and the last interrupt reported kring->nr_hwavail slots available.
*
- * Userspace has filled tx slots up to cur (excluded).
- * The last unused slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available
- * (using the special value -1 to indicate idle transmit ring).
- * The function must first update avail to what the kernel
- * knows, subtract the newly used slots (cur - nr_hwcur)
- * from both avail and nr_hwavail, and set nr_hwcur = cur
+ * This function runs under lock (acquired from the caller or internally).
+ * It must first update ring->avail to what the kernel knows,
+ * subtract the newly used slots (ring->cur - kring->nkr_hwcur)
+ * from both avail and nr_hwavail, and set ring->nkr_hwcur = ring->cur
* issuing a dmamap_sync on all slots.
*
- * Check parameters in the struct netmap_ring.
- * We don't use avail, only check for bogus values.
- * Make sure cur is valid, and same goes for buffer indexes and lengths.
- * To avoid races, read the values once, and never use those from
- * the ring afterwards.
+ * Since ring comes from userspace, its content must be read only once,
+ * and validated before being used to update the kernel's structures.
+ * (this is also true for every use of ring in the kernel).
+ *
+ * ring->avail is never used, only checked for bogus values.
*/
static int
ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -169,42 +200,96 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n = 0, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
- /* generate an interrupt approximately every half ring */
+ /*
+ * ixgbe can generate an interrupt on every tx packet, but it
+ * seems very expensive, so we interrupt once every half ring,
+ * or when requested with NS_REPORT
+ */
int report_frequency = kring->nkr_num_slots >> 1;
- k = ring->cur; /* ring is not protected by any lock */
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
- return netmap_ring_reinit(kring);
-
if (do_lock)
IXGBE_TX_LOCK(txr);
+ /* take a copy of ring->cur now, and never read it again */
+ k = ring->cur;
+ l = k - kring->nr_hwcur;
+ if (l < 0)
+ l += lim + 1;
+ /* if cur is invalid, reinitialize the ring. */
+ if (k > lim || l > kring->nr_hwavail) {
+ if (do_lock)
+ IXGBE_TX_UNLOCK(txr);
+ return netmap_ring_reinit(kring);
+ }
+
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
- /* update avail to what the hardware knows */
- ring->avail = kring->nr_hwavail;
-
+ /*
+ * Process new packets to send. j is the current index in the
+ * netmap ring, l is the corresponding index in the NIC ring.
+ * The two numbers differ because upon a *_init() we reset
+ * the NIC ring but leave the netmap ring unchanged.
+ * For the transmit ring, we have
+ *
+ * j = kring->nr_hwcur
+ * l = IXGBE_TDT (not tracked in the driver)
+ * and
+ * j == (l + kring->nkr_hwofs) % ring_size
+ *
+ * In this driver kring->nkr_hwofs >= 0, but for other
+ * drivers it might be negative as well.
+ */
j = kring->nr_hwcur;
if (j != k) { /* we have new packets to send */
+ l = j - kring->nkr_hwofs;
+ if (l < 0) /* wraparound */
+ l += lim + 1;
+
while (j != k) {
+ /*
+ * Collect per-slot info.
+ * Note that txbuf and curr are indexed by l.
+ *
+ * In this driver we collect the buffer address
+ * (using the NMB() macro) because we always
+ * need to rewrite it into the NIC ring.
+ * Many other drivers preserve the address, so
+ * we only need to access it if NS_BUF_CHANGED
+ * is set.
+ */
struct netmap_slot *slot = &ring->slot[j];
- struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[j];
- union ixgbe_adv_tx_desc *curr = &txr->tx_base[j];
+ struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[l];
+ union ixgbe_adv_tx_desc *curr = &txr->tx_base[l];
void *addr = NMB(slot);
+ // XXX type for flags and len ?
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
IXGBE_TXD_CMD_RS : 0;
int len = slot->len;
+ /*
+ * Quick check for valid addr and len.
+ * NMB() returns netmap_buffer_base for invalid
+ * buffer indexes (but the address is still a
+ * valid one to be used in a ring). slot->len is
+ * unsigned so no need to check for negative values.
+ */
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
+ring_reset:
if (do_lock)
IXGBE_TX_UNLOCK(txr);
return netmap_ring_reinit(kring);
}
slot->flags &= ~NS_REPORT;
+ /*
+ * Fill the slot in the NIC ring.
+ * In this driver we need to rewrite the buffer
+ * address in the NIC ring. Other drivers do not
+ * need this.
+ */
curr->read.buffer_addr = htole64(vtophys(addr));
curr->read.olinfo_status = 0;
curr->read.cmd_type_len =
@@ -212,6 +297,10 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
(IXGBE_ADVTXD_DTYP_DATA |
IXGBE_ADVTXD_DCMD_IFCS |
IXGBE_TXD_CMD_EOP | flags) );
+ /* If the buffer has changed, unload and reload map
+ * (and possibly the physical address in the NIC
+ * slot, but we did it already).
+ */
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, unload and reload map */
netmap_reload_map(txr->txtag, txbuf->map,
@@ -219,69 +308,89 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
slot->flags &= ~NS_BUF_CHANGED;
}
+ /* make sure changes to the buffer are synced */
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
- kring->nr_hwcur = k;
+ kring->nr_hwcur = k; /* the saved ring->cur */
/* decrease avail by number of sent packets */
- ring->avail -= n;
- kring->nr_hwavail = ring->avail;
+ kring->nr_hwavail -= n;
+ /* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), k);
+ /* (re)start the transmitter up to slot l (excluded) */
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), l);
}
+ /*
+ * If no packets were sent, or there is no room in the tx ring,
+ * check whether there are completed transmissions.
+ * Because this is expensive (we need to read a register, etc.)
+ * we only do it if absolutely necessary, i.e. there is no room
+ * in the tx ring, or no packets were sent (meaning that the
+ * caller probably just wanted to check for completed transmissions).
+ */
if (n == 0 || kring->nr_hwavail < 1) {
- /* record completed transmissions. TODO
+ int delta;
+
+ /*
+ * Record completed transmissions.
+ * We (re)use the driver's txr->next_to_clean to keep
+ * track of the most recently completed transmission.
*
* The datasheet discourages the use of TDH to find out the
- * number of sent packets; the right way to do so, is to check
- * the DD bit inside the status of a packet descriptor. On the
- * other hand, we avoid to set the `report status' bit for
- * *all* outgoing packets (kind of interrupt mitigation),
- * consequently the DD bit is not guaranteed to be set for all
- * the packets: thats way, for the moment we continue to use
- * TDH.
+ * number of sent packets. We should rather check the DD
+ * status bit in a packet descriptor. However, we only set
+ * the "report status" bit for some descriptors (a kind of
+ * interrupt mitigation), so we can only check on those.
+ * For the time being we use TDH, as we do it infrequently
+ * enough not to pose performance problems.
*/
- j = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
- if (j >= kring->nkr_num_slots) { /* XXX can happen */
- D("TDH wrap %d", j);
- j -= kring->nkr_num_slots;
+ l = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
+ if (l >= kring->nkr_num_slots) { /* XXX can happen */
+ D("TDH wrap %d", l);
+ l -= kring->nkr_num_slots;
}
- int delta = j - txr->next_to_clean;
+ delta = l - txr->next_to_clean;
if (delta) {
- /* new transmissions were completed, increment
- ring->nr_hwavail. */
+ /* some tx completed, increment avail */
if (delta < 0)
delta += kring->nkr_num_slots;
- txr->next_to_clean = j;
+ txr->next_to_clean = l;
kring->nr_hwavail += delta;
- ring->avail = kring->nr_hwavail;
+ if (kring->nr_hwavail > lim)
+ goto ring_reset;
}
}
+ /* update avail to what the kernel knows */
+ ring->avail = kring->nr_hwavail;
if (do_lock)
IXGBE_TX_UNLOCK(txr);
return 0;
}
/*
* Reconcile kernel and user view of the receive ring.
+ * Same as for the txsync, this routine must be efficient and
+ * avoid races in accessing the shared regions.
+ *
+ * When called, userspace has read data from slots kring->nr_hwcur
+ * up to ring->cur (excluded).
*
- * Userspace has read rx slots up to cur (excluded).
- * The last unread slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available.
+ * The last interrupt reported kring->nr_hwavail slots available
+ * after kring->nr_hwcur.
* We must subtract the newly consumed slots (cur - nr_hwcur)
- * from nr_hwavail, clearing the descriptors for the next
- * read, tell the hardware that they are available,
- * and set nr_hwcur = cur and avail = nr_hwavail.
- * issuing a dmamap_sync on all slots.
+ * from nr_hwavail, make the descriptors available for the next reads,
+ * and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail.
*/
static int
ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -291,86 +400,123 @@ ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
- k = ring->cur; /* ring is not protected by any lock */
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ k = ring->cur; /* cache and check value, same as in txsync */
+ n = k - kring->nr_hwcur;
+ if (n < 0)
+ n += lim + 1;
+ if (k > lim || n > kring->nr_hwavail) /* userspace is cheating */
return netmap_ring_reinit(kring);
if (do_lock)
IXGBE_RX_LOCK(rxr);
/* XXX check sync modes */
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- j = rxr->next_to_check;
+ /*
+ * First part, import newly received packets into the netmap ring.
+ *
+ * j is the index of the next free slot in the netmap ring,
+ * and l is the index of the next received packet in the NIC ring,
+ * and they may differ in case if_init() has been called while
+ * in netmap mode. For the receive ring we have
+ *
+ * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
+ * l = rxr->next_to_check;
+ * and
+ * j == (l + kring->nkr_hwofs) % ring_size
+ *
+ * rxr->next_to_check is set to 0 on a ring reinit
+ */
+ l = rxr->next_to_check;
+ j = rxr->next_to_check + kring->nkr_hwofs;
+ if (j > lim)
+ j -= lim + 1;
+
for (n = 0; ; n++) {
- union ixgbe_adv_rx_desc *curr = &rxr->rx_base[j];
+ union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & IXGBE_RXD_STAT_DD) == 0)
break;
ring->slot[j].len = le16toh(curr->wb.upper.length);
bus_dmamap_sync(rxr->ptag,
- rxr->rx_buffers[j].pmap, BUS_DMASYNC_POSTREAD);
+ rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
}
- if (n) {
- rxr->next_to_check = j;
+ if (n) { /* update the state variables */
+ rxr->next_to_check = l;
kring->nr_hwavail += n;
- if (kring->nr_hwavail >= lim - 10) {
- ND("rx ring %d almost full %d", ring_nr, kring->nr_hwavail);
- }
}
- /* skip past packets that userspace has already processed,
- * making them available for reception.
- * advance nr_hwcur and issue a bus_dmamap_sync on the
- * buffers so it is safe to write to them.
- * Also increase nr_hwavail
+ /*
+ * Skip past packets that userspace has already processed
+ * (from kring->nr_hwcur to ring->cur excluded), and make
+ * the buffers available for reception.
+ * As usual j is the index in the netmap ring, l is the index
+ * in the NIC ring, and j == (l + kring->nkr_hwofs) % ring_size
*/
j = kring->nr_hwcur;
if (j != k) { /* userspace has read some packets. */
n = 0;
+ l = kring->nr_hwcur - kring->nkr_hwofs;
+ if (l < 0)
+ l += lim + 1;
while (j != k) {
- struct netmap_slot *slot = ring->slot + j;
- union ixgbe_adv_rx_desc *curr = &rxr->rx_base[j];
- struct ixgbe_rx_buf *rxbuf = rxr->rx_buffers + j;
+ /* collect per-slot info, with similar validations
+ * and flag handling as in the txsync code.
+ *
+ * NOTE curr and rxbuf are indexed by l.
+ * Also, this driver needs to update the physical
+ * address in the NIC ring, but other drivers
+ * may not have this requirement.
+ */
+ struct netmap_slot *slot = &ring->slot[j];
+ union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
+ struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[l];
void *addr = NMB(slot);
- if (addr == netmap_buffer_base) { /* bad buf */
- if (do_lock)
- IXGBE_RX_UNLOCK(rxr);
- return netmap_ring_reinit(kring);
- }
+ if (addr == netmap_buffer_base) /* bad buf */
+ goto ring_reset;
curr->wb.upper.status_error = 0;
curr->read.pkt_addr = htole64(vtophys(addr));
if (slot->flags & NS_BUF_CHANGED) {
netmap_reload_map(rxr->ptag, rxbuf->pmap,
- addr, na->buff_size);
+ addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
- BUS_DMASYNC_PREREAD);
+ BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = ring->cur;
+ kring->nr_hwcur = k;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* IMPORTANT: we must leave one free slot in the ring,
- * so move j back by one unit
+ * so move l back by one unit
*/
- j = (j == 0) ? lim : j - 1;
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), j);
+ l = (l == 0) ? lim : l - 1;
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), l);
}
/* tell userspace that there are new packets */
ring->avail = kring->nr_hwavail ;
if (do_lock)
IXGBE_RX_UNLOCK(rxr);
return 0;
+
+ring_reset:
+ if (do_lock)
+ IXGBE_RX_UNLOCK(rxr);
+ return netmap_ring_reinit(kring);
}
+/* end of file */
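
The comments in txsync/rxsync above describe how a netmap ring index j and a
NIC ring index l are kept related by j == (l + kring->nkr_hwofs) % ring_size.
The following standalone sketch (not part of the patch; both helper names are
hypothetical, chosen to mirror the fields used above) shows the index
normalization in isolation, together with the TDH-style completion accounting,
assuming a ring of num_slots entries:

    /* Translate a netmap index j into the corresponding NIC index.
     * hwofs may be negative on drivers whose NIC ring restarts from
     * a nonzero slot, hence the two-sided normalization.
     */
    static inline unsigned int
    nm_idx_to_nic(unsigned int j, int hwofs, unsigned int num_slots)
    {
            int l = (int)j - hwofs;

            if (l < 0)
                    l += num_slots;
            else if (l >= (int)num_slots)
                    l -= num_slots;
            return (unsigned int)l;
    }

    /* Completed-transmission accounting as in the TDH-based path
     * above: head is the value read from TDH, next_to_clean the
     * last position already accounted for. The result is the
     * number of slots to add to kring->nr_hwavail.
     */
    static inline int
    nm_tx_completed(int head, int next_to_clean, int num_slots)
    {
            int delta = head - next_to_clean;

            if (delta < 0)
                    delta += num_slots;
            return delta;
    }

For example, with num_slots = 256 and hwofs = -3, netmap slot 1 maps to NIC
slot 4; with hwofs = 100, netmap slot 1 wraps around to NIC slot 157.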
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index fef8516..34a0627 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -1,15 +1,15 @@
/*
* Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: netmap.c 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: netmap.c 9795 2011-12-02 11:39:08Z luigi $
*
* This module supports memory mapped access to network devices,
* see netmap(4).
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/jail.h>
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/conf.h> /* cdevsw struct */
#include <sys/uio.h> /* uio struct */
@@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mman.h> /* PROT_EXEC */
#include <sys/poll.h>
+#include <sys/proc.h>
#include <vm/vm.h> /* vtophys */
#include <vm/pmap.h> /* vtophys */
#include <sys/socket.h> /* sockaddrs */
@@ -78,6 +80,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/bpf.h> /* BIOCIMMEDIATE */
+#include <net/vnet.h>
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <machine/bus.h> /* bus_dmamap_* */
@@ -678,6 +681,13 @@ get_ifp(const char *name, struct ifnet **ifp)
* Error routine called when txsync/rxsync detects an error.
* Can't do much more than resetting cur = hwcur, avail = hwavail.
* Return 1 on reinit.
+ *
+ * This routine is only called by the upper half of the kernel.
+ * It only reads hwcur (which is changed only by the upper half, too)
+ * and hwavail (which may be changed by the lower half, but only on
+ * a tx ring and only to increase it, so any error will be recovered
+ * on the next call). For these reasons we don't strictly need
+ * to call it under lock.
*/
int
netmap_ring_reinit(struct netmap_kring *kring)
@@ -717,29 +727,10 @@ netmap_ring_reinit(struct netmap_kring *kring)
ring->avail, kring->nr_hwavail);
ring->cur = kring->nr_hwcur;
ring->avail = kring->nr_hwavail;
- ring->flags |= NR_REINIT;
- kring->na->flags |= NR_REINIT;
}
return (errors ? 1 : 0);
}
-/*
- * Clean the reinit flag for our rings.
- * XXX at the moment, clear for all rings
- */
-static void
-netmap_clean_reinit(struct netmap_adapter *na)
-{
- //struct netmap_kring *kring;
- u_int i;
-
- na->flags &= ~NR_REINIT;
- D("--- NR_REINIT reset on %s", na->ifp->if_xname);
- for (i = 0; i < na->num_queues + 1; i++) {
- na->tx_rings[i].ring->flags &= ~NR_REINIT;
- na->rx_rings[i].ring->flags &= ~NR_REINIT;
- }
-}
/*
* Set the ring ID. For devices with a single queue, a request
@@ -801,7 +792,7 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
*/
static int
netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
- __unused int fflag, __unused struct thread *td)
+ __unused int fflag, struct thread *td)
{
struct netmap_priv_d *priv = NULL;
struct ifnet *ifp;
@@ -812,9 +803,13 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
u_int i;
struct netmap_if *nifp;
+ CURVNET_SET(TD_TO_VNET(td));
+
error = devfs_get_cdevpriv((void **)&priv);
- if (error != ENOENT && error != 0)
+ if (error != ENOENT && error != 0) {
+ CURVNET_RESTORE();
return (error);
+ }
error = 0; /* Could be ENOENT */
switch (cmd) {
@@ -836,8 +831,10 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
break;
case NIOCREGIF:
- if (priv != NULL) /* thread already registered */
- return netmap_set_ringid(priv, nmr->nr_ringid);
+ if (priv != NULL) { /* thread already registered */
+ error = netmap_set_ringid(priv, nmr->nr_ringid);
+ break;
+ }
/* find the interface and a reference */
error = get_ifp(nmr->nr_name, &ifp); /* keep reference */
if (error)
@@ -927,8 +924,10 @@ error:
break;
case NIOCUNREGIF:
- if (priv == NULL)
- return (ENXIO);
+ if (priv == NULL) {
+ error = ENXIO;
+ break;
+ }
/* the interface is unregistered inside the
destructor of the private data. */
@@ -937,22 +936,21 @@ error:
case NIOCTXSYNC:
case NIOCRXSYNC:
- if (priv == NULL)
- return (ENXIO);
+ if (priv == NULL) {
+ error = ENXIO;
+ break;
+ }
ifp = priv->np_ifp; /* we have a reference */
na = NA(ifp); /* retrieve netmap adapter */
adapter = ifp->if_softc; /* shorthand */
- if (na->flags & NR_REINIT)
- netmap_clean_reinit(na);
-
if (priv->np_qfirst == na->num_queues) {
/* queues to/from host */
if (cmd == NIOCTXSYNC)
netmap_sync_to_host(na);
else
netmap_sync_from_host(na, NULL);
- return error;
+ break;
}
for (i = priv->np_qfirst; i < priv->np_qlast; i++) {
@@ -999,6 +997,7 @@ error:
}
}
+ CURVNET_RESTORE();
return (error);
}
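
The ioctl changes above convert early returns into breaks so that every exit
path passes through CURVNET_RESTORE(). A standalone sketch of the pattern,
with stub macros standing in for the kernel's CURVNET_SET()/CURVNET_RESTORE()
and a made-up command and error value:

    #include <stdio.h>

    #define CURVNET_SET(arg)  printf("vnet set\n")      /* stub */
    #define CURVNET_RESTORE() printf("vnet restored\n") /* stub */

    static int
    example_ioctl(int cmd)
    {
            int error = 0;

            CURVNET_SET(cmd);
            switch (cmd) {
            case 1:
                    /* was "return (ENXIO)", which would skip the
                     * restore below; break reaches it instead */
                    error = 6; /* ENXIO */
                    break;
            default:
                    break;
            }
            CURVNET_RESTORE();
            return (error);
    }

    int
    main(void)
    {
            return (example_ioctl(1) != 0) ? 0 : 1;
    }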
@@ -1039,13 +1038,6 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
adapter = ifp->if_softc;
na = NA(ifp); /* retrieve netmap adapter */
- /* pending reinit, report up as a poll error. Pending
- * reads and writes are lost.
- */
- if (na->flags & NR_REINIT) {
- netmap_clean_reinit(na);
- revents |= POLLERR;
- }
/* how many queues we are scanning */
i = priv->np_qfirst;
if (i == na->num_queues) { /* from/to host */
@@ -1111,20 +1103,20 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
* data available. If this fails, then lock and call the sync
* routines.
*/
- for (i = priv->np_qfirst; want_rx && i < priv->np_qlast; i++) {
- kring = &na->rx_rings[i];
- if (kring->ring->avail > 0) {
- revents |= want_rx;
- want_rx = 0; /* also breaks the loop */
+ for (i = priv->np_qfirst; want_rx && i < priv->np_qlast; i++) {
+ kring = &na->rx_rings[i];
+ if (kring->ring->avail > 0) {
+ revents |= want_rx;
+ want_rx = 0; /* also breaks the loop */
+ }
}
- }
- for (i = priv->np_qfirst; want_tx && i < priv->np_qlast; i++) {
- kring = &na->tx_rings[i];
- if (kring->ring->avail > 0) {
- revents |= want_tx;
- want_tx = 0; /* also breaks the loop */
+ for (i = priv->np_qfirst; want_tx && i < priv->np_qlast; i++) {
+ kring = &na->tx_rings[i];
+ if (kring->ring->avail > 0) {
+ revents |= want_tx;
+ want_tx = 0; /* also breaks the loop */
+ }
}
- }
/*
* If we want to push packets out (priv->np_txpoll) or want_tx is
@@ -1326,24 +1318,6 @@ done:
* netmap_reset() is called by the driver routines when reinitializing
* a ring. The driver is in charge of locking to protect the kring.
* If netmap mode is not set just return NULL.
- * Otherwise set NR_REINIT (in the ring and in na) to signal
- * that a ring has been reinitialized,
- * set cur = hwcur = 0 and avail = hwavail = num_slots - 1 .
- * IT IS IMPORTANT to leave one slot free even in the tx ring because
- * we rely on cur=hwcur only for empty rings.
- * These are good defaults but can be overridden later in the device
- * specific code if, after a reinit, the ring does not start from 0
- * (e.g. if_em.c does this).
- *
- * XXX we shouldn't be touching the ring, but there is a
- * race anyways and this is our best option.
- *
- * XXX setting na->flags makes the syscall code faster, as there is
- * only one place to check. On the other hand, we will need a better
- * way to notify multiple threads that rings have been reset.
- * One way is to increment na->rst_count at each ring reset.
- * Each thread in its own priv structure will keep a matching counter,
- * and on a reset will acknowledge and clean its own rings.
*/
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
@@ -1351,8 +1325,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
{
struct netmap_kring *kring;
struct netmap_ring *ring;
- struct netmap_slot *slot;
- u_int i;
+ int new_hwofs, lim;
if (na == NULL)
return NULL; /* no netmap support here */
@@ -1360,74 +1333,26 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
return NULL; /* nothing to reinitialize */
kring = tx == NR_TX ? na->tx_rings + n : na->rx_rings + n;
ring = kring->ring;
- if (tx == NR_TX) {
- /*
- * The last argument is the new value of next_to_clean.
- *
- * In the TX ring, we have P pending transmissions (from
- * next_to_clean to nr_hwcur) followed by nr_hwavail free slots.
- * Generally we can use all the slots in the ring so
- * P = ring_size - nr_hwavail hence (modulo ring_size):
- * next_to_clean == nr_hwcur + nr_hwavail
- *
- * If, upon a reset, nr_hwavail == ring_size and next_to_clean
- * does not change we have nothing to report. Otherwise some
- * pending packets may be lost, or newly injected packets will.
- */
- /* if hwcur does not change, nothing to report.
- * otherwise remember the change so perhaps we can
- * shift the block at the next reinit
- */
- if (new_cur == kring->nr_hwcur &&
- kring->nr_hwavail == kring->nkr_num_slots - 1) {
- /* all ok */
- D("+++ NR_REINIT ok on %s TX[%d]", na->ifp->if_xname, n);
- } else {
- D("+++ NR_REINIT set on %s TX[%d]", na->ifp->if_xname, n);
- }
- ring->flags |= NR_REINIT;
- na->flags |= NR_REINIT;
- ring->avail = kring->nr_hwavail = kring->nkr_num_slots - 1;
- ring->cur = kring->nr_hwcur = new_cur;
- } else {
- /*
- * The last argument is the next free slot.
- * In the RX ring we have nr_hwavail full buffers starting
- * from nr_hwcur.
- * If nr_hwavail == 0 and nr_hwcur does not change we are ok
- * otherwise we might be in trouble as the buffers are
- * changing.
- */
- if (new_cur == kring->nr_hwcur && kring->nr_hwavail == 0) {
- /* all ok */
- D("+++ NR_REINIT ok on %s RX[%d]", na->ifp->if_xname, n);
- } else {
- D("+++ NR_REINIT set on %s RX[%d]", na->ifp->if_xname, n);
- }
- ring->flags |= NR_REINIT;
- na->flags |= NR_REINIT;
- ring->avail = kring->nr_hwavail = 0; /* no data */
- ring->cur = kring->nr_hwcur = new_cur;
- }
+ lim = kring->nkr_num_slots - 1;
+
+ if (tx == NR_TX)
+ new_hwofs = kring->nr_hwcur - new_cur;
+ else
+ new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
+ if (new_hwofs > lim)
+ new_hwofs -= lim + 1;
+
+ /* Always set the new offset value and realign the ring. */
+ kring->nkr_hwofs = new_hwofs;
+ if (tx == NR_TX)
+ kring->nr_hwavail = kring->nkr_num_slots - 1;
+ D("new hwofs %d on %s %s[%d]",
+ kring->nkr_hwofs, na->ifp->if_xname,
+ tx == NR_TX ? "TX" : "RX", n);
- slot = ring->slot;
/*
- * Check that buffer indexes are correct. If we find a
- * bogus value we are a bit in trouble because we cannot
- * recover easily. Best we can do is (probably) persistently
- * reset the ring.
- */
- for (i = 0; i < kring->nkr_num_slots; i++) {
- if (slot[i].buf_idx >= netmap_total_buffers) {
- D("invalid buf_idx %d at slot %d", slot[i].buf_idx, i);
- slot[i].buf_idx = 0; /* XXX reset */
- }
- /* XXX we don't really need to set the length */
- slot[i].len = 0;
- }
- /* wakeup possible waiters, both on the ring and on the global
- * selfd. Perhaps a bit early now but the device specific
- * routine is locked so hopefully we won't have a race.
+ * We do the wakeup here, but the ring is not yet reconfigured.
+ * However, we are under lock so there are no races.
*/
selwakeuppri(&kring->si, PI_NET);
selwakeuppri(&kring[na->num_queues + 1 - n].si, PI_NET);
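
netmap_reset() above records an offset instead of touching the user-visible
ring. A small worked example (hypothetical values, not from the patch) of the
new_hwofs computation, runnable as a standalone program:

    #include <stdio.h>

    int
    main(void)
    {
            int num_slots = 256, lim = num_slots - 1;
            int nr_hwcur = 100, nr_hwavail = 30; /* assumed kring state */
            int new_cur = 0;                     /* NIC restarted from 0 */

            /* TX: keep nr_hwcur mapped onto the NIC position new_cur */
            int tx_hwofs = nr_hwcur - new_cur;
            if (tx_hwofs > lim)
                    tx_hwofs -= lim + 1;

            /* RX: the first free netmap slot (hwcur + hwavail) must
             * map onto new_cur, the NIC's next write position. */
            int rx_hwofs = nr_hwcur + nr_hwavail - new_cur;
            if (rx_hwofs > lim)
                    rx_hwofs -= lim + 1;

            /* prints "tx_hwofs 100 rx_hwofs 130" */
            printf("tx_hwofs %d rx_hwofs %d\n", tx_hwofs, rx_hwofs);
            return 0;
    }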
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 5434609..eb36ec6 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -1,15 +1,15 @@
/*
* Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: netmap_kern.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: netmap_kern.h 9795 2011-12-02 11:39:08Z luigi $
*
* The header contains the definitions of constants and function
* prototypes used only in kernelspace.
@@ -68,7 +68,7 @@ struct netmap_kring {
u_int nr_kflags;
u_int nkr_num_slots;
- u_int nkr_hwofs; /* offset between NIC and netmap ring */
+ int nkr_hwofs; /* offset between NIC and netmap ring */
struct netmap_adapter *na; // debugging
struct selinfo si; /* poll/select wait queue */
};
@@ -94,7 +94,7 @@ struct netmap_adapter {
u_int num_rx_desc;
u_int buff_size;
- u_int flags; /* NR_REINIT */
+ u_int flags;
/* tx_rings and rx_rings are private but allocated
* as a contiguous chunk of memory. Each array has
* N+1 entries, for the adapter queues and for the host queue.
diff --git a/sys/net/netmap.h b/sys/net/netmap.h
index be9c686..4dec1fd 100644
--- a/sys/net/netmap.h
+++ b/sys/net/netmap.h
@@ -32,7 +32,7 @@
/*
* $FreeBSD$
- * $Id: netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: netmap.h 9753 2011-11-28 15:10:43Z luigi $
*
* This header contains the definitions of the constants and the
* structures needed by the ``netmap'' module, both kernel and
@@ -186,13 +186,6 @@ struct netmap_ring {
const uint16_t nr_buf_size;
uint16_t flags;
- /*
- * When a ring is reinitialized, the kernel sets kflags.
- * On exit from a syscall, if the flag is found set, we
- * also reinitialize the nr_* variables. The kflag is then
- * unconditionally copied to nr_flags and cleared.
- */
-#define NR_REINIT 0x0001 /* ring reinitialized! */
#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
struct timeval ts; /* time of last *sync() */
diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c
index 747bd9d..21dc8de 100644
--- a/tools/tools/netmap/pkt-gen.c
+++ b/tools/tools/netmap/pkt-gen.c
@@ -4,10 +4,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: pkt-gen.c 9638 2011-11-07 18:07:43Z luigi $
+ * $Id: pkt-gen.c 9827 2011-12-05 11:29:34Z luigi $
*
* Example program to show how to build a multithreaded packet
* source/sink using the netmap device.
@@ -45,6 +45,7 @@ const char *default_payload="netmap pkt-gen Luigi Rizzo and Matteo Landi\n"
#include <signal.h> /* signal */
#include <stdlib.h>
#include <stdio.h>
+#include <inttypes.h> /* PRI* macros */
#include <string.h> /* strcmp */
#include <fcntl.h> /* open */
#include <unistd.h> /* close */
@@ -616,7 +617,7 @@ tx_output(uint64_t sent, int size, double delta)
punit += 1;
}
- printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n",
+ printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n",
sent, size, delta);
printf("Speed: %.2f%cpps. Bandwidth: %.2f%cbps.\n",
pps, units[punit], amount, units[aunit]);
@@ -636,7 +637,7 @@ rx_output(uint64_t received, double delta)
punit += 1;
}
- printf("Received %llu packets, in %.2f seconds.\n", received, delta);
+ printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta);
printf("Speed: %.2f%cpps.\n", pps, units[punit]);
}
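
The pkt-gen changes above replace %llu with the PRI* macros because uint64_t
is not "unsigned long long" on every platform (on 64-bit FreeBSD it is
"unsigned long", so %llu draws a format warning). A minimal illustration,
with a made-up packet count:

    #include <inttypes.h> /* PRIu64 and uint64_t */
    #include <stdio.h>

    int
    main(void)
    {
            uint64_t sent = 14880000; /* hypothetical counter value */

            /* PRIu64 expands to the conversion specifier that matches
             * uint64_t on the host platform. */
            printf("Sent %" PRIu64 " packets\n", sent);
            return 0;
    }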
@@ -971,7 +972,7 @@ main(int arc, char **argv)
if (pps < 10000)
continue;
pps = (my_count - prev)*1000000 / pps;
- D("%llu pps", pps);
+ D("%" PRIu64 " pps", pps);
prev = my_count;
toc = now;
if (done == g.nthreads)