-rw-r--r--  sys/dev/ixgbe/ixgbe.c           | 148
-rw-r--r--  sys/dev/netmap/if_em_netmap.h   | 167
-rw-r--r--  sys/dev/netmap/if_igb_netmap.h  | 137
-rw-r--r--  sys/dev/netmap/if_lem_netmap.h  | 241
-rw-r--r--  sys/dev/netmap/if_re_netmap.h   | 147
-rw-r--r--  sys/dev/netmap/ixgbe_netmap.h   | 340
-rw-r--r--  sys/dev/netmap/netmap.c         | 211
-rw-r--r--  sys/dev/netmap/netmap_kern.h    |  18
-rw-r--r--  sys/net/netmap.h                |   9
-rw-r--r--  tools/tools/netmap/pkt-gen.c    |  17
10 files changed, 837 insertions(+), 598 deletions(-)
diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c
index 9a709af..ae76e9f 100644
--- a/sys/dev/ixgbe/ixgbe.c
+++ b/sys/dev/ixgbe/ixgbe.c
@@ -313,6 +313,18 @@ static int atr_sample_rate = 20;
static int fdir_pballoc = 1;
#endif
+#ifdef DEV_NETMAP
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap.h .
+ *
+ * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
+ * that extend the standard driver.
+ */
+#include <dev/netmap/ixgbe_netmap.h>
+#endif /* DEV_NETMAP */
+
/*********************************************************************
* Device identification routine
*
@@ -578,6 +590,9 @@ ixgbe_attach(device_t dev)
ixgbe_add_hw_stats(adapter);
+#ifdef DEV_NETMAP
+ ixgbe_netmap_attach(adapter);
+#endif /* DEV_NETMAP */
INIT_DEBUGOUT("ixgbe_attach: end");
return (0);
err_late:
@@ -652,6 +667,9 @@ ixgbe_detach(device_t dev)
ether_ifdetach(adapter->ifp);
callout_drain(&adapter->timer);
+#ifdef DEV_NETMAP
+ netmap_detach(adapter->ifp);
+#endif /* DEV_NETMAP */
ixgbe_free_pci_resources(adapter);
bus_generic_detach(dev);
if_free(adapter->ifp);
@@ -2813,9 +2831,20 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr)
struct adapter *adapter = txr->adapter;
struct ixgbe_tx_buf *txbuf;
int i;
+#ifdef DEV_NETMAP
+ struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_slot *slot;
+#endif /* DEV_NETMAP */
/* Clear the old ring contents */
IXGBE_TX_LOCK(txr);
+#ifdef DEV_NETMAP
+ /*
+ * (under lock): if in netmap mode, do some consistency
+ * checks and set slot to entry 0 of the netmap ring.
+ */
+ slot = netmap_reset(na, NR_TX, txr->me, 0);
+#endif /* DEV_NETMAP */
bzero((void *)txr->tx_base,
(sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
/* Reset indices */
@@ -2832,6 +2861,26 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr)
m_freem(txbuf->m_head);
txbuf->m_head = NULL;
}
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, set the map for the packet buffer.
+ * NOTE: Some drivers (not this one) also need to set
+ * the physical buffer address in the NIC ring.
+ * Slots in the netmap ring (indexed by "si") are
+ * kring->nkr_hwofs positions "ahead" wrt the
+ * corresponding slot in the NIC ring. In some drivers
+ * (not here) nkr_hwofs can be negative. When computing
+ * si = i + kring->nkr_hwofs make sure to handle wraparounds.
+ */
+ if (slot) {
+ int si = i + na->tx_rings[txr->me].nkr_hwofs;
+
+ if (si >= na->num_tx_desc)
+ si -= na->num_tx_desc;
+ netmap_load_map(txr->txtag, txbuf->map,
+ NMB(slot + si), na->buff_size);
+ }
+#endif /* DEV_NETMAP */
/* Clear the EOP index */
txbuf->eop_index = -1;
}
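The nkr_hwofs translation used above recurs in every driver touched by this patch; only the sign handling differs. As an editorial sketch (not part of the patch), the mapping can be written as a hypothetical helper that works for positive and negative offsets alike, assuming |nkr_hwofs| is always smaller than the ring size:

	static inline int
	nic2nm_idx(struct netmap_kring *kring, int i)
	{
		int n = kring->nkr_num_slots;
		int si = i + kring->nkr_hwofs;	/* may fall outside [0, n) */

		if (si < 0)		/* negative offset wrapped backwards */
			si += n;
		else if (si >= n)	/* positive offset wrapped forwards */
			si -= n;
		return si;
	}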
@@ -3310,6 +3359,29 @@ ixgbe_txeof(struct tx_ring *txr)
mtx_assert(&txr->tx_mtx, MA_OWNED);
+#ifdef DEV_NETMAP
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(ifp);
+
+ /*
+ * In netmap mode, all the work is done in the context
+ * of the client thread. Interrupt handlers only wake up
+ * clients, which may be sleeping on individual rings
+ * or on a global resource for all rings.
+ * When the driver has separate locks, we need to
+ * release and re-acquire txlock to avoid deadlocks.
+ * XXX see if we can find a better way.
+ */
+ selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
+ IXGBE_TX_UNLOCK(txr);
+ IXGBE_CORE_LOCK(adapter);
+ selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+ IXGBE_CORE_UNLOCK(adapter);
+ IXGBE_TX_LOCK(txr);
+ return FALSE;
+ }
+#endif /* DEV_NETMAP */
+
if (txr->tx_avail == adapter->num_tx_desc) {
txr->queue_status = IXGBE_QUEUE_IDLE;
return FALSE;
@@ -3698,6 +3770,10 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr)
bus_dma_segment_t pseg[1], hseg[1];
struct lro_ctrl *lro = &rxr->lro;
int rsize, nsegs, error = 0;
+#ifdef DEV_NETMAP
+ struct netmap_adapter *na = NA(rxr->adapter->ifp);
+ struct netmap_slot *slot;
+#endif /* DEV_NETMAP */
adapter = rxr->adapter;
ifp = adapter->ifp;
@@ -3705,6 +3781,10 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr)
/* Clear the ring contents */
IXGBE_RX_LOCK(rxr);
+#ifdef DEV_NETMAP
+ /* same as in ixgbe_setup_transmit_ring() */
+ slot = netmap_reset(na, NR_RX, rxr->me, 0);
+#endif /* DEV_NETMAP */
rsize = roundup2(adapter->num_rx_desc *
sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
bzero((void *)rxr->rx_base, rsize);
@@ -3721,6 +3801,29 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr)
struct mbuf *mh, *mp;
rxbuf = &rxr->rx_buffers[j];
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, fill the map and set the buffer
+ * address in the NIC ring, considering the offset
+ * between the netmap and NIC rings (see comment in
+ * ixgbe_setup_transmit_ring() ). No need to allocate
+ * an mbuf, so end the block with a continue;
+ */
+ if (slot) {
+ int sj = j + na->rx_rings[rxr->me].nkr_hwofs;
+ void *addr;
+
+ if (sj >= na->num_rx_desc)
+ sj -= na->num_rx_desc;
+ addr = NMB(slot + sj);
+ netmap_load_map(rxr->ptag,
+ rxbuf->pmap, addr, na->buff_size);
+ /* Update descriptor */
+ rxr->rx_base[j].read.pkt_addr =
+ htole64(vtophys(addr));
+ continue;
+ }
+#endif /* DEV_NETMAP */
/*
** Don't allocate mbufs if not
** doing header split, it's wasteful
@@ -3913,6 +4016,35 @@ ixgbe_initialize_receive_units(struct adapter *adapter)
/* Setup the HW Rx Head and Tail Descriptor Pointers */
IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
+#ifdef DEV_NETMAP
+ /*
+ * In netmap mode, we must preserve the buffers made
+ * available to userspace before the if_init()
+ * (this is true by default on the TX side, because
+ * init makes all buffers available to userspace).
+ *
+ * netmap_reset() and the device specific routines
+ * (e.g. ixgbe_setup_receive_rings()) map these
+ * buffers at the end of the NIC ring, so here we
+ * must set the RDT (tail) register to make sure
+ * they are not overwritten.
+ *
+ * In this driver the NIC ring starts at RDH = 0,
+ * RDT points to the first 'busy' slot, so RDT = 0
+ * means the whole ring is available, and
+ * RDT = (num_rx_desc - X) means X slots are available.
+ * Computations are done modulo the ring size.
+ */
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_kring *kring = &na->rx_rings[i];
+ int t = na->num_rx_desc - kring->nr_hwavail;
+
+ if (t >= na->num_rx_desc)
+ t -= adapter->num_rx_desc;
+ IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
+ } else
+#endif /* DEV_NETMAP */
IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
}
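The RDT computation above is easier to see with concrete numbers, as a sketch (the values are made up): with num_rx_desc = 256 and nr_hwavail = 10 buffers still owned by userspace,

	int t = na->num_rx_desc - kring->nr_hwavail;	/* 256 - 10 = 246 */
	if (t >= na->num_rx_desc)	/* only possible when nr_hwavail == 0 */
		t -= adapter->num_rx_desc;	/* 256 wraps to 0: whole ring free */

so RDT = 246 stops the NIC just short of slots 246..255, the ten buffers that must not be overwritten.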
@@ -4148,6 +4280,22 @@ ixgbe_rxeof(struct ix_queue *que, int count)
IXGBE_RX_LOCK(rxr);
+#ifdef DEV_NETMAP
+ if (ifp->if_capenable & IFCAP_NETMAP) {
+ /*
+ * Same as the txeof routine, only wakeup clients
+ * and make sure there are no deadlocks.
+ */
+ struct netmap_adapter *na = NA(ifp);
+
+ selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
+ IXGBE_RX_UNLOCK(rxr);
+ IXGBE_CORE_LOCK(adapter);
+ selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
+ IXGBE_CORE_UNLOCK(adapter);
+ return (FALSE);
+ }
+#endif /* DEV_NETMAP */
for (i = rxr->next_to_check; count != 0;) {
struct mbuf *sendmp, *mh, *mp;
u32 rsc, ptype;
diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h
index 0e220e7..681a652 100644
--- a/sys/dev/netmap/if_em_netmap.h
+++ b/sys/dev/netmap/if_em_netmap.h
@@ -9,7 +9,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,9 +25,12 @@
/*
* $FreeBSD$
- * $Id: if_em_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: if_em_netmap.h 9802 2011-12-02 18:42:37Z luigi $
*
* netmap changes for if_em.
+ *
+ * For structure and details on the individual functions please see
+ * ixgbe_netmap.h
*/
#include <net/netmap.h>
@@ -58,12 +61,7 @@ em_netmap_attach(struct adapter *adapter)
na.nm_rxsync = em_netmap_rxsync;
na.nm_lock = em_netmap_lock_wrapper;
na.nm_register = em_netmap_reg;
- /*
- * adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode
- * we allocate the buffers on the first register. So we must
- * disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set.
- */
- na.buff_size = MCLBYTES;
+ na.buff_size = NETMAP_BUF_SIZE;
netmap_attach(&na, adapter->num_queues);
}
@@ -100,6 +98,7 @@ em_netmap_lock_wrapper(void *_a, int what, u_int queueid)
}
+// XXX do we need to block/unblock the tasks ?
static void
em_netmap_block_tasks(struct adapter *adapter)
{
@@ -162,9 +161,6 @@ em_netmap_reg(struct ifnet *ifp, int onoff)
if (onoff) {
ifp->if_capenable |= IFCAP_NETMAP;
- /* save if_transmit for later restore.
- * XXX also if_start and if_qflush ?
- */
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_start;
@@ -179,15 +175,13 @@ fail:
ifp->if_transmit = na->if_transmit;
ifp->if_capenable &= ~IFCAP_NETMAP;
em_init_locked(adapter); /* also enable intr */
-
}
em_netmap_unblock_tasks(adapter);
return (error);
}
/*
- * Reconcile hardware and user view of the transmit ring, see
- * ixgbe.c for details.
+ * Reconcile hardware and user view of the transmit ring.
*/
static int
em_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -197,13 +191,13 @@ em_netmap_txsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
/* generate an interrupt approximately every half ring */
int report_frequency = kring->nkr_num_slots >> 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -211,35 +205,20 @@ em_netmap_txsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
- /* record completed transmissions TODO
- *
- * instead of using TDH, we could read the transmitted status bit.
+ /* check for new packets to send.
+ * j indexes the netmap ring, l indexes the nic ring, and
+ * j = kring->nr_hwcur, l = E1000_TDT (not tracked),
+ * j == (l + kring->nkr_hwofs) % ring_size
*/
- j = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
- if (j >= kring->nkr_num_slots) { /* XXX can happen */
- D("TDH wrap %d", j);
- j -= kring->nkr_num_slots;
- }
- int delta = j - txr->next_to_clean;
- if (delta) {
- /* new transmissions were completed, increment
- ring->nr_hwavail. */
- if (delta < 0)
- delta += kring->nkr_num_slots;
- txr->next_to_clean = j;
- kring->nr_hwavail += delta;
- }
-
- /* update avail to what the hardware knows */
- ring->avail = kring->nr_hwavail;
-
j = kring->nr_hwcur;
if (j != k) { /* we have packets to send */
- n = 0;
+ l = j - kring->nkr_hwofs;
+ if (l < 0)
+ l += lim + 1;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct e1000_tx_desc *curr = &txr->tx_base[j];
- struct em_buffer *txbuf = &txr->tx_buffers[j];
+ struct e1000_tx_desc *curr = &txr->tx_base[l];
+ struct em_buffer *txbuf = &txr->tx_buffers[l];
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
E1000_TXD_CMD_RS : 0;
@@ -254,42 +233,61 @@ em_netmap_txsync(void *a, u_int ring_nr, int do_lock)
slot->flags &= ~NS_REPORT;
curr->upper.data = 0;
curr->lower.data =
- htole32(
- adapter->txd_cmd |
- (E1000_TXD_CMD_EOP | flags) |
- slot->len);
+ htole32(adapter->txd_cmd | len |
+ (E1000_TXD_CMD_EOP | flags) );
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(vtophys(addr));
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map,
- addr, na->buff_size);
+ addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
- kring->nr_hwcur = ring->cur;
+ kring->nr_hwcur = k;
/* decrease avail by number of sent packets */
- ring->avail -= n;
- kring->nr_hwavail = ring->avail;
+ kring->nr_hwavail -= n;
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me),
- ring->cur);
+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
}
+
+ if (n == 0 || kring->nr_hwavail < 1) {
+ int delta;
+
+ /* record completed transmissions using TDH. */
+ l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+ if (l >= kring->nkr_num_slots) { /* XXX can happen */
+ D("TDH wrap %d", l);
+ l -= kring->nkr_num_slots;
+ }
+ delta = l - txr->next_to_clean;
+ if (delta) {
+ /* some completed, increment hwavail. */
+ if (delta < 0)
+ delta += kring->nkr_num_slots;
+ txr->next_to_clean = l;
+ kring->nr_hwavail += delta;
+ }
+ }
+ /* update avail to what the hardware knows */
+ ring->avail = kring->nr_hwavail;
+
if (do_lock)
EM_TX_UNLOCK(txr);
return 0;
}
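The TDH-based reclaim added above reappears almost verbatim in the igb and lem drivers below. As a sketch (hypothetical helper, not in the patch), the common pattern is:

	static void
	e1000_tx_reclaim(struct netmap_kring *kring, int tdh, int *next_to_clean)
	{
		int delta = tdh - *next_to_clean;

		if (delta < 0)		/* TDH wrapped past the end of the ring */
			delta += kring->nkr_num_slots;
		if (delta > 0) {
			*next_to_clean = tdh;
			kring->nr_hwavail += delta;	/* slots free for reuse */
		}
	}

Reading TDH costs a register access, which is why all three drivers now do it lazily: only when nothing was queued on this call (n == 0) or the ring looks full (nr_hwavail < 1).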
/*
- * Reconcile kernel and user view of the receive ring, see ixgbe.c
+ * Reconcile kernel and user view of the receive ring.
*/
static int
em_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -299,10 +297,10 @@ em_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -311,36 +309,52 @@ em_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- /* acknowledge all the received packets. */
- j = rxr->next_to_check;
+ /* import newly received packets into the netmap ring.
+ * j is an index in the netmap ring, l in the NIC ring, and
+ * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
+ * l = rxr->next_to_check;
+ * and
+ * j == (l + kring->nkr_hwofs) % ring_size
+ */
+ l = rxr->next_to_check;
+ j = l + kring->nkr_hwofs;
+ /* here nkr_hwofs can be negative so must check for j < 0 */
+ if (j < 0)
+ j += lim + 1;
+ else if (j > lim)
+ j -= lim + 1;
for (n = 0; ; n++) {
- struct e1000_rx_desc *curr = &rxr->rx_base[j];
+ struct e1000_rx_desc *curr = &rxr->rx_base[l];
if ((curr->status & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[j].len = le16toh(curr->length);
- bus_dmamap_sync(rxr->tag, rxr->rx_buffers[j].map,
+ bus_dmamap_sync(rxr->tag, rxr->rx_buffers[l].map,
BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
+ /* make sure next_to_refresh follows next_to_check */
+ rxr->next_to_refresh = l; // XXX
+ l = (l == lim) ? 0 : l + 1;
}
if (n) {
- rxr->next_to_check = j;
+ rxr->next_to_check = l;
kring->nr_hwavail += n;
}
- /* skip past packets that userspace has already processed:
- * making them available for reception.
- * advance nr_hwcur and issue a bus_dmamap_sync on the
- * buffers so it is safe to write to them.
- * Also increase nr_hwavail
- */
+ /* skip past packets that userspace has already processed */
j = kring->nr_hwcur;
if (j != k) { /* userspace has read some packets. */
n = 0;
+ l = j - kring->nkr_hwofs; /* NIC ring index */
+ /* here nkr_hwofs can be negative so check for l > lim */
+ if (l < 0)
+ l += lim + 1;
+ else if (l > lim)
+ l -= lim + 1;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct e1000_rx_desc *curr = &rxr->rx_base[j];
- struct em_buffer *rxbuf = &rxr->rx_buffers[j];
+ struct e1000_rx_desc *curr = &rxr->rx_base[l];
+ struct em_buffer *rxbuf = &rxr->rx_buffers[l];
void *addr = NMB(slot);
if (addr == netmap_buffer_base) { /* bad buf */
@@ -352,28 +366,29 @@ em_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
curr->status = 0;
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(vtophys(addr));
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(rxr->rxtag, rxbuf->map,
- addr, na->buff_size);
+ addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(rxr->rxtag, rxbuf->map,
- BUS_DMASYNC_PREREAD);
+ BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = ring->cur;
+ kring->nr_hwcur = k;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
- * so move j back by one unit
+ * so move l back by one unit
*/
- j = (j == 0) ? lim : j - 1;
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), j);
+ l = (l == 0) ? lim : l - 1;
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
}
/* tell userspace that there are new packets */
ring->avail = kring->nr_hwavail ;
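Note the final RDT update in the rxsync above moves l back by one slot before writing the register. This is the usual head/tail disambiguation: if every slot were handed to the NIC, tail would catch up with head and the hardware could not tell a completely full ring from a completely empty one. A sketch of the invariant:

	/* at most lim = nkr_num_slots - 1 slots may be owned by the NIC */
	l = (l == 0) ? lim : l - 1;	/* hold one slot back */
	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);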
diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h
index 0c14706..c048ec4 100644
--- a/sys/dev/netmap/if_igb_netmap.h
+++ b/sys/dev/netmap/if_igb_netmap.h
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: if_igb_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: if_igb_netmap.h 9802 2011-12-02 18:42:37Z luigi $
*
* netmap modifications for igb
* contributed by Ahmed Kooli
@@ -58,12 +58,7 @@ igb_netmap_attach(struct adapter *adapter)
na.nm_rxsync = igb_netmap_rxsync;
na.nm_lock = igb_netmap_lock_wrapper;
na.nm_register = igb_netmap_reg;
- /*
- * adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode
- * we allocate the buffers on the first register. So we must
- * disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set.
- */
- na.buff_size = MCLBYTES;
+ na.buff_size = NETMAP_BUF_SIZE;
netmap_attach(&na, adapter->num_queues);
}
@@ -111,7 +106,7 @@ igb_netmap_reg(struct ifnet *ifp, int onoff)
struct netmap_adapter *na = NA(ifp);
int error = 0;
- if (!na)
+ if (na == NULL)
return EINVAL;
igb_disable_intr(adapter);
@@ -144,21 +139,6 @@ fail:
/*
* Reconcile kernel and user view of the transmit ring.
- *
- * Userspace has filled tx slots up to cur (excluded).
- * The last unused slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available
- * (using the special value -1 to indicate idle transmit ring).
- * The function must first update avail to what the kernel
- * knows, subtract the newly used slots (cur - nr_hwcur)
- * from both avail and nr_hwavail, and set nr_hwcur = cur
- * issuing a dmamap_sync on all slots.
- *
- * Check parameters in the struct netmap_ring.
- * We don't use avail, only check for bogus values.
- * Make sure cur is valid, and same goes for buffer indexes and lengths.
- * To avoid races, read the values once, and never use those from
- * the ring afterwards.
*/
static int
igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -168,54 +148,40 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
/* generate an interrupt approximately every half ring */
int report_frequency = kring->nkr_num_slots >> 1;
- k = ring->cur; /* ring is not protected by any lock */
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ k = ring->cur;
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
IGB_TX_LOCK(txr);
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
- BUS_DMASYNC_POSTREAD);
-
- /* record completed transmissions. TODO
- *
- * Instead of reading from the TDH register, we could and try to check
- * the status bit of descriptor packets.
- */
- j = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
- if (j >= kring->nkr_num_slots) /* XXX can it happen ? */
- j -= kring->nkr_num_slots;
- int delta = j - txr->next_to_clean;
- if (delta) {
- /* new tx were completed */
- if (delta < 0)
- delta += kring->nkr_num_slots;
- txr->next_to_clean = j;
- kring->nr_hwavail += delta;
- }
+ BUS_DMASYNC_POSTREAD);
/* update avail to what the hardware knows */
ring->avail = kring->nr_hwavail;
- j = kring->nr_hwcur;
+ j = kring->nr_hwcur; /* netmap ring index */
if (j != k) { /* we have new packets to send */
u32 olinfo_status = 0;
- n = 0;
+ int n = 0;
+ l = j - kring->nkr_hwofs; /* NIC ring index */
+ if (l < 0)
+ l += lim + 1;
/* 82575 needs the queue index added */
if (adapter->hw.mac.type == e1000_82575)
olinfo_status |= txr->me << 4;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct igb_tx_buffer *txbuf = &txr->tx_buffers[j];
+ struct igb_tx_buffer *txbuf = &txr->tx_buffers[l];
union e1000_adv_tx_desc *curr =
- (union e1000_adv_tx_desc *)&txr->tx_base[j];
+ (union e1000_adv_tx_desc *)&txr->tx_base[l];
void *addr = NMB(slot);
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
@@ -229,6 +195,7 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
}
slot->flags &= ~NS_REPORT;
+ // XXX do we need to set the address ?
curr->read.buffer_addr = htole64(vtophys(addr));
curr->read.olinfo_status =
htole32(olinfo_status |
@@ -239,7 +206,7 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
E1000_ADVTXD_DCMD_DEXT |
E1000_ADVTXD_DCMD_EOP | flags);
if (slot->flags & NS_BUF_CHANGED) {
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map,
addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
@@ -248,22 +215,40 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwcur = k;
/* decrease avail by number of sent packets */
- ring->avail -= n;
- kring->nr_hwavail = ring->avail;
+ kring->nr_hwavail -= n;
+ ring->avail = kring->nr_hwavail;
- /* Set the watchdog */
+ /* Set the watchdog XXX ? */
txr->queue_status = IGB_QUEUE_WORKING;
txr->watchdog_time = ticks;
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), k);
+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
+ }
+ if (n == 0 || kring->nr_hwavail < 1) {
+ int delta;
+
+ /* record completed transmission using TDH */
+ l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+ if (l >= kring->nkr_num_slots) /* XXX can it happen ? */
+ l -= kring->nkr_num_slots;
+ delta = l - txr->next_to_clean;
+ if (delta) {
+ /* new tx were completed */
+ if (delta < 0)
+ delta += kring->nkr_num_slots;
+ txr->next_to_clean = l;
+ kring->nr_hwavail += delta;
+ ring->avail = kring->nr_hwavail;
+ }
}
if (do_lock)
IGB_TX_UNLOCK(txr);
@@ -273,15 +258,6 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock)
/*
* Reconcile kernel and user view of the receive ring.
- *
- * Userspace has read rx slots up to cur (excluded).
- * The last unread slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available.
- * We must subtract the newly consumed slots (cur - nr_hwcur)
- * from nr_hwavail, clearing the descriptors for the next
- * read, tell the hardware that they are available,
- * and set nr_hwcur = cur and avail = nr_hwavail.
- * issuing a dmamap_sync on all slots.
*/
static int
igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -291,10 +267,10 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
- k = ring->cur; /* ring is not protected by any lock */
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ k = ring->cur;
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -304,9 +280,12 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- j = rxr->next_to_check;
+ l = rxr->next_to_check;
+ j = l + kring->nkr_hwofs;
+ if (j > lim)
+ j -= lim + 1;
for (n = 0; ; n++) {
- union e1000_adv_rx_desc *curr = &rxr->rx_base[j];
+ union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & E1000_RXD_STAT_DD) == 0)
@@ -314,15 +293,13 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
ring->slot[j].len = le16toh(curr->wb.upper.length);
bus_dmamap_sync(rxr->ptag,
- rxr->rx_buffers[j].pmap, BUS_DMASYNC_POSTREAD);
+ rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
}
if (n) {
- rxr->next_to_check = j;
+ rxr->next_to_check = l;
kring->nr_hwavail += n;
- if (kring->nr_hwavail >= lim - 10) {
- ND("rx ring %d almost full %d", ring_nr, kring->nr_hwavail);
- }
}
/* skip past packets that userspace has already processed,
@@ -332,12 +309,15 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
* Also increase nr_hwavail
*/
j = kring->nr_hwcur;
+ l = kring->nr_hwcur - kring->nkr_hwofs;
+ if (l < 0)
+ l += lim + 1;
if (j != k) { /* userspace has read some packets. */
n = 0;
while (j != k) {
struct netmap_slot *slot = ring->slot + j;
- union e1000_adv_rx_desc *curr = &rxr->rx_base[j];
- struct igb_rx_buf *rxbuf = rxr->rx_buffers + j;
+ union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
+ struct igb_rx_buf *rxbuf = rxr->rx_buffers + l;
void *addr = NMB(slot);
if (addr == netmap_buffer_base) { /* bad buf */
@@ -358,6 +338,7 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwavail -= n;
@@ -365,10 +346,10 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* IMPORTANT: we must leave one free slot in the ring,
- * so move j back by one unit
+ * so move l back by one unit
*/
- j = (j == 0) ? lim : j - 1;
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), j);
+ l = (l == 0) ? lim : l - 1;
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
}
/* tell userspace that there are new packets */
ring->avail = kring->nr_hwavail ;
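One igb-specific detail in the txsync above: 82575-class NICs require the queue index in the advanced descriptor's olinfo field, while later chips infer it. A sketch of the composition, assuming the payload-length shift macro from the igb headers:

	u32 olinfo_status = 0;

	if (adapter->hw.mac.type == e1000_82575)
		olinfo_status |= txr->me << 4;	/* 82575 wants the queue index */
	curr->read.olinfo_status = htole32(olinfo_status |
	    (len << E1000_ADVTXD_PAYLEN_SHIFT));	/* payload length */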
diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h
index a8f3498..ae64cd6 100644
--- a/sys/dev/netmap/if_lem_netmap.h
+++ b/sys/dev/netmap/if_lem_netmap.h
@@ -25,9 +25,12 @@
/*
* $FreeBSD$
- * $Id: if_lem_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: if_lem_netmap.h 9802 2011-12-02 18:42:37Z luigi $
*
* netmap support for if_lem.c
+ *
+ * For structure and details on the individual functions please see
+ * ixgbe_netmap.h
*/
#include <net/netmap.h>
@@ -59,7 +62,7 @@ lem_netmap_attach(struct adapter *adapter)
na.nm_rxsync = lem_netmap_rxsync;
na.nm_lock = lem_netmap_lock_wrapper;
na.nm_register = lem_netmap_reg;
- na.buff_size = MCLBYTES;
+ na.buff_size = NETMAP_BUF_SIZE;
netmap_attach(&na, 1);
}
@@ -94,7 +97,61 @@ lem_netmap_lock_wrapper(void *_a, int what, u_int ringid)
/*
- * Reconcile kernel and user view of the transmit ring. see ixgbe.c
+ * Register/unregister routine
+ */
+static int
+lem_netmap_reg(struct ifnet *ifp, int onoff)
+{
+ struct adapter *adapter = ifp->if_softc;
+ struct netmap_adapter *na = NA(ifp);
+ int error = 0;
+
+ if (na == NULL)
+ return EINVAL;
+
+ lem_disable_intr(adapter);
+
+ /* Tell the stack that the interface is no longer active */
+ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+ /* lem_netmap_block_tasks(adapter); */
+#ifndef EM_LEGACY_IRQ // XXX do we need this ?
+ taskqueue_block(adapter->tq);
+ taskqueue_drain(adapter->tq, &adapter->rxtx_task);
+ taskqueue_drain(adapter->tq, &adapter->link_task);
+#endif /* !EM_LEGACY_IRQ */
+ if (onoff) {
+ ifp->if_capenable |= IFCAP_NETMAP;
+
+ /* save if_transmit to restore it when exiting.
+ * XXX what about if_start and if_qflush ?
+ */
+ na->if_transmit = ifp->if_transmit;
+ ifp->if_transmit = netmap_start;
+
+ lem_init_locked(adapter);
+ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
+ error = ENOMEM;
+ goto fail;
+ }
+ } else {
+fail:
+ /* restore non-netmap mode */
+ ifp->if_transmit = na->if_transmit;
+ ifp->if_capenable &= ~IFCAP_NETMAP;
+ lem_init_locked(adapter); /* also enables intr */
+ }
+
+#ifndef EM_LEGACY_IRQ
+ taskqueue_unblock(adapter->tq); // XXX do we need this ?
+#endif /* !EM_LEGACY_IRQ */
+
+ return (error);
+}
+
+
+/*
+ * Reconcile kernel and user view of the transmit ring.
*/
static int
lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -103,13 +160,13 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->tx_rings[0];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
/* generate an interrupt approximately every half ring */
int report_frequency = kring->nkr_num_slots >> 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -117,33 +174,18 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
- /* record completed transmissions TODO
- *
- * instead of using TDH, we could read the transmitted status bit.
- */
- j = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
- if (j >= kring->nkr_num_slots) { /* can it happen ? */
- D("bad TDH %d", j);
- j -= kring->nkr_num_slots;
- }
- int delta = j - adapter->next_tx_to_clean;
- if (delta) {
- if (delta < 0)
- delta += kring->nkr_num_slots;
- adapter->next_tx_to_clean = j;
- kring->nr_hwavail += delta;
- }
-
/* update avail to what the hardware knows */
ring->avail = kring->nr_hwavail;
- j = kring->nr_hwcur;
+ j = kring->nr_hwcur; /* points into the netmap ring */
if (j != k) { /* we have new packets to send */
- n = 0;
+ l = j - kring->nkr_hwofs; /* points into the NIC ring */
+ if (l < 0)
+ l += lim + 1;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct e1000_tx_desc *curr = &adapter->tx_desc_base[j];
- struct em_buffer *txbuf = &adapter->tx_buffer_area[j];
+ struct e1000_tx_desc *curr = &adapter->tx_desc_base[l];
+ struct em_buffer *txbuf = &adapter->tx_buffer_area[l];
void *addr = NMB(slot);
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
@@ -156,34 +198,54 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
return netmap_ring_reinit(kring);
}
+ slot->flags &= ~NS_REPORT;
curr->upper.data = 0;
- /* always interrupt. XXX make it conditional */
curr->lower.data =
htole32( adapter->txd_cmd | len |
(E1000_TXD_CMD_EOP | flags) );
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(vtophys(addr));
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(adapter->txtag, txbuf->map,
- addr, na->buff_size);
+ addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(adapter->txtag, txbuf->map,
- BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
- kring->nr_hwcur = ring->cur;
+ kring->nr_hwcur = k;
/* decrease avail by number of sent packets */
- ring->avail -= n;
- kring->nr_hwavail = ring->avail;
+ kring->nr_hwavail -= n;
+ ring->avail = kring->nr_hwavail;
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), ring->cur);
+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), l);
+ }
+
+ if (n == 0 || kring->nr_hwavail < 1) {
+ int delta;
+
+ /* record completed transmissions using TDH */
+ l = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
+ if (l >= kring->nkr_num_slots) { /* can it happen ? */
+ D("bad TDH %d", l);
+ l -= kring->nkr_num_slots;
+ }
+ delta = l - adapter->next_tx_to_clean;
+ if (delta) {
+ if (delta < 0)
+ delta += kring->nkr_num_slots;
+ adapter->next_tx_to_clean = l;
+ kring->nr_hwavail += delta;
+ ring->avail = kring->nr_hwavail;
+ }
}
if (do_lock)
EM_TX_UNLOCK(adapter);
@@ -192,7 +254,7 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock)
/*
- * Reconcile kernel and user view of the receive ring. see ixgbe.c
+ * Reconcile kernel and user view of the receive ring.
*/
static int
lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -201,10 +263,10 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->rx_rings[0];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -213,40 +275,45 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- /* acknowldge all the received packets. */
- j = adapter->next_rx_desc_to_check;
+ /* import newly received packets into the netmap ring */
+ l = adapter->next_rx_desc_to_check; /* points into the NIC ring */
+ j = l + kring->nkr_hwofs; /* points into the netmap ring */
+ if (j > lim)
+ j -= lim + 1;
for (n = 0; ; n++) {
- struct e1000_rx_desc *curr = &adapter->rx_desc_base[j];
- int len = le16toh(adapter->rx_desc_base[j].length) - 4; // CRC
+ struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
+ int len;
if ((curr->status & E1000_RXD_STAT_DD) == 0)
break;
+ len = le16toh(curr->length) - 4; // CRC
if (len < 0) {
D("bogus pkt size at %d", j);
len = 0;
}
ring->slot[j].len = len;
- bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[j].map,
- BUS_DMASYNC_POSTREAD);
+ bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[l].map,
+ BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
}
if (n) {
- adapter->next_rx_desc_to_check = j;
+ adapter->next_rx_desc_to_check = l;
kring->nr_hwavail += n;
}
- /* skip past packets that userspace has already processed,
- * making them available for reception. We don't need to set
- * the length as it is the same for all slots.
- */
- j = kring->nr_hwcur;
+ /* skip past packets that userspace has already processed */
+ j = kring->nr_hwcur; /* netmap ring index */
if (j != k) { /* userspace has read some packets. */
n = 0;
+ l = j - kring->nkr_hwofs; /* NIC ring index */
+ if (l < 0)
+ l += lim + 1;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct e1000_rx_desc *curr = &adapter->rx_desc_base[j];
- struct em_buffer *rxbuf = &adapter->rx_buffer_area[j];
+ struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
+ struct em_buffer *rxbuf = &adapter->rx_buffer_area[l];
void *addr = NMB(slot);
if (addr == netmap_buffer_base) { /* bad buf */
@@ -254,32 +321,32 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
EM_RX_UNLOCK(adapter);
return netmap_ring_reinit(kring);
}
- curr = &adapter->rx_desc_base[j];
curr->status = 0;
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(vtophys(addr));
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(adapter->rxtag, rxbuf->map,
- addr, na->buff_size);
+ addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(adapter->rxtag, rxbuf->map,
- BUS_DMASYNC_PREREAD);
+ BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = ring->cur;
+ kring->nr_hwcur = k;
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
- * so move j back by one unit
+ * so move l back by one unit
*/
- j = (j == 0) ? lim : j - 1;
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), j);
+ l = (l == 0) ? lim : l - 1;
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), l);
}
/* tell userspace that there are new packets */
@@ -290,55 +357,3 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
}
-/*
- * Register/unregister routine
- */
-static int
-lem_netmap_reg(struct ifnet *ifp, int onoff)
-{
- struct adapter *adapter = ifp->if_softc;
- struct netmap_adapter *na = NA(ifp);
- int error = 0;
-
- if (!na)
- return EINVAL;
-
- lem_disable_intr(adapter);
-
- /* Tell the stack that the interface is no longer active */
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
-
- /* lem_netmap_block_tasks(adapter); */
-#ifndef EM_LEGACY_IRQ
- taskqueue_block(adapter->tq);
- taskqueue_drain(adapter->tq, &adapter->rxtx_task);
- taskqueue_drain(adapter->tq, &adapter->link_task);
-#endif /* !EM_LEGCY_IRQ */
- if (onoff) {
- ifp->if_capenable |= IFCAP_NETMAP;
-
- /* save if_transmit to restore it when exiting.
- * XXX what about if_start and if_qflush ?
- */
- na->if_transmit = ifp->if_transmit;
- ifp->if_transmit = netmap_start;
-
- lem_init_locked(adapter);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
- error = ENOMEM;
- goto fail;
- }
- } else {
-fail:
- /* restore non-netmap mode */
- ifp->if_transmit = na->if_transmit;
- ifp->if_capenable &= ~IFCAP_NETMAP;
- lem_init_locked(adapter); /* also enables intr */
- }
-
-#ifndef EM_LEGACY_IRQ
- taskqueue_unblock(adapter->tq);
-#endif /* !EM_LEGCY_IRQ */
-
- return (error);
-}
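A lem-specific detail from the rxsync hunk above: this NIC reports frame lengths that include the 4-byte Ethernet CRC, so the patch strips it and clamps runts, roughly:

	len = le16toh(curr->length) - 4;	/* remove trailing CRC/FCS */
	if (len < 0) {				/* runt or corrupted descriptor */
		D("bogus pkt size at %d", j);	/* j is the netmap slot index */
		len = 0;
	}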
diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h
index efccf3a..105660c 100644
--- a/sys/dev/netmap/if_re_netmap.h
+++ b/sys/dev/netmap/if_re_netmap.h
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: if_re_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: if_re_netmap.h 9802 2011-12-02 18:42:37Z luigi $
*
* netmap support for if_re
*/
@@ -56,7 +56,7 @@ re_netmap_attach(struct rl_softc *sc)
na.nm_rxsync = re_netmap_rxsync;
na.nm_lock = re_netmap_lock_wrapper;
na.nm_register = re_netmap_reg;
- na.buff_size = MCLBYTES;
+ na.buff_size = NETMAP_BUF_SIZE;
netmap_attach(&na, 1);
}
@@ -99,7 +99,7 @@ re_netmap_reg(struct ifnet *ifp, int onoff)
struct netmap_adapter *na = NA(ifp);
int error = 0;
- if (!na)
+ if (na == NULL)
return EINVAL;
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
@@ -109,9 +109,8 @@ re_netmap_reg(struct ifnet *ifp, int onoff)
if (onoff) {
ifp->if_capenable |= IFCAP_NETMAP;
- /* save if_transmit and restore it */
+ /* save if_transmit to restore it later */
na->if_transmit = ifp->if_transmit;
- /* XXX if_start and if_qflush ??? */
ifp->if_transmit = netmap_start;
re_init_locked(adapter);
@@ -127,23 +126,12 @@ fail:
ifp->if_capenable &= ~IFCAP_NETMAP;
re_init_locked(adapter); /* also enables intr */
}
- return (error);
-
+ return (error);
}
/*
* Reconcile kernel and user view of the transmit ring.
- *
- * Userspace has filled tx slots up to cur (excluded).
- * The last unused slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available
- * (using the special value -1 to indicate idle transmit ring).
- * The function must first update avail to what the kernel
- * knows (translating the -1 to nkr_num_slots - 1),
- * subtract the newly used slots (cur - nr_hwcur)
- * from both avail and nr_hwavail, and set nr_hwcur = cur
- * issuing a dmamap_sync on all slots.
*/
static int
re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -153,10 +141,10 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -167,17 +155,18 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
sc->rl_ldata.rl_tx_list_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+ /* XXX move after the transmissions */
/* record completed transmissions */
- for (n = 0, j = sc->rl_ldata.rl_tx_considx;
- j != sc->rl_ldata.rl_tx_prodidx;
- n++, j = RL_TX_DESC_NXT(sc, j)) {
+ for (n = 0, l = sc->rl_ldata.rl_tx_considx;
+ l != sc->rl_ldata.rl_tx_prodidx;
+ n++, l = RL_TX_DESC_NXT(sc, l)) {
uint32_t cmdstat =
- le32toh(sc->rl_ldata.rl_tx_list[j].rl_cmdstat);
+ le32toh(sc->rl_ldata.rl_tx_list[l].rl_cmdstat);
if (cmdstat & RL_TDESC_STAT_OWN)
break;
}
if (n > 0) {
- sc->rl_ldata.rl_tx_considx = j;
+ sc->rl_ldata.rl_tx_considx = l;
sc->rl_ldata.rl_tx_free += n;
kring->nr_hwavail += n;
}
@@ -185,13 +174,13 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
/* update avail to what the hardware knows */
ring->avail = kring->nr_hwavail;
- /* we trust prodidx, not hwcur */
- j = kring->nr_hwcur = sc->rl_ldata.rl_tx_prodidx;
+ j = kring->nr_hwcur;
if (j != k) { /* we have new packets to send */
n = 0;
+ l = sc->rl_ldata.rl_tx_prodidx;
while (j != k) {
struct netmap_slot *slot = &ring->slot[j];
- struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[j];
+ struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[l];
int cmd = slot->len | RL_TDESC_CMD_EOF |
RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF ;
void *addr = NMB(slot);
@@ -200,10 +189,11 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
if (do_lock)
RL_UNLOCK(sc);
+ // XXX what about prodidx ?
return netmap_ring_reinit(kring);
}
- if (j == lim) /* mark end of ring */
+ if (l == lim) /* mark end of ring */
cmd |= RL_TDESC_CMD_EOR;
if (slot->flags & NS_BUF_CHANGED) {
@@ -212,17 +202,19 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
/* buffer has changed, unload and reload map */
netmap_reload_map(sc->rl_ldata.rl_tx_mtag,
- txd[j].tx_dmamap, addr, na->buff_size);
+ txd[l].tx_dmamap, addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
slot->flags &= ~NS_REPORT;
desc->rl_cmdstat = htole32(cmd);
bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
- txd[j].tx_dmamap, BUS_DMASYNC_PREWRITE);
+ txd[l].tx_dmamap, BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
- sc->rl_ldata.rl_tx_prodidx = kring->nr_hwcur = ring->cur;
+ sc->rl_ldata.rl_tx_prodidx = l;
+ kring->nr_hwcur = k;
/* decrease avail by number of sent packets */
ring->avail -= n;
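Unlike the e1000-family drivers, which learn about completed transmissions from the TDH register, re(4) polls the OWN bit of each descriptor, as in the reclaim loop at the top of this txsync. A sketch of that pattern as a hypothetical helper:

	static int
	rl_tx_reclaim(struct rl_softc *sc)
	{
		int n, l;

		for (n = 0, l = sc->rl_ldata.rl_tx_considx;
		    l != sc->rl_ldata.rl_tx_prodidx;
		    n++, l = RL_TX_DESC_NXT(sc, l)) {
			uint32_t cs = le32toh(sc->rl_ldata.rl_tx_list[l].rl_cmdstat);
			if (cs & RL_TDESC_STAT_OWN)	/* NIC still owns it */
				break;
		}
		return n;	/* descriptors completed since the last call */
	}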
@@ -243,15 +235,6 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock)
/*
* Reconcile kernel and user view of the receive ring.
- *
- * Userspace has read rx slots up to cur (excluded).
- * The last unread slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available.
- * We must subtract the newly consumed slots (cur - nr_hwcur)
- * from nr_hwavail, clearing the descriptors for the next
- * read, tell the hardware that they are available,
- * and set nr_hwcur = cur and avail = nr_hwavail.
- * issuing a dmamap_sync on all slots.
*/
static int
re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -261,10 +244,10 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
k = ring->cur;
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ if (k > lim)
return netmap_ring_reinit(kring);
if (do_lock)
@@ -280,9 +263,10 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
* cleared (all buffers could have it cleared. The easiest one
* is to limit the amount of data reported up to 'lim'
*/
- j = sc->rl_ldata.rl_rx_prodidx;
+ l = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
+ j = l + kring->nkr_hwofs;
+ if (j > lim)
+ j -= lim + 1;
for (n = kring->nr_hwavail; n < lim ; n++) {
- struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[j];
+ struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l];
uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
uint32_t total_len;
@@ -294,11 +278,12 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
kring->ring->slot[j].len = total_len;
/* sync was in re_newbuf() */
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
- rxd[j].rx_dmamap, BUS_DMASYNC_POSTREAD);
- j = RL_RX_DESC_NXT(sc, j);
+ rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD);
+ j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
}
if (n != kring->nr_hwavail) {
- sc->rl_ldata.rl_rx_prodidx = j;
+ sc->rl_ldata.rl_rx_prodidx = l;
sc->rl_ifp->if_ipackets += n - kring->nr_hwavail;
kring->nr_hwavail = n;
}
@@ -312,9 +297,12 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
j = kring->nr_hwcur;
if (j != k) { /* userspace has read some packets. */
n = 0;
+ l = kring->nr_hwcur - kring->nkr_hwofs;
+ if (l < 0)
+ l += lim + 1;
while (j != k) {
struct netmap_slot *slot = ring->slot + j;
- struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[j];
+ struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[l];
int cmd = na->buff_size | RL_RDESC_CMD_OWN;
void *addr = NMB(slot);
@@ -324,7 +312,7 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
return netmap_ring_reinit(kring);
}
- if (j == lim) /* mark end of ring */
+ if (l == lim) /* mark end of ring */
cmd |= RL_RDESC_CMD_EOR;
desc->rl_cmdstat = htole32(cmd);
@@ -334,12 +322,13 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
- rxd[j].rx_dmamap, addr, na->buff_size);
+ rxd[l].rx_dmamap, addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
- rxd[j].rx_dmamap, BUS_DMASYNC_PREREAD);
+ rxd[l].rx_dmamap, BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwavail -= n;
@@ -351,18 +340,22 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
}
/* tell userspace that there are new packets */
- ring->avail = kring->nr_hwavail ;
+ ring->avail = kring->nr_hwavail;
if (do_lock)
RL_UNLOCK(sc);
return 0;
}
+/*
+ * Additional routines to init the tx and rx rings.
+ * In other drivers we do that inline in the main code.
+ */
static void
re_netmap_tx_init(struct rl_softc *sc)
{
struct rl_txdesc *txd;
struct rl_desc *desc;
- int i;
+ int i, n;
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0);
@@ -372,11 +365,20 @@ re_netmap_tx_init(struct rl_softc *sc)
/* in netmap mode, overwrite addresses and maps */
txd = sc->rl_ldata.rl_tx_desc;
desc = sc->rl_ldata.rl_tx_list;
+ n = sc->rl_ldata.rl_tx_desc_cnt;
+
+ /* l points in the netmap ring, i points in the NIC ring */
+ for (i = 0; i < n; i++) {
+ void *addr;
+ uint64_t paddr;
+ struct netmap_kring *kring = &na->tx_rings[0];
+ int l = i + kring->nkr_hwofs;
- for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) {
- void *addr = NMB(slot+i);
- uint64_t paddr = vtophys(addr);
+ if (l >= n)
+ l -= n;
+ addr = NMB(slot + l);
+ paddr = vtophys(addr);
desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_load_map(sc->rl_ldata.rl_tx_mtag,
@@ -387,26 +389,39 @@ re_netmap_tx_init(struct rl_softc *sc)
static void
re_netmap_rx_init(struct rl_softc *sc)
{
- /* slot is NULL if we are not in netmap mode */
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
struct rl_desc *desc = sc->rl_ldata.rl_rx_list;
uint32_t cmdstat;
- int i;
+ int i, n;
if (!slot)
return;
-
- for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) {
- void *addr = NMB(slot+i);
- uint64_t paddr = vtophys(addr);
-
+ n = sc->rl_ldata.rl_rx_desc_cnt;
+ for (i = 0; i < n; i++) {
+ void *addr;
+ uint64_t paddr;
+ struct netmap_kring *kring = &na->rx_rings[0];
+ int l = i + kring->nkr_hwofs;
+
+ if (l >= n)
+ l -= n;
+
+ addr = NMB(slot + l);
+ paddr = vtophys(addr);
desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
- cmdstat = slot[i].len = na->buff_size; // XXX
- if (i == sc->rl_ldata.rl_rx_desc_cnt - 1)
+ cmdstat = na->buff_size;
+ if (i == n - 1)
cmdstat |= RL_RDESC_CMD_EOR;
- desc[i].rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN);
+ /*
+ * userspace knows that hwavail packets were ready before the
+ * reset, so we need to tell the NIC that the last hwavail
+ * descriptors of the ring are still owned by the driver.
+ */
+ if (i < n - 1 - kring->nr_hwavail) // XXX + 1 ?
+ cmdstat |= RL_RDESC_CMD_OWN;
+ desc[i].rl_cmdstat = htole32(cmdstat);
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
sc->rl_ldata.rl_rx_desc[i].rx_dmamap,
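The ownership logic in re_netmap_rx_init() is easier to check with concrete numbers (an editorial sketch, values made up): with n = 256 descriptors and kring->nr_hwavail = 10 packets unread before the reset,

	/* i = 0 .. 244: RL_RDESC_CMD_OWN set, 245 buffers offered to the NIC */
	/* i = 245 .. 255: OWN clear; the 10 unread buffers plus one spare
	 * slot stay with the driver. The "XXX + 1 ?" above is the author's
	 * open question on whether the spare slot is counted correctly. */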
diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
index a4d5491..6c8b2b6 100644
--- a/sys/dev/netmap/ixgbe_netmap.h
+++ b/sys/dev/netmap/ixgbe_netmap.h
@@ -25,25 +25,48 @@
/*
* $FreeBSD$
- * $Id: ixgbe_netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: ixgbe_netmap.h 9802 2011-12-02 18:42:37Z luigi $
*
* netmap modifications for ixgbe
+ *
+ * This file is meant to be a reference on how to implement
+ * netmap support for a network driver.
+ * This file contains code, but only static or inline functions
+ * that are used by a single driver. To avoid replication of
+ * code we just #include it near the beginning of the
+ * standard driver.
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
-// #include <vm/vm.h>
-// #include <vm/pmap.h> /* vtophys ? */
+/*
+ * Some drivers may need the following headers. Others
+ * already include them by default
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+ */
+
#include <dev/netmap/netmap_kern.h>
+/*
+ * prototypes for the new API calls that are used by the
+ * *_netmap_attach() routine.
+ */
static int ixgbe_netmap_reg(struct ifnet *, int onoff);
static int ixgbe_netmap_txsync(void *, u_int, int);
static int ixgbe_netmap_rxsync(void *, u_int, int);
static void ixgbe_netmap_lock_wrapper(void *, int, u_int);
-SYSCTL_NODE(_dev, OID_AUTO, ixgbe, CTLFLAG_RW, 0, "ixgbe card");
-
+/*
+ * The attach routine, called near the end of ixgbe_attach(),
+ * fills the parameters for netmap_attach() and calls it.
+ * It cannot fail; in the worst case (such as no memory)
+ * netmap mode will be disabled and the driver will only
+ * operate in standard mode.
+ */
static void
ixgbe_netmap_attach(struct adapter *adapter)
{
@@ -52,7 +75,7 @@ ixgbe_netmap_attach(struct adapter *adapter)
bzero(&na, sizeof(na));
na.ifp = adapter->ifp;
- na.separate_locks = 1;
+ na.separate_locks = 1; /* this card has separate rx/tx locks */
na.num_tx_desc = adapter->num_tx_desc;
na.num_rx_desc = adapter->num_rx_desc;
na.nm_txsync = ixgbe_netmap_txsync;
@@ -60,17 +83,18 @@ ixgbe_netmap_attach(struct adapter *adapter)
na.nm_lock = ixgbe_netmap_lock_wrapper;
na.nm_register = ixgbe_netmap_reg;
/*
+ * XXX where do we put this comment ?
* adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode
* we allocate the buffers on the first register. So we must
* disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set.
*/
- na.buff_size = MCLBYTES;
+ na.buff_size = NETMAP_BUF_SIZE;
netmap_attach(&na, adapter->num_queues);
}
/*
- * wrapper to export locks to the generic code
+ * wrapper to export locks to the generic netmap code.
*/
static void
ixgbe_netmap_lock_wrapper(void *_a, int what, u_int queueid)
@@ -102,8 +126,8 @@ ixgbe_netmap_lock_wrapper(void *_a, int what, u_int queueid)
/*
- * support for netmap register/unregisted. We are already under core lock.
- * only called on the first init or the last unregister.
+ * Netmap register/unregister. We are already under core lock.
+ * Only called on the first register or the last unregister.
*/
static int
ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
@@ -112,7 +136,7 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
struct netmap_adapter *na = NA(ifp);
int error = 0;
- if (!na)
+ if (!na) /* probably, netmap_attach() failed */
return EINVAL;
ixgbe_disable_intr(adapter);
@@ -120,23 +144,28 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
- if (onoff) {
+ if (onoff) { /* enable netmap mode */
ifp->if_capenable |= IFCAP_NETMAP;
- /* save if_transmit to restore it later */
+ /* save if_transmit and replace with our routine */
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_start;
+ /*
+ * reinitialize the adapter, now with netmap flag set,
+ * so the rings will be set accordingly.
+ */
ixgbe_init_locked(adapter);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
error = ENOMEM;
goto fail;
}
- } else {
+ } else { /* reset normal mode (explicit request or netmap failed) */
fail:
/* restore if_transmit */
ifp->if_transmit = na->if_transmit;
ifp->if_capenable &= ~IFCAP_NETMAP;
+ /* initialize the card, this time in standard mode */
ixgbe_init_locked(adapter); /* also enables intr */
}
return (error);
@@ -145,21 +174,23 @@ fail:
/*
* Reconcile kernel and user view of the transmit ring.
+ * This routine might be called frequently so it must be efficient.
+ *
+ * Userspace has filled tx slots up to ring->cur (excluded).
+ * The last unused slot previously known to the kernel was kring->nr_hwcur,
+ * and the last interrupt reported kring->nr_hwavail slots available.
*
- * Userspace has filled tx slots up to cur (excluded).
- * The last unused slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available
- * (using the special value -1 to indicate idle transmit ring).
- * The function must first update avail to what the kernel
- * knows, subtract the newly used slots (cur - nr_hwcur)
- * from both avail and nr_hwavail, and set nr_hwcur = cur
+ * This function runs under lock (acquired from the caller or internally).
+ * It must first update ring->avail to what the kernel knows,
+ * subtract the newly used slots (ring->cur - kring->nr_hwcur)
+ * from both avail and nr_hwavail, and set kring->nr_hwcur = ring->cur
* issuing a dmamap_sync on all slots.
*
- * Check parameters in the struct netmap_ring.
- * We don't use avail, only check for bogus values.
- * Make sure cur is valid, and same goes for buffer indexes and lengths.
- * To avoid races, read the values once, and never use those from
- * the ring afterwards.
+ * Since ring comes from userspace, its content must be read only once,
+ * and validated before being used to update the kernel's structures.
+ * (this is also true for every use of ring in the kernel).
+ *
+ * ring->avail is never used, only checked for bogus values.
*/
static int
ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -169,42 +200,96 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n = 0, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
- /* generate an interrupt approximately every half ring */
+ /*
+ * ixgbe can generate an interrupt on every tx packet, but it
+ * seems very expensive, so we interrupt once every half ring,
+ * or when requested with NS_REPORT
+ */
int report_frequency = kring->nkr_num_slots >> 1;
- k = ring->cur; /* ring is not protected by any lock */
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
- return netmap_ring_reinit(kring);
-
if (do_lock)
IXGBE_TX_LOCK(txr);
+ /* take a copy of ring->cur now, and never read it again */
+ k = ring->cur;
+ l = k - kring->nr_hwcur;
+ if (l < 0)
+ l += lim + 1;
+ /* if cur is invalid reinitialize the ring. */
+ if (k > lim || l > kring->nr_hwavail) {
+ if (do_lock)
+ IXGBE_TX_UNLOCK(txr);
+ return netmap_ring_reinit(kring);
+ }
+
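+	/*
+	 * Worked example (editorial sketch, not in the patch): with
+	 * 1024 slots, nr_hwcur == 100 and nr_hwavail == 5, a ring->cur
+	 * of 90 gives l = 90 - 100 + 1024 = 1014 newly consumed slots,
+	 * far more than the 5 actually available, so the ring content
+	 * is invalid and netmap_ring_reinit() is called.
+	 */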
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
- /* update avail to what the hardware knows */
- ring->avail = kring->nr_hwavail;
-
+ /*
+ * Process new packets to send. j is the current index in the
+ * netmap ring, l is the corresponding index in the NIC ring.
+ * The two numbers differ because upon a *_init() we reset
+ * the NIC ring but leave the netmap ring unchanged.
+ * For the transmit ring, we have
+ *
+ * j = kring->nr_hwcur
+ * l = IXGBE_TDT (not tracked in the driver)
+ * and
+ * j == (l + kring->nkr_hwofs) % ring_size
+ *
+ * In this driver kring->nkr_hwofs >= 0, but for other
+ * drivers it might be negative as well.
+ */
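+	/*
+	 * A worked example of the j/l mapping (values invented for
+	 * illustration): with ring_size = 1024 and nkr_hwofs = 3,
+	 * netmap slot j = 2 maps to NIC slot l = j - nkr_hwofs = -1,
+	 * which wraps to l = 1023. The code just below computes this:
+	 *
+	 *	l = j - kring->nkr_hwofs;
+	 *	if (l < 0)
+	 *		l += lim + 1;
+	 *
+	 * where lim + 1 is the ring size.
+	 */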
j = kring->nr_hwcur;
if (j != k) { /* we have new packets to send */
+ l = j - kring->nkr_hwofs;
+ if (l < 0) /* wraparound */
+ l += lim + 1;
+
while (j != k) {
+ /*
+ * Collect per-slot info.
+ * Note that txbuf and curr are indexed by l.
+ *
+ * In this driver we collect the buffer address
+ * (using the NMB() macro) because we always
+ * need to rewrite it into the NIC ring.
+ * Many other drivers preserve the address, so
+ * we only need to access it if NS_BUF_CHANGED
+ * is set.
+ */
struct netmap_slot *slot = &ring->slot[j];
- struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[j];
- union ixgbe_adv_tx_desc *curr = &txr->tx_base[j];
+ struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[l];
+ union ixgbe_adv_tx_desc *curr = &txr->tx_base[l];
void *addr = NMB(slot);
+ // XXX type for flags and len ?
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
IXGBE_TXD_CMD_RS : 0;
int len = slot->len;
+ /*
+ * Quick check for valid addr and len.
+ * NMB() returns netmap_buffer_base for invalid
+ * buffer indexes (but the address is still a
+ * valid one to be used in a ring). slot->len is
+ * unsigned so no need to check for negative values.
+ */
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
+ring_reset:
if (do_lock)
IXGBE_TX_UNLOCK(txr);
return netmap_ring_reinit(kring);
}
slot->flags &= ~NS_REPORT;
+ /*
+ * Fill the slot in the NIC ring.
+ * In this driver we need to rewrite the buffer
+ * address in the NIC ring. Other drivers do not
+ * need this.
+ */
curr->read.buffer_addr = htole64(vtophys(addr));
curr->read.olinfo_status = 0;
curr->read.cmd_type_len =
@@ -212,6 +297,10 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
(IXGBE_ADVTXD_DTYP_DATA |
IXGBE_ADVTXD_DCMD_IFCS |
IXGBE_TXD_CMD_EOP | flags) );
+			/* If the buffer has changed, unload and reload
+			 * the map (and possibly update the physical
+			 * address in the NIC slot; here we already
+			 * rewrote the address above).
+			 */
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, unload and reload map */
netmap_reload_map(txr->txtag, txbuf->map,
@@ -219,69 +308,89 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
slot->flags &= ~NS_BUF_CHANGED;
}
+ /* make sure changes to the buffer are synced */
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
- kring->nr_hwcur = k;
+ kring->nr_hwcur = k; /* the saved ring->cur */
/* decrease avail by number of sent packets */
- ring->avail -= n;
- kring->nr_hwavail = ring->avail;
+ kring->nr_hwavail -= n;
+ /* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), k);
+ /* (re)start the transmitter up to slot l (excluded) */
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), l);
}
+	/*
+	 * If no packets were sent, or there is no room in the tx ring,
+	 * check whether there are completed transmissions.
+	 * Because this is expensive (we need to read a register, etc.)
+	 * we only do it when absolutely necessary, i.e. when there is
+	 * no room in the tx ring, or when no packets were sent (meaning
+	 * that the caller probably really wanted to check for completed
+	 * transmissions).
+	 */
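+	/*
+	 * Example of the bookkeeping below (invented values): if the
+	 * TDH read returns l = 50 while txr->next_to_clean is 40,
+	 * then delta = 10 descriptors completed since the last check,
+	 * so nr_hwavail grows by 10 and next_to_clean moves to 50.
+	 * A negative delta just means TDH wrapped past the end of the
+	 * ring, hence the "delta += nkr_num_slots" fixup.
+	 */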
if (n == 0 || kring->nr_hwavail < 1) {
- /* record completed transmissions. TODO
+ int delta;
+
+ /*
+ * Record completed transmissions.
+ * We (re)use the driver's txr->next_to_clean to keep
+ * track of the most recently completed transmission.
*
* The datasheet discourages the use of TDH to find out the
- * number of sent packets; the right way to do so, is to check
- * the DD bit inside the status of a packet descriptor. On the
- * other hand, we avoid to set the `report status' bit for
- * *all* outgoing packets (kind of interrupt mitigation),
- * consequently the DD bit is not guaranteed to be set for all
- * the packets: thats way, for the moment we continue to use
- * TDH.
+ * number of sent packets. We should rather check the DD
+ * status bit in a packet descriptor. However, we only set
+ * the "report status" bit for some descriptors (a kind of
+	 * interrupt mitigation), so the DD bit is only reliable on
+	 * those descriptors. For the time being we use TDH, as we
+	 * read it infrequently enough not to pose performance problems.
*/
- j = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
- if (j >= kring->nkr_num_slots) { /* XXX can happen */
- D("TDH wrap %d", j);
- j -= kring->nkr_num_slots;
+ l = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
+ if (l >= kring->nkr_num_slots) { /* XXX can happen */
+ D("TDH wrap %d", l);
+ l -= kring->nkr_num_slots;
}
- int delta = j - txr->next_to_clean;
+ delta = l - txr->next_to_clean;
if (delta) {
- /* new transmissions were completed, increment
- ring->nr_hwavail. */
+ /* some tx completed, increment avail */
if (delta < 0)
delta += kring->nkr_num_slots;
- txr->next_to_clean = j;
+ txr->next_to_clean = l;
kring->nr_hwavail += delta;
- ring->avail = kring->nr_hwavail;
+ if (kring->nr_hwavail > lim)
+ goto ring_reset;
}
}
+ /* update avail to what the kernel knows */
+ ring->avail = kring->nr_hwavail;
if (do_lock)
IXGBE_TX_UNLOCK(txr);
return 0;
}
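/*
 * Editor's illustration (not part of this commit): the index
 * arithmetic used by txsync above, reduced to a self-contained C
 * helper. The function name is hypothetical.
 *
 *	// How many slots did userspace consume on a circular ring?
 *	// k is the cached copy of ring->cur, hwcur is the kernel's
 *	// nr_hwcur, num_slots is the ring size. The result is always
 *	// in [0, num_slots - 1].
 *	static int
 *	nm_consumed(int k, int hwcur, int num_slots)
 *	{
 *		int n = k - hwcur;
 *		return (n < 0) ? n + num_slots : n;
 *	}
 *
 * txsync compares this value against nr_hwavail: a larger value means
 * userspace passed a bogus cur and the ring must be reinitialized.
 */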
/*
* Reconcile kernel and user view of the receive ring.
+ * As with txsync, this routine must be efficient and must avoid
+ * races when accessing the shared regions.
+ *
+ * When called, userspace has read data from slots kring->nr_hwcur
+ * up to ring->cur (excluded).
*
- * Userspace has read rx slots up to cur (excluded).
- * The last unread slot previously known to the kernel was nr_hwcur,
- * and the last interrupt reported nr_hwavail slots available.
+ * The last interrupt reported kring->nr_hwavail slots available
+ * after kring->nr_hwcur.
* We must subtract the newly consumed slots (cur - nr_hwcur)
- * from nr_hwavail, clearing the descriptors for the next
- * read, tell the hardware that they are available,
- * and set nr_hwcur = cur and avail = nr_hwavail.
- * issuing a dmamap_sync on all slots.
+ * from nr_hwavail, make the descriptors available for the next reads,
+ * and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail.
*/
static int
ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -291,86 +400,123 @@ ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
struct netmap_adapter *na = NA(adapter->ifp);
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, n, lim = kring->nkr_num_slots - 1;
+ int j, k, l, n, lim = kring->nkr_num_slots - 1;
- k = ring->cur; /* ring is not protected by any lock */
- if ( (kring->nr_kflags & NR_REINIT) || k > lim)
+ k = ring->cur; /* cache and check value, same as in txsync */
+ n = k - kring->nr_hwcur;
+ if (n < 0)
+ n += lim + 1;
+ if (k > lim || n > kring->nr_hwavail) /* userspace is cheating */
return netmap_ring_reinit(kring);
if (do_lock)
IXGBE_RX_LOCK(rxr);
/* XXX check sync modes */
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- j = rxr->next_to_check;
+ /*
+ * First part, import newly received packets into the netmap ring.
+ *
+ * j is the index of the next free slot in the netmap ring,
+	 * and l is the index of the next received packet in the NIC ring.
+	 * They may differ if if_init() has been called while in
+	 * netmap mode. For the receive ring we have
+ *
+ * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
+ * l = rxr->next_to_check;
+ * and
+ * j == (l + kring->nkr_hwofs) % ring_size
+ *
+ * rxr->next_to_check is set to 0 on a ring reinit
+ */
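+	/*
+	 * For example (invented values): right after an if_init(),
+	 * rxr->next_to_check is 0 while the netmap ring is unchanged;
+	 * with nkr_hwofs = 5, the next received packet (NIC slot
+	 * l = 0) is imported into netmap slot j = l + nkr_hwofs = 5.
+	 */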
+ l = rxr->next_to_check;
+ j = rxr->next_to_check + kring->nkr_hwofs;
+ if (j > lim)
+ j -= lim + 1;
+
for (n = 0; ; n++) {
- union ixgbe_adv_rx_desc *curr = &rxr->rx_base[j];
+ union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & IXGBE_RXD_STAT_DD) == 0)
break;
ring->slot[j].len = le16toh(curr->wb.upper.length);
bus_dmamap_sync(rxr->ptag,
- rxr->rx_buffers[j].pmap, BUS_DMASYNC_POSTREAD);
+ rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
}
- if (n) {
- rxr->next_to_check = j;
+ if (n) { /* update the state variables */
+ rxr->next_to_check = l;
kring->nr_hwavail += n;
- if (kring->nr_hwavail >= lim - 10) {
- ND("rx ring %d almost full %d", ring_nr, kring->nr_hwavail);
- }
}
- /* skip past packets that userspace has already processed,
- * making them available for reception.
- * advance nr_hwcur and issue a bus_dmamap_sync on the
- * buffers so it is safe to write to them.
- * Also increase nr_hwavail
+ /*
+ * Skip past packets that userspace has already processed
+ * (from kring->nr_hwcur to ring->cur excluded), and make
+ * the buffers available for reception.
+ * As usual j is the index in the netmap ring, l is the index
+ * in the NIC ring, and j == (l + kring->nkr_hwofs) % ring_size
*/
j = kring->nr_hwcur;
if (j != k) { /* userspace has read some packets. */
n = 0;
+ l = kring->nr_hwcur - kring->nkr_hwofs;
+ if (l < 0)
+ l += lim + 1;
while (j != k) {
- struct netmap_slot *slot = ring->slot + j;
- union ixgbe_adv_rx_desc *curr = &rxr->rx_base[j];
- struct ixgbe_rx_buf *rxbuf = rxr->rx_buffers + j;
+			/* collect per-slot info, with similar validations
+			 * and flag handling as in the txsync code.
+			 *
+			 * NOTE curr and rxbuf are indexed by l.
+			 * Also, this driver needs to update the physical
+			 * address in the NIC ring, but other drivers
+			 * may not have this requirement.
+			 */
+ struct netmap_slot *slot = &ring->slot[j];
+ union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
+ struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[l];
void *addr = NMB(slot);
- if (addr == netmap_buffer_base) { /* bad buf */
- if (do_lock)
- IXGBE_RX_UNLOCK(rxr);
- return netmap_ring_reinit(kring);
- }
+ if (addr == netmap_buffer_base) /* bad buf */
+ goto ring_reset;
curr->wb.upper.status_error = 0;
curr->read.pkt_addr = htole64(vtophys(addr));
if (slot->flags & NS_BUF_CHANGED) {
netmap_reload_map(rxr->ptag, rxbuf->pmap,
- addr, na->buff_size);
+ addr, na->buff_size);
slot->flags &= ~NS_BUF_CHANGED;
}
bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
- BUS_DMASYNC_PREREAD);
+ BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
+ l = (l == lim) ? 0 : l + 1;
n++;
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = ring->cur;
+ kring->nr_hwcur = k;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* IMPORTANT: we must leave one free slot in the ring,
- * so move j back by one unit
+ * so move l back by one unit
*/
- j = (j == 0) ? lim : j - 1;
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), j);
+ l = (l == 0) ? lim : l - 1;
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), l);
}
/* tell userspace that there are new packets */
	ring->avail = kring->nr_hwavail;
if (do_lock)
IXGBE_RX_UNLOCK(rxr);
return 0;
+
+ring_reset:
+ if (do_lock)
+ IXGBE_RX_UNLOCK(rxr);
+ return netmap_ring_reinit(kring);
}
+/* end of file */
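/*
 * For context, here is roughly how a userspace client consumes the
 * work done by rxsync above. This is an editor's sketch modeled on
 * pkt-gen (not code from this commit); consume() is a hypothetical
 * application routine and error handling is omitted:
 *
 *	struct netmap_ring *ring = NETMAP_RXRING(nifp, 0);
 *
 *	ioctl(fd, NIOCRXSYNC, NULL);	// runs the rxsync routine
 *	while (ring->avail > 0) {
 *		struct netmap_slot *slot = &ring->slot[ring->cur];
 *		char *buf = NETMAP_BUF(ring, slot->buf_idx);
 *
 *		consume(buf, slot->len);
 *		ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
 *		ring->avail--;
 *	}
 */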
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index fef8516..34a0627 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -1,15 +1,15 @@
/*
* Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: netmap.c 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: netmap.c 9795 2011-12-02 11:39:08Z luigi $
*
* This module supports memory mapped access to network devices,
* see netmap(4).
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/jail.h>
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/conf.h> /* cdevsw struct */
#include <sys/uio.h> /* uio struct */
@@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mman.h> /* PROT_EXEC */
#include <sys/poll.h>
+#include <sys/proc.h>
#include <vm/vm.h> /* vtophys */
#include <vm/pmap.h> /* vtophys */
#include <sys/socket.h> /* sockaddrs */
@@ -78,6 +80,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/bpf.h> /* BIOCIMMEDIATE */
+#include <net/vnet.h>
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <machine/bus.h> /* bus_dmamap_* */
@@ -678,6 +681,13 @@ get_ifp(const char *name, struct ifnet **ifp)
* Error routine called when txsync/rxsync detects an error.
* Can't do much more than resetting cur = hwcur, avail = hwavail.
* Return 1 on reinit.
+ *
+ * This routine is only called by the upper half of the kernel.
+ * It only reads hwcur (which is changed only by the upper half, too)
+ * and hwavail (which may be changed by the lower half, but only on
+ * a tx ring and only to increase it, so any error will be recovered
+ * on the next call). For the above reasons, we don't strictly need
+ * to call it under lock.
*/
int
netmap_ring_reinit(struct netmap_kring *kring)
@@ -717,29 +727,10 @@ netmap_ring_reinit(struct netmap_kring *kring)
ring->avail, kring->nr_hwavail);
ring->cur = kring->nr_hwcur;
ring->avail = kring->nr_hwavail;
- ring->flags |= NR_REINIT;
- kring->na->flags |= NR_REINIT;
}
return (errors ? 1 : 0);
}
-/*
- * Clean the reinit flag for our rings.
- * XXX at the moment, clear for all rings
- */
-static void
-netmap_clean_reinit(struct netmap_adapter *na)
-{
- //struct netmap_kring *kring;
- u_int i;
-
- na->flags &= ~NR_REINIT;
- D("--- NR_REINIT reset on %s", na->ifp->if_xname);
- for (i = 0; i < na->num_queues + 1; i++) {
- na->tx_rings[i].ring->flags &= ~NR_REINIT;
- na->rx_rings[i].ring->flags &= ~NR_REINIT;
- }
-}
/*
* Set the ring ID. For devices with a single queue, a request
@@ -801,7 +792,7 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
*/
static int
netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
- __unused int fflag, __unused struct thread *td)
+ __unused int fflag, struct thread *td)
{
struct netmap_priv_d *priv = NULL;
struct ifnet *ifp;
@@ -812,9 +803,13 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
u_int i;
struct netmap_if *nifp;
+ CURVNET_SET(TD_TO_VNET(td));
+
error = devfs_get_cdevpriv((void **)&priv);
- if (error != ENOENT && error != 0)
+ if (error != ENOENT && error != 0) {
+ CURVNET_RESTORE();
return (error);
+ }
error = 0; /* Could be ENOENT */
switch (cmd) {
@@ -836,8 +831,10 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
break;
case NIOCREGIF:
- if (priv != NULL) /* thread already registered */
- return netmap_set_ringid(priv, nmr->nr_ringid);
+ if (priv != NULL) { /* thread already registered */
+ error = netmap_set_ringid(priv, nmr->nr_ringid);
+ break;
+ }
/* find the interface and a reference */
error = get_ifp(nmr->nr_name, &ifp); /* keep reference */
if (error)
@@ -927,8 +924,10 @@ error:
break;
case NIOCUNREGIF:
- if (priv == NULL)
- return (ENXIO);
+ if (priv == NULL) {
+ error = ENXIO;
+ break;
+ }
/* the interface is unregistered inside the
destructor of the private data. */
@@ -937,22 +936,21 @@ error:
case NIOCTXSYNC:
case NIOCRXSYNC:
- if (priv == NULL)
- return (ENXIO);
+ if (priv == NULL) {
+ error = ENXIO;
+ break;
+ }
ifp = priv->np_ifp; /* we have a reference */
na = NA(ifp); /* retrieve netmap adapter */
adapter = ifp->if_softc; /* shorthand */
- if (na->flags & NR_REINIT)
- netmap_clean_reinit(na);
-
if (priv->np_qfirst == na->num_queues) {
/* queues to/from host */
if (cmd == NIOCTXSYNC)
netmap_sync_to_host(na);
else
netmap_sync_from_host(na, NULL);
- return error;
+ break;
}
for (i = priv->np_qfirst; i < priv->np_qlast; i++) {
@@ -999,6 +997,7 @@ error:
}
}
+ CURVNET_RESTORE();
return (error);
}
@@ -1039,13 +1038,6 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
adapter = ifp->if_softc;
na = NA(ifp); /* retrieve netmap adapter */
- /* pending reinit, report up as a poll error. Pending
- * reads and writes are lost.
- */
- if (na->flags & NR_REINIT) {
- netmap_clean_reinit(na);
- revents |= POLLERR;
- }
/* how many queues we are scanning */
i = priv->np_qfirst;
if (i == na->num_queues) { /* from/to host */
@@ -1111,20 +1103,20 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td)
* data available. If this fails, then lock and call the sync
* routines.
*/
- for (i = priv->np_qfirst; want_rx && i < priv->np_qlast; i++) {
- kring = &na->rx_rings[i];
- if (kring->ring->avail > 0) {
- revents |= want_rx;
- want_rx = 0; /* also breaks the loop */
+ for (i = priv->np_qfirst; want_rx && i < priv->np_qlast; i++) {
+ kring = &na->rx_rings[i];
+ if (kring->ring->avail > 0) {
+ revents |= want_rx;
+ want_rx = 0; /* also breaks the loop */
+ }
}
- }
- for (i = priv->np_qfirst; want_tx && i < priv->np_qlast; i++) {
- kring = &na->tx_rings[i];
- if (kring->ring->avail > 0) {
- revents |= want_tx;
- want_tx = 0; /* also breaks the loop */
+ for (i = priv->np_qfirst; want_tx && i < priv->np_qlast; i++) {
+ kring = &na->tx_rings[i];
+ if (kring->ring->avail > 0) {
+ revents |= want_tx;
+ want_tx = 0; /* also breaks the loop */
+ }
}
- }
/*
	 * If we want to push packets out (priv->np_txpoll) or want_tx is
@@ -1326,24 +1318,6 @@ done:
* netmap_reset() is called by the driver routines when reinitializing
* a ring. The driver is in charge of locking to protect the kring.
* If netmap mode is not set just return NULL.
- * Otherwise set NR_REINIT (in the ring and in na) to signal
- * that a ring has been reinitialized,
- * set cur = hwcur = 0 and avail = hwavail = num_slots - 1 .
- * IT IS IMPORTANT to leave one slot free even in the tx ring because
- * we rely on cur=hwcur only for empty rings.
- * These are good defaults but can be overridden later in the device
- * specific code if, after a reinit, the ring does not start from 0
- * (e.g. if_em.c does this).
- *
- * XXX we shouldn't be touching the ring, but there is a
- * race anyways and this is our best option.
- *
- * XXX setting na->flags makes the syscall code faster, as there is
- * only one place to check. On the other hand, we will need a better
- * way to notify multiple threads that rings have been reset.
- * One way is to increment na->rst_count at each ring reset.
- * Each thread in its own priv structure will keep a matching counter,
- * and on a reset will acknowledge and clean its own rings.
*/
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
@@ -1351,8 +1325,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
{
struct netmap_kring *kring;
struct netmap_ring *ring;
- struct netmap_slot *slot;
- u_int i;
+ int new_hwofs, lim;
if (na == NULL)
return NULL; /* no netmap support here */
@@ -1360,74 +1333,26 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
return NULL; /* nothing to reinitialize */
kring = tx == NR_TX ? na->tx_rings + n : na->rx_rings + n;
ring = kring->ring;
- if (tx == NR_TX) {
- /*
- * The last argument is the new value of next_to_clean.
- *
- * In the TX ring, we have P pending transmissions (from
- * next_to_clean to nr_hwcur) followed by nr_hwavail free slots.
- * Generally we can use all the slots in the ring so
- * P = ring_size - nr_hwavail hence (modulo ring_size):
- * next_to_clean == nr_hwcur + nr_hwavail
- *
- * If, upon a reset, nr_hwavail == ring_size and next_to_clean
- * does not change we have nothing to report. Otherwise some
- * pending packets may be lost, or newly injected packets will.
- */
- /* if hwcur does not change, nothing to report.
- * otherwise remember the change so perhaps we can
- * shift the block at the next reinit
- */
- if (new_cur == kring->nr_hwcur &&
- kring->nr_hwavail == kring->nkr_num_slots - 1) {
- /* all ok */
- D("+++ NR_REINIT ok on %s TX[%d]", na->ifp->if_xname, n);
- } else {
- D("+++ NR_REINIT set on %s TX[%d]", na->ifp->if_xname, n);
- }
- ring->flags |= NR_REINIT;
- na->flags |= NR_REINIT;
- ring->avail = kring->nr_hwavail = kring->nkr_num_slots - 1;
- ring->cur = kring->nr_hwcur = new_cur;
- } else {
- /*
- * The last argument is the next free slot.
- * In the RX ring we have nr_hwavail full buffers starting
- * from nr_hwcur.
- * If nr_hwavail == 0 and nr_hwcur does not change we are ok
- * otherwise we might be in trouble as the buffers are
- * changing.
- */
- if (new_cur == kring->nr_hwcur && kring->nr_hwavail == 0) {
- /* all ok */
- D("+++ NR_REINIT ok on %s RX[%d]", na->ifp->if_xname, n);
- } else {
- D("+++ NR_REINIT set on %s RX[%d]", na->ifp->if_xname, n);
- }
- ring->flags |= NR_REINIT;
- na->flags |= NR_REINIT;
- ring->avail = kring->nr_hwavail = 0; /* no data */
- ring->cur = kring->nr_hwcur = new_cur;
- }
+ lim = kring->nkr_num_slots - 1;
+
+ if (tx == NR_TX)
+ new_hwofs = kring->nr_hwcur - new_cur;
+ else
+ new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
+ if (new_hwofs > lim)
+ new_hwofs -= lim + 1;
+
+	/* Always set the new offset value and realign the ring. */
+ kring->nkr_hwofs = new_hwofs;
+ if (tx == NR_TX)
+ kring->nr_hwavail = kring->nkr_num_slots - 1;
+ D("new hwofs %d on %s %s[%d]",
+ kring->nkr_hwofs, na->ifp->if_xname,
+ tx == NR_TX ? "TX" : "RX", n);
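+	/*
+	 * Example (invented values): on a TX ring with nr_hwcur = 5 and
+	 * new_cur = 0 (the NIC restarts from descriptor 0), new_hwofs
+	 * becomes 5: netmap slot 5 now corresponds to NIC slot 0,
+	 * which is the j == (l + nkr_hwofs) % ring_size invariant
+	 * used by the driver sync routines.
+	 */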
- slot = ring->slot;
/*
- * Check that buffer indexes are correct. If we find a
- * bogus value we are a bit in trouble because we cannot
- * recover easily. Best we can do is (probably) persistently
- * reset the ring.
- */
- for (i = 0; i < kring->nkr_num_slots; i++) {
- if (slot[i].buf_idx >= netmap_total_buffers) {
- D("invalid buf_idx %d at slot %d", slot[i].buf_idx, i);
- slot[i].buf_idx = 0; /* XXX reset */
- }
- /* XXX we don't really need to set the length */
- slot[i].len = 0;
- }
- /* wakeup possible waiters, both on the ring and on the global
- * selfd. Perhaps a bit early now but the device specific
- * routine is locked so hopefully we won't have a race.
+ * We do the wakeup here, but the ring is not yet reconfigured.
+ * However, we are under lock so there are no races.
*/
selwakeuppri(&kring->si, PI_NET);
selwakeuppri(&kring[na->num_queues + 1 - n].si, PI_NET);
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 5434609..eb36ec6 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -1,15 +1,15 @@
/*
* Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: netmap_kern.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: netmap_kern.h 9795 2011-12-02 11:39:08Z luigi $
*
* The header contains the definitions of constants and function
* prototypes used only in kernelspace.
@@ -68,7 +68,7 @@ struct netmap_kring {
u_int nr_kflags;
u_int nkr_num_slots;
- u_int nkr_hwofs; /* offset between NIC and netmap ring */
+ int nkr_hwofs; /* offset between NIC and netmap ring */
struct netmap_adapter *na; // debugging
struct selinfo si; /* poll/select wait queue */
};
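+/*
+ * (nkr_hwofs becomes signed because, as the driver comments above
+ * note, in some drivers the realignment done by netmap_reset() can
+ * yield a negative offset between the NIC and netmap rings.)
+ */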
@@ -94,7 +94,7 @@ struct netmap_adapter {
u_int num_rx_desc;
u_int buff_size;
- u_int flags; /* NR_REINIT */
+ u_int flags;
/* tx_rings and rx_rings are private but allocated
* as a contiguous chunk of memory. Each array has
* N+1 entries, for the adapter queues and for the host queue.
diff --git a/sys/net/netmap.h b/sys/net/netmap.h
index be9c686..4dec1fd 100644
--- a/sys/net/netmap.h
+++ b/sys/net/netmap.h
@@ -32,7 +32,7 @@
/*
* $FreeBSD$
- * $Id: netmap.h 9662 2011-11-16 13:18:06Z luigi $
+ * $Id: netmap.h 9753 2011-11-28 15:10:43Z luigi $
*
* This header contains the definitions of the constants and the
* structures needed by the ``netmap'' module, both kernel and
@@ -186,13 +186,6 @@ struct netmap_ring {
const uint16_t nr_buf_size;
uint16_t flags;
- /*
- * When a ring is reinitialized, the kernel sets kflags.
- * On exit from a syscall, if the flag is found set, we
- * also reinitialize the nr_* variables. The kflag is then
- * unconditionally copied to nr_flags and cleared.
- */
-#define NR_REINIT 0x0001 /* ring reinitialized! */
#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
struct timeval ts; /* time of last *sync() */
diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c
index 747bd9d..21dc8de 100644
--- a/tools/tools/netmap/pkt-gen.c
+++ b/tools/tools/netmap/pkt-gen.c
@@ -4,10 +4,10 @@
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
@@ -25,7 +25,7 @@
/*
* $FreeBSD$
- * $Id: pkt-gen.c 9638 2011-11-07 18:07:43Z luigi $
+ * $Id: pkt-gen.c 9827 2011-12-05 11:29:34Z luigi $
*
* Example program to show how to build a multithreaded packet
* source/sink using the netmap device.
@@ -45,6 +45,7 @@ const char *default_payload="netmap pkt-gen Luigi Rizzo and Matteo Landi\n"
#include <signal.h> /* signal */
#include <stdlib.h>
#include <stdio.h>
+#include <inttypes.h> /* PRI* macros */
#include <string.h> /* strcmp */
#include <fcntl.h> /* open */
#include <unistd.h> /* close */
@@ -616,7 +617,7 @@ tx_output(uint64_t sent, int size, double delta)
punit += 1;
}
- printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n",
+ printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n",
sent, size, delta);
printf("Speed: %.2f%cpps. Bandwidth: %.2f%cbps.\n",
pps, units[punit], amount, units[aunit]);
@@ -636,7 +637,7 @@ rx_output(uint64_t received, double delta)
punit += 1;
}
- printf("Received %llu packets, in %.2f seconds.\n", received, delta);
+ printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta);
printf("Speed: %.2f%cpps.\n", pps, units[punit]);
}
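+/*
+ * Portability note (editor's addition): PRIu64, from <inttypes.h>,
+ * expands to the correct printf conversion for uint64_t on every
+ * platform, e.g.
+ *
+ *	uint64_t sent = 12345;
+ *	printf("Sent %" PRIu64 " packets\n", sent);
+ *
+ * whereas "%llu" is only correct where uint64_t is unsigned long long
+ * (it is unsigned long on 64-bit FreeBSD).
+ */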
@@ -971,7 +972,7 @@ main(int arc, char **argv)
if (pps < 10000)
continue;
pps = (my_count - prev)*1000000 / pps;
- D("%llu pps", pps);
+ D("%" PRIu64 " pps", pps);
prev = my_count;
toc = now;
if (done == g.nthreads)