diff options
-rw-r--r-- | sys/dev/ixgbe/ixgbe.c | 148 | ||||
-rw-r--r-- | sys/dev/netmap/if_em_netmap.h | 167 | ||||
-rw-r--r-- | sys/dev/netmap/if_igb_netmap.h | 137 | ||||
-rw-r--r-- | sys/dev/netmap/if_lem_netmap.h | 241 | ||||
-rw-r--r-- | sys/dev/netmap/if_re_netmap.h | 147 | ||||
-rw-r--r-- | sys/dev/netmap/ixgbe_netmap.h | 340 | ||||
-rw-r--r-- | sys/dev/netmap/netmap.c | 211 | ||||
-rw-r--r-- | sys/dev/netmap/netmap_kern.h | 18 | ||||
-rw-r--r-- | sys/net/netmap.h | 9 | ||||
-rw-r--r-- | tools/tools/netmap/pkt-gen.c | 17 |
10 files changed, 837 insertions, 598 deletions
diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c index 9a709af..ae76e9f 100644 --- a/sys/dev/ixgbe/ixgbe.c +++ b/sys/dev/ixgbe/ixgbe.c @@ -313,6 +313,18 @@ static int atr_sample_rate = 20; static int fdir_pballoc = 1; #endif +#ifdef DEV_NETMAP +/* + * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to + * be a reference on how to implement netmap support in a driver. + * Additional comments are in ixgbe_netmap.h . + * + * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support + * that extend the standard driver. + */ +#include <dev/netmap/ixgbe_netmap.h> +#endif /* DEV_NETMAP */ + /********************************************************************* * Device identification routine * @@ -578,6 +590,9 @@ ixgbe_attach(device_t dev) ixgbe_add_hw_stats(adapter); +#ifdef DEV_NETMAP + ixgbe_netmap_attach(adapter); +#endif /* DEV_NETMAP */ INIT_DEBUGOUT("ixgbe_attach: end"); return (0); err_late: @@ -652,6 +667,9 @@ ixgbe_detach(device_t dev) ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); +#ifdef DEV_NETMAP + netmap_detach(adapter->ifp); +#endif /* DEV_NETMAP */ ixgbe_free_pci_resources(adapter); bus_generic_detach(dev); if_free(adapter->ifp); @@ -2813,9 +2831,20 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr) struct adapter *adapter = txr->adapter; struct ixgbe_tx_buf *txbuf; int i; +#ifdef DEV_NETMAP + struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_slot *slot; +#endif /* DEV_NETMAP */ /* Clear the old ring contents */ IXGBE_TX_LOCK(txr); +#ifdef DEV_NETMAP + /* + * (under lock): if in netmap mode, do some consistency + * checks and set slot to entry 0 of the netmap ring. 
+ */ + slot = netmap_reset(na, NR_TX, txr->me, 0); +#endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ @@ -2832,6 +2861,26 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr) m_freem(txbuf->m_head); txbuf->m_head = NULL; } +#ifdef DEV_NETMAP + /* + * In netmap mode, set the map for the packet buffer. + * NOTE: Some drivers (not this one) also need to set + * the physical buffer address in the NIC ring. + * Slots in the netmap ring (indexed by "si") are + * kring->nkr_hwofs positions "ahead" wrt the + * corresponding slot in the NIC ring. In some drivers + * (not here) nkr_hwofs can be negative. When computing + * si = i + kring->nkr_hwofs make sure to handle wraparounds. + */ + if (slot) { + int si = i + na->tx_rings[txr->me].nkr_hwofs; + + if (si >= na->num_tx_desc) + si -= na->num_tx_desc; + netmap_load_map(txr->txtag, txbuf->map, + NMB(slot + si), na->buff_size); + } +#endif /* DEV_NETMAP */ /* Clear the EOP index */ txbuf->eop_index = -1; } @@ -3310,6 +3359,29 @@ ixgbe_txeof(struct tx_ring *txr) mtx_assert(&txr->tx_mtx, MA_OWNED); +#ifdef DEV_NETMAP + if (ifp->if_capenable & IFCAP_NETMAP) { + struct netmap_adapter *na = NA(ifp); + + /* + * In netmap mode, all the work is done in the context + * of the client thread. Interrupt handlers only wake up + * clients, which may be sleeping on individual rings + * or on a global resource for all rings. + * When the driver has separate locks, we need to + * release and re-acquire txlock to avoid deadlocks. + * XXX see if we can find a better way. 
+ */ + selwakeuppri(&na->tx_rings[txr->me].si, PI_NET); + IXGBE_TX_UNLOCK(txr); + IXGBE_CORE_LOCK(adapter); + selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET); + IXGBE_CORE_UNLOCK(adapter); + IXGBE_TX_LOCK(txr); + return FALSE; + } +#endif /* DEV_NETMAP */ + if (txr->tx_avail == adapter->num_tx_desc) { txr->queue_status = IXGBE_QUEUE_IDLE; return FALSE; @@ -3698,6 +3770,10 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr) bus_dma_segment_t pseg[1], hseg[1]; struct lro_ctrl *lro = &rxr->lro; int rsize, nsegs, error = 0; +#ifdef DEV_NETMAP + struct netmap_adapter *na = NA(rxr->adapter->ifp); + struct netmap_slot *slot; +#endif /* DEV_NETMAP */ adapter = rxr->adapter; ifp = adapter->ifp; @@ -3705,6 +3781,10 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr) /* Clear the ring contents */ IXGBE_RX_LOCK(rxr); +#ifdef DEV_NETMAP + /* same as in ixgbe_setup_transmit_ring() */ + slot = netmap_reset(na, NR_RX, rxr->me, 0); +#endif /* DEV_NETMAP */ rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); @@ -3721,6 +3801,29 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr) struct mbuf *mh, *mp; rxbuf = &rxr->rx_buffers[j]; +#ifdef DEV_NETMAP + /* + * In netmap mode, fill the map and set the buffer + * address in the NIC ring, considering the offset + * between the netmap and NIC rings (see comment in + * ixgbe_setup_transmit_ring() ). 
No need to allocate + * an mbuf, so end the block with a continue; + */ + if (slot) { + int sj = j + na->rx_rings[rxr->me].nkr_hwofs; + void *addr; + + if (sj >= na->num_rx_desc) + sj -= na->num_rx_desc; + addr = NMB(slot + sj); + netmap_load_map(rxr->ptag, + rxbuf->pmap, addr, na->buff_size); + /* Update descriptor */ + rxr->rx_base[j].read.pkt_addr = + htole64(vtophys(addr)); + continue; + } +#endif /* DEV_NETMAP */ /* ** Don't allocate mbufs if not ** doing header split, its wasteful @@ -3913,6 +4016,35 @@ ixgbe_initialize_receive_units(struct adapter *adapter) /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0); +#ifdef DEV_NETMAP + /* + * In netmap mode, we must preserve the buffers made + * available to userspace before the if_init() + * (this is true by default on the TX side, because + * init makes all buffers available to userspace). + * + * netmap_reset() and the device specific routines + * (e.g. ixgbe_setup_receive_rings()) map these + * buffers at the end of the NIC ring, so here we + * must set the RDT (tail) register to make sure + * they are not overwritten. + * + * In this driver the NIC ring starts at RDH = 0, + * RDT points to the first 'busy' slot, so RDT = 0 + * means the whole ring is available, and + * RDT = (num_rx_desc - X) means X slots are available. + * Computations are done modulo the ring size. + */ + if (ifp->if_capenable & IFCAP_NETMAP) { + struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_kring *kring = &na->rx_rings[i]; + int t = na->num_rx_desc - kring->nr_hwavail; + + if (t >= na->num_rx_desc) + t -= adapter->num_rx_desc; + IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t); + } else +#endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0); } @@ -4148,6 +4280,22 @@ ixgbe_rxeof(struct ix_queue *que, int count) IXGBE_RX_LOCK(rxr); +#ifdef DEV_NETMAP + if (ifp->if_capenable & IFCAP_NETMAP) { + /* + * Same as the txeof routine, only wakeup clients + * and make sure there are no deadlocks. 
+ */ + struct netmap_adapter *na = NA(ifp); + + selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET); + IXGBE_RX_UNLOCK(rxr); + IXGBE_CORE_LOCK(adapter); + selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET); + IXGBE_CORE_UNLOCK(adapter); + return (FALSE); + } +#endif /* DEV_NETMAP */ for (i = rxr->next_to_check; count != 0;) { struct mbuf *sendmp, *mh, *mp; u32 rsc, ptype; diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h index 0e220e7..681a652 100644 --- a/sys/dev/netmap/if_em_netmap.h +++ b/sys/dev/netmap/if_em_netmap.h @@ -9,7 +9,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -25,9 +25,12 @@ /* * $FreeBSD$ - * $Id: if_em_netmap.h 9662 2011-11-16 13:18:06Z luigi $ + * $Id: if_em_netmap.h 9802 2011-12-02 18:42:37Z luigi $ * * netmap changes for if_em. + * + * For structure and details on the individual functions please see + * ixgbe_netmap.h */ #include <net/netmap.h> @@ -58,12 +61,7 @@ em_netmap_attach(struct adapter *adapter) na.nm_rxsync = em_netmap_rxsync; na.nm_lock = em_netmap_lock_wrapper; na.nm_register = em_netmap_reg; - /* - * adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode - * we allocate the buffers on the first register. So we must - * disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set. - */ - na.buff_size = MCLBYTES; + na.buff_size = NETMAP_BUF_SIZE; netmap_attach(&na, adapter->num_queues); } @@ -100,6 +98,7 @@ em_netmap_lock_wrapper(void *_a, int what, u_int queueid) } +// XXX do we need to block/unblock the tasks ? 
static void em_netmap_block_tasks(struct adapter *adapter) { @@ -162,9 +161,6 @@ em_netmap_reg(struct ifnet *ifp, int onoff) if (onoff) { ifp->if_capenable |= IFCAP_NETMAP; - /* save if_transmit for later restore. - * XXX also if_start and if_qflush ? - */ na->if_transmit = ifp->if_transmit; ifp->if_transmit = netmap_start; @@ -179,15 +175,13 @@ fail: ifp->if_transmit = na->if_transmit; ifp->if_capenable &= ~IFCAP_NETMAP; em_init_locked(adapter); /* also enable intr */ - } em_netmap_unblock_tasks(adapter); return (error); } /* - * Reconcile hardware and user view of the transmit ring, see - * ixgbe.c for details. + * Reconcile hardware and user view of the transmit ring. */ static int em_netmap_txsync(void *a, u_int ring_nr, int do_lock) @@ -197,13 +191,13 @@ em_netmap_txsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; + int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; /* generate an interrupt approximately every half ring */ int report_frequency = kring->nkr_num_slots >> 1; k = ring->cur; - if ( (kring->nr_kflags & NR_REINIT) || k > lim) + if (k > lim) return netmap_ring_reinit(kring); if (do_lock) @@ -211,35 +205,20 @@ em_netmap_txsync(void *a, u_int ring_nr, int do_lock) bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); - /* record completed transmissions TODO - * - * instead of using TDH, we could read the transmitted status bit. + /* check for new packets to send. 
+ * j indexes the netmap ring, l indexes the nic ring, and + * j = kring->nr_hwcur, l = E1000_TDT (not tracked), + * j == (l + kring->nkr_hwofs) % ring_size */ - j = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr)); - if (j >= kring->nkr_num_slots) { /* XXX can happen */ - D("TDH wrap %d", j); - j -= kring->nkr_num_slots; - } - int delta = j - txr->next_to_clean; - if (delta) { - /* new transmissions were completed, increment - ring->nr_hwavail. */ - if (delta < 0) - delta += kring->nkr_num_slots; - txr->next_to_clean = j; - kring->nr_hwavail += delta; - } - - /* update avail to what the hardware knows */ - ring->avail = kring->nr_hwavail; - j = kring->nr_hwcur; if (j != k) { /* we have packets to send */ - n = 0; + l = j - kring->nkr_hwofs; + if (l < 0) + l += lim + 1; while (j != k) { struct netmap_slot *slot = &ring->slot[j]; - struct e1000_tx_desc *curr = &txr->tx_base[j]; - struct em_buffer *txbuf = &txr->tx_buffers[j]; + struct e1000_tx_desc *curr = &txr->tx_base[l]; + struct em_buffer *txbuf = &txr->tx_buffers[l]; int flags = ((slot->flags & NS_REPORT) || j == 0 || j == report_frequency) ? E1000_TXD_CMD_RS : 0; @@ -254,42 +233,61 @@ em_netmap_txsync(void *a, u_int ring_nr, int do_lock) slot->flags &= ~NS_REPORT; curr->upper.data = 0; curr->lower.data = - htole32( - adapter->txd_cmd | - (E1000_TXD_CMD_EOP | flags) | - slot->len); + htole32(adapter->txd_cmd | len | + (E1000_TXD_CMD_EOP | flags) ); if (slot->flags & NS_BUF_CHANGED) { curr->buffer_addr = htole64(vtophys(addr)); - /* buffer has changed, unload and reload map */ + /* buffer has changed, reload map */ netmap_reload_map(txr->txtag, txbuf->map, - addr, na->buff_size); + addr, na->buff_size); slot->flags &= ~NS_BUF_CHANGED; } bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 
0 : l + 1; + n++; } - kring->nr_hwcur = ring->cur; + kring->nr_hwcur = k; /* decrease avail by number of sent packets */ - ring->avail -= n; - kring->nr_hwavail = ring->avail; + kring->nr_hwavail -= n; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), - ring->cur); + E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l); } + + if (n == 0 || kring->nr_hwavail < 1) { + int delta; + + /* record completed transmissions using TDH. */ + l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr)); + if (l >= kring->nkr_num_slots) { /* XXX can happen */ + D("TDH wrap %d", l); + l -= kring->nkr_num_slots; + } + delta = l - txr->next_to_clean; + if (delta) { + /* some completed, increment hwavail. */ + if (delta < 0) + delta += kring->nkr_num_slots; + txr->next_to_clean = l; + kring->nr_hwavail += delta; + } + } + /* update avail to what the hardware knows */ + ring->avail = kring->nr_hwavail; + if (do_lock) EM_TX_UNLOCK(txr); return 0; } /* - * Reconcile kernel and user view of the receive ring, see ixgbe.c + * Reconcile kernel and user view of the receive ring. */ static int em_netmap_rxsync(void *a, u_int ring_nr, int do_lock) @@ -299,10 +297,10 @@ em_netmap_rxsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; + int j, k, l, n, lim = kring->nkr_num_slots - 1; k = ring->cur; - if ( (kring->nr_kflags & NR_REINIT) || k > lim) + if (k > lim) return netmap_ring_reinit(kring); if (do_lock) @@ -311,36 +309,52 @@ em_netmap_rxsync(void *a, u_int ring_nr, int do_lock) bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - /* acknowledge all the received packets. 
*/ - j = rxr->next_to_check; + /* import newly received packets into the netmap ring. + * j is an index in the netmap ring, l in the NIC ring, and + * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size + * l = rxr->next_to_check; + * and + * j == (l + kring->nkr_hwofs) % ring_size + */ + l = rxr->next_to_check; + j = l + kring->nkr_hwofs; + /* here nkr_hwofs can be negative so must check for j < 0 */ + if (j < 0) + j += lim + 1; + else if (j > lim) + j -= lim + 1; for (n = 0; ; n++) { - struct e1000_rx_desc *curr = &rxr->rx_base[j]; + struct e1000_rx_desc *curr = &rxr->rx_base[l]; if ((curr->status & E1000_RXD_STAT_DD) == 0) break; ring->slot[j].len = le16toh(curr->length); - bus_dmamap_sync(rxr->tag, rxr->rx_buffers[j].map, + bus_dmamap_sync(rxr->tag, rxr->rx_buffers[l].map, BUS_DMASYNC_POSTREAD); j = (j == lim) ? 0 : j + 1; + /* make sure next_to_refresh follows next_to_check */ + rxr->next_to_refresh = l; // XXX + l = (l == lim) ? 0 : l + 1; } if (n) { - rxr->next_to_check = j; + rxr->next_to_check = l; kring->nr_hwavail += n; } - /* skip past packets that userspace has already processed: - * making them available for reception. - * advance nr_hwcur and issue a bus_dmamap_sync on the - * buffers so it is safe to write to them. - * Also increase nr_hwavail - */ + /* skip past packets that userspace has already processed */ j = kring->nr_hwcur; if (j != k) { /* userspace has read some packets. 
*/ n = 0; + l = j - kring->nkr_hwofs; /* NIC ring index */ + /* here nkr_hwofs can be negative so check for l > lim */ + if (l < 0) + l += lim + 1; + else if (l > lim) + l -= lim + 1; while (j != k) { struct netmap_slot *slot = &ring->slot[j]; - struct e1000_rx_desc *curr = &rxr->rx_base[j]; - struct em_buffer *rxbuf = &rxr->rx_buffers[j]; + struct e1000_rx_desc *curr = &rxr->rx_base[l]; + struct em_buffer *rxbuf = &rxr->rx_buffers[l]; void *addr = NMB(slot); if (addr == netmap_buffer_base) { /* bad buf */ @@ -352,28 +366,29 @@ em_netmap_rxsync(void *a, u_int ring_nr, int do_lock) curr->status = 0; if (slot->flags & NS_BUF_CHANGED) { curr->buffer_addr = htole64(vtophys(addr)); - /* buffer has changed, unload and reload map */ + /* buffer has changed, reload map */ netmap_reload_map(rxr->rxtag, rxbuf->map, - addr, na->buff_size); + addr, na->buff_size); slot->flags &= ~NS_BUF_CHANGED; } bus_dmamap_sync(rxr->rxtag, rxbuf->map, - BUS_DMASYNC_PREREAD); + BUS_DMASYNC_PREREAD); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; n++; } kring->nr_hwavail -= n; - kring->nr_hwcur = ring->cur; + kring->nr_hwcur = k; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * IMPORTANT: we must leave one free slot in the ring, - * so move j back by one unit + * so move l back by one unit */ - j = (j == 0) ? lim : j - 1; - E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), j); + l = (l == 0) ? 
lim : l - 1; + E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l); } /* tell userspace that there are new packets */ ring->avail = kring->nr_hwavail ; diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h index 0c14706..c048ec4 100644 --- a/sys/dev/netmap/if_igb_netmap.h +++ b/sys/dev/netmap/if_igb_netmap.h @@ -25,7 +25,7 @@ /* * $FreeBSD$ - * $Id: if_igb_netmap.h 9662 2011-11-16 13:18:06Z luigi $ + * $Id: if_igb_netmap.h 9802 2011-12-02 18:42:37Z luigi $ * * netmap modifications for igb * contribured by Ahmed Kooli @@ -58,12 +58,7 @@ igb_netmap_attach(struct adapter *adapter) na.nm_rxsync = igb_netmap_rxsync; na.nm_lock = igb_netmap_lock_wrapper; na.nm_register = igb_netmap_reg; - /* - * adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode - * we allocate the buffers on the first register. So we must - * disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set. - */ - na.buff_size = MCLBYTES; + na.buff_size = NETMAP_BUF_SIZE; netmap_attach(&na, adapter->num_queues); } @@ -111,7 +106,7 @@ igb_netmap_reg(struct ifnet *ifp, int onoff) struct netmap_adapter *na = NA(ifp); int error = 0; - if (!na) + if (na == NULL) return EINVAL; igb_disable_intr(adapter); @@ -144,21 +139,6 @@ fail: /* * Reconcile kernel and user view of the transmit ring. - * - * Userspace has filled tx slots up to cur (excluded). - * The last unused slot previously known to the kernel was nr_hwcur, - * and the last interrupt reported nr_hwavail slots available - * (using the special value -1 to indicate idle transmit ring). - * The function must first update avail to what the kernel - * knows, subtract the newly used slots (cur - nr_hwcur) - * from both avail and nr_hwavail, and set nr_hwcur = cur - * issuing a dmamap_sync on all slots. - * - * Check parameters in the struct netmap_ring. - * We don't use avail, only check for bogus values. - * Make sure cur is valid, and same goes for buffer indexes and lengths. 
- * To avoid races, read the values once, and never use those from - * the ring afterwards. */ static int igb_netmap_txsync(void *a, u_int ring_nr, int do_lock) @@ -168,54 +148,40 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; + int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; /* generate an interrupt approximately every half ring */ int report_frequency = kring->nkr_num_slots >> 1; - k = ring->cur; /* ring is not protected by any lock */ - if ( (kring->nr_kflags & NR_REINIT) || k > lim) + k = ring->cur; + if (k > lim) return netmap_ring_reinit(kring); if (do_lock) IGB_TX_LOCK(txr); bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - - /* record completed transmissions. TODO - * - * Instead of reading from the TDH register, we could and try to check - * the status bit of descriptor packets. - */ - j = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr)); - if (j >= kring->nkr_num_slots) /* XXX can it happen ? 
*/ - j -= kring->nkr_num_slots; - int delta = j - txr->next_to_clean; - if (delta) { - /* new tx were completed */ - if (delta < 0) - delta += kring->nkr_num_slots; - txr->next_to_clean = j; - kring->nr_hwavail += delta; - } + BUS_DMASYNC_POSTREAD); /* update avail to what the hardware knows */ ring->avail = kring->nr_hwavail; - j = kring->nr_hwcur; + j = kring->nr_hwcur; /* netmap ring index */ if (j != k) { /* we have new packets to send */ u32 olinfo_status = 0; - n = 0; + int n = 0; + l = j - kring->nkr_hwofs; /* NIC ring index */ + if (l < 0) + l += lim + 1; /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) olinfo_status |= txr->me << 4; while (j != k) { struct netmap_slot *slot = &ring->slot[j]; - struct igb_tx_buffer *txbuf = &txr->tx_buffers[j]; + struct igb_tx_buffer *txbuf = &txr->tx_buffers[l]; union e1000_adv_tx_desc *curr = - (union e1000_adv_tx_desc *)&txr->tx_base[j]; + (union e1000_adv_tx_desc *)&txr->tx_base[l]; void *addr = NMB(slot); int flags = ((slot->flags & NS_REPORT) || j == 0 || j == report_frequency) ? @@ -229,6 +195,7 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock) } slot->flags &= ~NS_REPORT; + // XXX do we need to set the address ? curr->read.buffer_addr = htole64(vtophys(addr)); curr->read.olinfo_status = htole32(olinfo_status | @@ -239,7 +206,7 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock) E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_EOP | flags); if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, unload and reload map */ + /* buffer has changed, reload map */ netmap_reload_map(txr->txtag, txbuf->map, addr, na->buff_size); slot->flags &= ~NS_BUF_CHANGED; @@ -248,22 +215,40 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock) bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 
0 : l + 1; n++; } kring->nr_hwcur = k; /* decrease avail by number of sent packets */ - ring->avail -= n; - kring->nr_hwavail = ring->avail; + kring->nr_hwavail -= n; + ring->avail = kring->nr_hwavail; - /* Set the watchdog */ + /* Set the watchdog XXX ? */ txr->queue_status = IGB_QUEUE_WORKING; txr->watchdog_time = ticks; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), k); + E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l); + } + if (n == 0 || kring->nr_hwavail < 1) { + int delta; + + /* record completed transmission using TDH */ + l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr)); + if (l >= kring->nkr_num_slots) /* XXX can it happen ? */ + l -= kring->nkr_num_slots; + delta = l - txr->next_to_clean; + if (delta) { + /* new tx were completed */ + if (delta < 0) + delta += kring->nkr_num_slots; + txr->next_to_clean = l; + kring->nr_hwavail += delta; + ring->avail = kring->nr_hwavail; + } } if (do_lock) IGB_TX_UNLOCK(txr); @@ -273,15 +258,6 @@ igb_netmap_txsync(void *a, u_int ring_nr, int do_lock) /* * Reconcile kernel and user view of the receive ring. - * - * Userspace has read rx slots up to cur (excluded). - * The last unread slot previously known to the kernel was nr_hwcur, - * and the last interrupt reported nr_hwavail slots available. - * We must subtract the newly consumed slots (cur - nr_hwcur) - * from nr_hwavail, clearing the descriptors for the next - * read, tell the hardware that they are available, - * and set nr_hwcur = cur and avail = nr_hwavail. - * issuing a dmamap_sync on all slots. 
*/ static int igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock) @@ -291,10 +267,10 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; + int j, k, l, n, lim = kring->nkr_num_slots - 1; - k = ring->cur; /* ring is not protected by any lock */ - if ( (kring->nr_kflags & NR_REINIT) || k > lim) + k = ring->cur; + if (k > lim) return netmap_ring_reinit(kring); if (do_lock) @@ -304,9 +280,12 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock) bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - j = rxr->next_to_check; + l = rxr->next_to_check; + j = l + kring->nkr_hwofs; + if (j > lim) + j -= lim + 1; for (n = 0; ; n++) { - union e1000_adv_rx_desc *curr = &rxr->rx_base[j]; + union e1000_adv_rx_desc *curr = &rxr->rx_base[l]; uint32_t staterr = le32toh(curr->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) @@ -314,15 +293,13 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock) ring->slot[j].len = le16toh(curr->wb.upper.length); bus_dmamap_sync(rxr->ptag, - rxr->rx_buffers[j].pmap, BUS_DMASYNC_POSTREAD); + rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; } if (n) { - rxr->next_to_check = j; + rxr->next_to_check = l; kring->nr_hwavail += n; - if (kring->nr_hwavail >= lim - 10) { - ND("rx ring %d almost full %d", ring_nr, kring->nr_hwavail); - } } /* skip past packets that userspace has already processed, @@ -332,12 +309,15 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock) * Also increase nr_hwavail */ j = kring->nr_hwcur; + l = kring->nr_hwcur - kring->nkr_hwofs; + if (l < 0) + l += lim + 1; if (j != k) { /* userspace has read some packets. 
*/ n = 0; while (j != k) { struct netmap_slot *slot = ring->slot + j; - union e1000_adv_rx_desc *curr = &rxr->rx_base[j]; - struct igb_rx_buf *rxbuf = rxr->rx_buffers + j; + union e1000_adv_rx_desc *curr = &rxr->rx_base[l]; + struct igb_rx_buf *rxbuf = rxr->rx_buffers + l; void *addr = NMB(slot); if (addr == netmap_buffer_base) { /* bad buf */ @@ -358,6 +338,7 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock) BUS_DMASYNC_PREREAD); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; n++; } kring->nr_hwavail -= n; @@ -365,10 +346,10 @@ igb_netmap_rxsync(void *a, u_int ring_nr, int do_lock) bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* IMPORTANT: we must leave one free slot in the ring, - * so move j back by one unit + * so move l back by one unit */ - j = (j == 0) ? lim : j - 1; - E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), j); + l = (l == 0) ? lim : l - 1; + E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l); } /* tell userspace that there are new packets */ ring->avail = kring->nr_hwavail ; diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h index a8f3498..ae64cd6 100644 --- a/sys/dev/netmap/if_lem_netmap.h +++ b/sys/dev/netmap/if_lem_netmap.h @@ -25,9 +25,12 @@ /* * $FreeBSD$ - * $Id: if_lem_netmap.h 9662 2011-11-16 13:18:06Z luigi $ + * $Id: if_lem_netmap.h 9802 2011-12-02 18:42:37Z luigi $ * * netmap support for if_lem.c + * + * For structure and details on the individual functions please see + * ixgbe_netmap.h */ #include <net/netmap.h> @@ -59,7 +62,7 @@ lem_netmap_attach(struct adapter *adapter) na.nm_rxsync = lem_netmap_rxsync; na.nm_lock = lem_netmap_lock_wrapper; na.nm_register = lem_netmap_reg; - na.buff_size = MCLBYTES; + na.buff_size = NETMAP_BUF_SIZE; netmap_attach(&na, 1); } @@ -94,7 +97,61 @@ lem_netmap_lock_wrapper(void *_a, int what, u_int ringid) /* - * Reconcile kernel and user view of the transmit ring. 
see ixgbe.c + * Register/unregister routine + */ +static int +lem_netmap_reg(struct ifnet *ifp, int onoff) +{ + struct adapter *adapter = ifp->if_softc; + struct netmap_adapter *na = NA(ifp); + int error = 0; + + if (na == NULL) + return EINVAL; + + lem_disable_intr(adapter); + + /* Tell the stack that the interface is no longer active */ + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + + /* lem_netmap_block_tasks(adapter); */ +#ifndef EM_LEGACY_IRQ // XXX do we need this ? + taskqueue_block(adapter->tq); + taskqueue_drain(adapter->tq, &adapter->rxtx_task); + taskqueue_drain(adapter->tq, &adapter->link_task); +#endif /* !EM_LEGACY_IRQ */ + if (onoff) { + ifp->if_capenable |= IFCAP_NETMAP; + + /* save if_transmit to restore it when exiting. + * XXX what about if_start and if_qflush ? + */ + na->if_transmit = ifp->if_transmit; + ifp->if_transmit = netmap_start; + + lem_init_locked(adapter); + if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) { + error = ENOMEM; + goto fail; + } + } else { +fail: + /* restore non-netmap mode */ + ifp->if_transmit = na->if_transmit; + ifp->if_capenable &= ~IFCAP_NETMAP; + lem_init_locked(adapter); /* also enables intr */ + } + +#ifndef EM_LEGACY_IRQ + taskqueue_unblock(adapter->tq); // XXX do we need this ? +#endif /* !EM_LEGACY_IRQ */ + + return (error); +} + + +/* + * Reconcile kernel and user view of the transmit ring. 
*/ static int lem_netmap_txsync(void *a, u_int ring_nr, int do_lock) @@ -103,13 +160,13 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->tx_rings[0]; struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; + int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; /* generate an interrupt approximately every half ring */ int report_frequency = kring->nkr_num_slots >> 1; k = ring->cur; - if ( (kring->nr_kflags & NR_REINIT) || k > lim) + if (k > lim) return netmap_ring_reinit(kring); if (do_lock) @@ -117,33 +174,18 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock) bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, BUS_DMASYNC_POSTREAD); - /* record completed transmissions TODO - * - * instead of using TDH, we could read the transmitted status bit. - */ - j = E1000_READ_REG(&adapter->hw, E1000_TDH(0)); - if (j >= kring->nkr_num_slots) { /* can it happen ? */ - D("bad TDH %d", j); - j -= kring->nkr_num_slots; - } - int delta = j - adapter->next_tx_to_clean; - if (delta) { - if (delta < 0) - delta += kring->nkr_num_slots; - adapter->next_tx_to_clean = j; - kring->nr_hwavail += delta; - } - /* update avail to what the hardware knows */ ring->avail = kring->nr_hwavail; - j = kring->nr_hwcur; + j = kring->nr_hwcur; /* points into the netmap ring */ if (j != k) { /* we have new packets to send */ - n = 0; + l = j - kring->nkr_hwofs; /* points into the NIC ring */ + if (l < 0) + l += lim + 1; while (j != k) { struct netmap_slot *slot = &ring->slot[j]; - struct e1000_tx_desc *curr = &adapter->tx_desc_base[j]; - struct em_buffer *txbuf = &adapter->tx_buffer_area[j]; + struct e1000_tx_desc *curr = &adapter->tx_desc_base[l]; + struct em_buffer *txbuf = &adapter->tx_buffer_area[l]; void *addr = NMB(slot); int flags = ((slot->flags & NS_REPORT) || j == 0 || j == report_frequency) ? 
@@ -156,34 +198,54 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock) return netmap_ring_reinit(kring); } + slot->flags &= ~NS_REPORT; curr->upper.data = 0; - /* always interrupt. XXX make it conditional */ curr->lower.data = htole32( adapter->txd_cmd | len | (E1000_TXD_CMD_EOP | flags) ); if (slot->flags & NS_BUF_CHANGED) { curr->buffer_addr = htole64(vtophys(addr)); - /* buffer has changed, unload and reload map */ + /* buffer has changed, reload map */ netmap_reload_map(adapter->txtag, txbuf->map, - addr, na->buff_size); + addr, na->buff_size); slot->flags &= ~NS_BUF_CHANGED; } bus_dmamap_sync(adapter->txtag, txbuf->map, - BUS_DMASYNC_PREWRITE); + BUS_DMASYNC_PREWRITE); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; n++; } - kring->nr_hwcur = ring->cur; + kring->nr_hwcur = k; /* decrease avail by number of sent packets */ - ring->avail -= n; - kring->nr_hwavail = ring->avail; + kring->nr_hwavail -= n; + ring->avail = kring->nr_hwavail; bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), ring->cur); + E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), l); + } + + if (n == 0 || kring->nr_hwavail < 1) { + int delta; + + /* record completed transmissions using TDH */ + l = E1000_READ_REG(&adapter->hw, E1000_TDH(0)); + if (l >= kring->nkr_num_slots) { /* can it happen ? */ + D("bad TDH %d", l); + l -= kring->nkr_num_slots; + } + delta = l - adapter->next_tx_to_clean; + if (delta) { + if (delta < 0) + delta += kring->nkr_num_slots; + adapter->next_tx_to_clean = l; + kring->nr_hwavail += delta; + ring->avail = kring->nr_hwavail; + } } if (do_lock) EM_TX_UNLOCK(adapter); @@ -192,7 +254,7 @@ lem_netmap_txsync(void *a, u_int ring_nr, int do_lock) /* - * Reconcile kernel and user view of the receive ring. see ixgbe.c + * Reconcile kernel and user view of the receive ring. 
*/ static int lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock) @@ -201,10 +263,10 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[0]; struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; + int j, k, l, n, lim = kring->nkr_num_slots - 1; k = ring->cur; - if ( (kring->nr_kflags & NR_REINIT) || k > lim) + if (k > lim) return netmap_ring_reinit(kring); if (do_lock) @@ -213,40 +275,45 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock) bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - /* acknowldge all the received packets. */ - j = adapter->next_rx_desc_to_check; + /* import newly received packets into the netmap ring */ + l = adapter->next_rx_desc_to_check; /* points into the NIC ring */ + j = l + kring->nkr_hwofs; /* points into the netmap ring */ + if (j > lim) + j -= lim + 1; for (n = 0; ; n++) { - struct e1000_rx_desc *curr = &adapter->rx_desc_base[j]; - int len = le16toh(adapter->rx_desc_base[j].length) - 4; // CRC + struct e1000_rx_desc *curr = &adapter->rx_desc_base[l]; + int len; if ((curr->status & E1000_RXD_STAT_DD) == 0) break; + len = le16toh(curr->length) - 4; // CRC if (len < 0) { D("bogus pkt size at %d", j); len = 0; } ring->slot[j].len = len; - bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[j].map, - BUS_DMASYNC_POSTREAD); + bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[l].map, + BUS_DMASYNC_POSTREAD); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; } if (n) { - adapter->next_rx_desc_to_check = j; + adapter->next_rx_desc_to_check = l; kring->nr_hwavail += n; } - /* skip past packets that userspace has already processed, - * making them available for reception. We don't need to set - * the length as it is the same for all slots. 
- */ - j = kring->nr_hwcur; + /* skip past packets that userspace has already processed */ + j = kring->nr_hwcur; /* netmap ring index */ if (j != k) { /* userspace has read some packets. */ n = 0; + l = j - kring->nkr_hwofs; /* NIC ring index */ + if (l < 0) + l += lim + 1; while (j != k) { struct netmap_slot *slot = &ring->slot[j]; - struct e1000_rx_desc *curr = &adapter->rx_desc_base[j]; - struct em_buffer *rxbuf = &adapter->rx_buffer_area[j]; + struct e1000_rx_desc *curr = &adapter->rx_desc_base[l]; + struct em_buffer *rxbuf = &adapter->rx_buffer_area[l]; void *addr = NMB(slot); if (addr == netmap_buffer_base) { /* bad buf */ @@ -254,32 +321,32 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock) EM_RX_UNLOCK(adapter); return netmap_ring_reinit(kring); } - curr = &adapter->rx_desc_base[j]; curr->status = 0; if (slot->flags & NS_BUF_CHANGED) { curr->buffer_addr = htole64(vtophys(addr)); - /* buffer has changed, unload and reload map */ + /* buffer has changed, and reload map */ netmap_reload_map(adapter->rxtag, rxbuf->map, - addr, na->buff_size); + addr, na->buff_size); slot->flags &= ~NS_BUF_CHANGED; } bus_dmamap_sync(adapter->rxtag, rxbuf->map, - BUS_DMASYNC_PREREAD); + BUS_DMASYNC_PREREAD); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; n++; } kring->nr_hwavail -= n; - kring->nr_hwcur = ring->cur; + kring->nr_hwcur = k; bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * IMPORTANT: we must leave one free slot in the ring, - * so move j back by one unit + * so move l back by one unit */ - j = (j == 0) ? lim : j - 1; - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), j); + l = (l == 0) ? 
lim : l - 1; + E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), l); } /* tell userspace that there are new packets */ @@ -290,55 +357,3 @@ lem_netmap_rxsync(void *a, u_int ring_nr, int do_lock) } -/* - * Register/unregister routine - */ -static int -lem_netmap_reg(struct ifnet *ifp, int onoff) -{ - struct adapter *adapter = ifp->if_softc; - struct netmap_adapter *na = NA(ifp); - int error = 0; - - if (!na) - return EINVAL; - - lem_disable_intr(adapter); - - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - - /* lem_netmap_block_tasks(adapter); */ -#ifndef EM_LEGACY_IRQ - taskqueue_block(adapter->tq); - taskqueue_drain(adapter->tq, &adapter->rxtx_task); - taskqueue_drain(adapter->tq, &adapter->link_task); -#endif /* !EM_LEGCY_IRQ */ - if (onoff) { - ifp->if_capenable |= IFCAP_NETMAP; - - /* save if_transmit to restore it when exiting. - * XXX what about if_start and if_qflush ? - */ - na->if_transmit = ifp->if_transmit; - ifp->if_transmit = netmap_start; - - lem_init_locked(adapter); - if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) { - error = ENOMEM; - goto fail; - } - } else { -fail: - /* restore non-netmap mode */ - ifp->if_transmit = na->if_transmit; - ifp->if_capenable &= ~IFCAP_NETMAP; - lem_init_locked(adapter); /* also enables intr */ - } - -#ifndef EM_LEGACY_IRQ - taskqueue_unblock(adapter->tq); -#endif /* !EM_LEGCY_IRQ */ - - return (error); -} diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h index efccf3a..105660c 100644 --- a/sys/dev/netmap/if_re_netmap.h +++ b/sys/dev/netmap/if_re_netmap.h @@ -25,7 +25,7 @@ /* * $FreeBSD$ - * $Id: if_re_netmap.h 9662 2011-11-16 13:18:06Z luigi $ + * $Id: if_re_netmap.h 9802 2011-12-02 18:42:37Z luigi $ * * netmap support for if_re */ @@ -56,7 +56,7 @@ re_netmap_attach(struct rl_softc *sc) na.nm_rxsync = re_netmap_rxsync; na.nm_lock = re_netmap_lock_wrapper; na.nm_register = re_netmap_reg; - na.buff_size = 
MCLBYTES; + na.buff_size = NETMAP_BUF_SIZE; netmap_attach(&na, 1); } @@ -99,7 +99,7 @@ re_netmap_reg(struct ifnet *ifp, int onoff) struct netmap_adapter *na = NA(ifp); int error = 0; - if (!na) + if (na == NULL) return EINVAL; /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); @@ -109,9 +109,8 @@ re_netmap_reg(struct ifnet *ifp, int onoff) if (onoff) { ifp->if_capenable |= IFCAP_NETMAP; - /* save if_transmit and restore it */ + /* save if_transmit to restore it later */ na->if_transmit = ifp->if_transmit; - /* XXX if_start and if_qflush ??? */ ifp->if_transmit = netmap_start; re_init_locked(adapter); @@ -127,23 +126,12 @@ fail: ifp->if_capenable &= ~IFCAP_NETMAP; re_init_locked(adapter); /* also enables intr */ } - return (error); - + return (error); } /* * Reconcile kernel and user view of the transmit ring. - * - * Userspace has filled tx slots up to cur (excluded). - * The last unused slot previously known to the kernel was nr_hwcur, - * and the last interrupt reported nr_hwavail slots available - * (using the special value -1 to indicate idle transmit ring). - * The function must first update avail to what the kernel - * knows (translating the -1 to nkr_num_slots - 1), - * subtract the newly used slots (cur - nr_hwcur) - * from both avail and nr_hwavail, and set nr_hwcur = cur - * issuing a dmamap_sync on all slots. 
*/ static int re_netmap_txsync(void *a, u_int ring_nr, int do_lock) @@ -153,10 +141,10 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(sc->rl_ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; + int j, k, l, n, lim = kring->nkr_num_slots - 1; k = ring->cur; - if ( (kring->nr_kflags & NR_REINIT) || k > lim) + if (k > lim) return netmap_ring_reinit(kring); if (do_lock) @@ -167,17 +155,18 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock) sc->rl_ldata.rl_tx_list_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + /* XXX move after the transmissions */ /* record completed transmissions */ - for (n = 0, j = sc->rl_ldata.rl_tx_considx; - j != sc->rl_ldata.rl_tx_prodidx; - n++, j = RL_TX_DESC_NXT(sc, j)) { + for (n = 0, l = sc->rl_ldata.rl_tx_considx; + l != sc->rl_ldata.rl_tx_prodidx; + n++, l = RL_TX_DESC_NXT(sc, l)) { uint32_t cmdstat = - le32toh(sc->rl_ldata.rl_tx_list[j].rl_cmdstat); + le32toh(sc->rl_ldata.rl_tx_list[l].rl_cmdstat); if (cmdstat & RL_TDESC_STAT_OWN) break; } if (n > 0) { - sc->rl_ldata.rl_tx_considx = j; + sc->rl_ldata.rl_tx_considx = l; sc->rl_ldata.rl_tx_free += n; kring->nr_hwavail += n; } @@ -185,13 +174,13 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock) /* update avail to what the hardware knows */ ring->avail = kring->nr_hwavail; - /* we trust prodidx, not hwcur */ - j = kring->nr_hwcur = sc->rl_ldata.rl_tx_prodidx; + j = kring->nr_hwcur; if (j != k) { /* we have new packets to send */ n = 0; + l = sc->rl_ldata.rl_tx_prodidx; while (j != k) { struct netmap_slot *slot = &ring->slot[j]; - struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[j]; + struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[l]; int cmd = slot->len | RL_TDESC_CMD_EOF | RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF ; void *addr = NMB(slot); @@ -200,10 +189,11 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock) if (addr == netmap_buffer_base 
|| len > NETMAP_BUF_SIZE) { if (do_lock) RL_UNLOCK(sc); + // XXX what about prodidx ? return netmap_ring_reinit(kring); } - if (j == lim) /* mark end of ring */ + if (l == lim) /* mark end of ring */ cmd |= RL_TDESC_CMD_EOR; if (slot->flags & NS_BUF_CHANGED) { @@ -212,17 +202,19 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock) desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); /* buffer has changed, unload and reload map */ netmap_reload_map(sc->rl_ldata.rl_tx_mtag, - txd[j].tx_dmamap, addr, na->buff_size); + txd[l].tx_dmamap, addr, na->buff_size); slot->flags &= ~NS_BUF_CHANGED; } slot->flags &= ~NS_REPORT; desc->rl_cmdstat = htole32(cmd); bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag, - txd[j].tx_dmamap, BUS_DMASYNC_PREWRITE); + txd[l].tx_dmamap, BUS_DMASYNC_PREWRITE); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; n++; } - sc->rl_ldata.rl_tx_prodidx = kring->nr_hwcur = ring->cur; + sc->rl_ldata.rl_tx_prodidx = l; + kring->nr_hwcur = k; /* decrease avail by number of sent packets */ ring->avail -= n; @@ -243,15 +235,6 @@ re_netmap_txsync(void *a, u_int ring_nr, int do_lock) /* * Reconcile kernel and user view of the receive ring. - * - * Userspace has read rx slots up to cur (excluded). - * The last unread slot previously known to the kernel was nr_hwcur, - * and the last interrupt reported nr_hwavail slots available. - * We must subtract the newly consumed slots (cur - nr_hwcur) - * from nr_hwavail, clearing the descriptors for the next - * read, tell the hardware that they are available, - * and set nr_hwcur = cur and avail = nr_hwavail. - * issuing a dmamap_sync on all slots. 
*/ static int re_netmap_rxsync(void *a, u_int ring_nr, int do_lock) @@ -261,10 +244,10 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(sc->rl_ifp); struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; + int j, k, l, n, lim = kring->nkr_num_slots - 1; k = ring->cur; - if ( (kring->nr_kflags & NR_REINIT) || k > lim) + if (k > lim) return netmap_ring_reinit(kring); if (do_lock) @@ -280,9 +263,10 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock) * cleared (all buffers could have it cleared. The easiest one * is to limit the amount of data reported up to 'lim' */ - j = sc->rl_ldata.rl_rx_prodidx; + l = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */ + j = l + kring->nkr_hwofs; for (n = kring->nr_hwavail; n < lim ; n++) { - struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[j]; + struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l]; uint32_t rxstat = le32toh(cur_rx->rl_cmdstat); uint32_t total_len; @@ -294,11 +278,12 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock) kring->ring->slot[j].len = total_len; /* sync was in re_newbuf() */ bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, - rxd[j].rx_dmamap, BUS_DMASYNC_POSTREAD); - j = RL_RX_DESC_NXT(sc, j); + rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD); + j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; } if (n != kring->nr_hwavail) { - sc->rl_ldata.rl_rx_prodidx = j; + sc->rl_ldata.rl_rx_prodidx = l; sc->rl_ifp->if_ipackets += n - kring->nr_hwavail; kring->nr_hwavail = n; } @@ -312,9 +297,12 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock) j = kring->nr_hwcur; if (j != k) { /* userspace has read some packets. 
*/ n = 0; + l = kring->nr_hwcur - kring->nkr_hwofs; + if (l < 0) + l += lim + 1; while (j != k) { struct netmap_slot *slot = ring->slot + j; - struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[j]; + struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[l]; int cmd = na->buff_size | RL_RDESC_CMD_OWN; void *addr = NMB(slot); @@ -324,7 +312,7 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock) return netmap_ring_reinit(kring); } - if (j == lim) /* mark end of ring */ + if (l == lim) /* mark end of ring */ cmd |= RL_RDESC_CMD_EOR; desc->rl_cmdstat = htole32(cmd); @@ -334,12 +322,13 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock) desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); netmap_reload_map(sc->rl_ldata.rl_rx_mtag, - rxd[j].rx_dmamap, addr, na->buff_size); + rxd[l].rx_dmamap, addr, na->buff_size); slot->flags &= ~NS_BUF_CHANGED; } bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, - rxd[j].rx_dmamap, BUS_DMASYNC_PREREAD); + rxd[l].rx_dmamap, BUS_DMASYNC_PREREAD); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; n++; } kring->nr_hwavail -= n; @@ -351,18 +340,22 @@ re_netmap_rxsync(void *a, u_int ring_nr, int do_lock) BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD); } /* tell userspace that there are new packets */ - ring->avail = kring->nr_hwavail ; + ring->avail = kring->nr_hwavail; if (do_lock) RL_UNLOCK(sc); return 0; } +/* + * Additional routines to init the tx and rx rings. + * In other drivers we do that inline in the main code. 
+ */ static void re_netmap_tx_init(struct rl_softc *sc) { struct rl_txdesc *txd; struct rl_desc *desc; - int i; + int i, n; struct netmap_adapter *na = NA(sc->rl_ifp); struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0); @@ -372,11 +365,20 @@ re_netmap_tx_init(struct rl_softc *sc) /* in netmap mode, overwrite addresses and maps */ txd = sc->rl_ldata.rl_tx_desc; desc = sc->rl_ldata.rl_tx_list; + n = sc->rl_ldata.rl_tx_desc_cnt; + + /* l points in the netmap ring, i points in the NIC ring */ + for (i = 0; i < n; i++) { + void *addr; + uint64_t paddr; + struct netmap_kring *kring = &na->tx_rings[0]; + int l = i + kring->nkr_hwofs; - for (i = 0; i < sc->rl_ldata.rl_tx_desc_cnt; i++) { - void *addr = NMB(slot+i); - uint64_t paddr = vtophys(addr); + if (l >= n) + l -= n; + addr = NMB(slot + l); + paddr = vtophys(addr); desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); netmap_load_map(sc->rl_ldata.rl_tx_mtag, @@ -387,26 +389,39 @@ re_netmap_tx_init(struct rl_softc *sc) static void re_netmap_rx_init(struct rl_softc *sc) { - /* slot is NULL if we are not in netmap mode */ struct netmap_adapter *na = NA(sc->rl_ifp); struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0); struct rl_desc *desc = sc->rl_ldata.rl_rx_list; uint32_t cmdstat; - int i; + int i, n; if (!slot) return; - - for (i = 0; i < sc->rl_ldata.rl_rx_desc_cnt; i++) { - void *addr = NMB(slot+i); - uint64_t paddr = vtophys(addr); - + n = sc->rl_ldata.rl_rx_desc_cnt; + for (i = 0; i < n; i++) { + void *addr; + uint64_t paddr; + struct netmap_kring *kring = &na->rx_rings[0]; + int l = i + kring->nkr_hwofs; + + if (l >= n) + l -= n; + + addr = NMB(slot + l); + paddr = vtophys(addr); desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr)); desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr)); - cmdstat = slot[i].len = na->buff_size; // XXX - if (i == sc->rl_ldata.rl_rx_desc_cnt - 1) + cmdstat = na->buff_size; + if (i == n - 1) cmdstat |= RL_RDESC_CMD_EOR; - 
desc[i].rl_cmdstat = htole32(cmdstat | RL_RDESC_CMD_OWN); + /* + * userspace knows that hwavail packets were ready before the + * reset, so we need to tell the NIC that last hwavail + * descriptors of the ring are still owned by the driver. + */ + if (i < n - 1 - kring->nr_hwavail) // XXX + 1 ? + cmdstat |= RL_RDESC_CMD_OWN; + desc[i].rl_cmdstat = htole32(cmdstat); netmap_reload_map(sc->rl_ldata.rl_rx_mtag, sc->rl_ldata.rl_rx_desc[i].rx_dmamap, diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h index a4d5491..6c8b2b6 100644 --- a/sys/dev/netmap/ixgbe_netmap.h +++ b/sys/dev/netmap/ixgbe_netmap.h @@ -25,25 +25,48 @@ /* * $FreeBSD$ - * $Id: ixgbe_netmap.h 9662 2011-11-16 13:18:06Z luigi $ + * $Id: ixgbe_netmap.h 9802 2011-12-02 18:42:37Z luigi $ * * netmap modifications for ixgbe + * + * This file is meant to be a reference on how to implement + * netmap support for a network driver. + * This file contains code but only static or inline functions + * that are used by a single driver. To avoid replication of + * code we just #include it near the beginning of the + * standard driver. */ #include <net/netmap.h> #include <sys/selinfo.h> -// #include <vm/vm.h> -// #include <vm/pmap.h> /* vtophys ? */ +/* + * Some drivers may need the following headers. Others + * already include them by default + +#include <vm/vm.h> +#include <vm/pmap.h> + + */ + #include <dev/netmap/netmap_kern.h> +/* + * prototypes for the new API calls that are used by the + * *_netmap_attach() routine. + */ static int ixgbe_netmap_reg(struct ifnet *, int onoff); static int ixgbe_netmap_txsync(void *, u_int, int); static int ixgbe_netmap_rxsync(void *, u_int, int); static void ixgbe_netmap_lock_wrapper(void *, int, u_int); -SYSCTL_NODE(_dev, OID_AUTO, ixgbe, CTLFLAG_RW, 0, "ixgbe card"); - +/* + * The attach routine, called near the end of ixgbe_attach(), + * fills the parameters for netmap_attach() and calls it. 
+ * It cannot fail, in the worst case (such as no memory) + * netmap mode will be disabled and the driver will only + * operate in standard mode. + */ static void ixgbe_netmap_attach(struct adapter *adapter) { @@ -52,7 +75,7 @@ ixgbe_netmap_attach(struct adapter *adapter) bzero(&na, sizeof(na)); na.ifp = adapter->ifp; - na.separate_locks = 1; + na.separate_locks = 1; /* this card has separate rx/tx locks */ na.num_tx_desc = adapter->num_tx_desc; na.num_rx_desc = adapter->num_rx_desc; na.nm_txsync = ixgbe_netmap_txsync; @@ -60,17 +83,18 @@ ixgbe_netmap_attach(struct adapter *adapter) na.nm_lock = ixgbe_netmap_lock_wrapper; na.nm_register = ixgbe_netmap_reg; /* + * XXX where do we put this comment ? * adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode * we allocate the buffers on the first register. So we must * disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set. */ - na.buff_size = MCLBYTES; + na.buff_size = NETMAP_BUF_SIZE; netmap_attach(&na, adapter->num_queues); } /* - * wrapper to export locks to the generic code + * wrapper to export locks to the generic netmap code. */ static void ixgbe_netmap_lock_wrapper(void *_a, int what, u_int queueid) @@ -102,8 +126,8 @@ ixgbe_netmap_lock_wrapper(void *_a, int what, u_int queueid) /* - * support for netmap register/unregisted. We are already under core lock. - * only called on the first init or the last unregister. + * Netmap register/unregister. We are already under core lock. + * Only called on the first register or the last unregister. 
*/ static int ixgbe_netmap_reg(struct ifnet *ifp, int onoff) @@ -112,7 +136,7 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff) struct netmap_adapter *na = NA(ifp); int error = 0; - if (!na) + if (!na) /* probably, netmap_attach() failed */ return EINVAL; ixgbe_disable_intr(adapter); @@ -120,23 +144,28 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff) /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - if (onoff) { + if (onoff) { /* enable netmap mode */ ifp->if_capenable |= IFCAP_NETMAP; - /* save if_transmit to restore it later */ + /* save if_transmit and replace with our routine */ na->if_transmit = ifp->if_transmit; ifp->if_transmit = netmap_start; + /* + * reinitialize the adapter, now with netmap flag set, + * so the rings will be set accordingly. + */ ixgbe_init_locked(adapter); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) { error = ENOMEM; goto fail; } - } else { + } else { /* reset normal mode (explicit request or netmap failed) */ fail: /* restore if_transmit */ ifp->if_transmit = na->if_transmit; ifp->if_capenable &= ~IFCAP_NETMAP; + /* initialize the card, this time in standard mode */ ixgbe_init_locked(adapter); /* also enables intr */ } return (error); @@ -145,21 +174,23 @@ fail: /* * Reconcile kernel and user view of the transmit ring. + * This routine might be called frequently so it must be efficient. + * + * Userspace has filled tx slots up to ring->cur (excluded). + * The last unused slot previously known to the kernel was kring->nkr_hwcur, + * and the last interrupt reported kring->nr_hwavail slots available. * - * Userspace has filled tx slots up to cur (excluded). - * The last unused slot previously known to the kernel was nr_hwcur, - * and the last interrupt reported nr_hwavail slots available - * (using the special value -1 to indicate idle transmit ring). 
- * The function must first update avail to what the kernel
- * knows, subtract the newly used slots (cur - nr_hwcur)
- * from both avail and nr_hwavail, and set nr_hwcur = cur
+ * This function runs under lock (acquired from the caller or internally).
+ * It must first update ring->avail to what the kernel knows,
+ * subtract the newly used slots (ring->cur - kring->nr_hwcur)
+ * from both avail and nr_hwavail, and set kring->nr_hwcur = ring->cur
  * issuing a dmamap_sync on all slots.
  *
- * Check parameters in the struct netmap_ring.
- * We don't use avail, only check for bogus values.
- * Make sure cur is valid, and same goes for buffer indexes and lengths.
- * To avoid races, read the values once, and never use those from
- * the ring afterwards.
+ * Since ring comes from userspace, its content must be read only once,
+ * and validated before being used to update the kernel's structures.
+ * (this is also true for every use of ring in the kernel).
+ *
+ * ring->avail is never used, only checked for bogus values.
*/ static int ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock) @@ -169,42 +200,96 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, n = 0, lim = kring->nkr_num_slots - 1; + int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; - /* generate an interrupt approximately every half ring */ + /* + * ixgbe can generate an interrupt on every tx packet, but it + * seems very expensive, so we interrupt once every half ring, + * or when requested with NS_REPORT + */ int report_frequency = kring->nkr_num_slots >> 1; - k = ring->cur; /* ring is not protected by any lock */ - if ( (kring->nr_kflags & NR_REINIT) || k > lim) - return netmap_ring_reinit(kring); - if (do_lock) IXGBE_TX_LOCK(txr); + /* take a copy of ring->cur now, and never read it again */ + k = ring->cur; + l = k - kring->nr_hwcur; + if (l < 0) + l += lim + 1; + /* if cur is invalid reinitialize the ring. */ + if (k > lim || l > kring->nr_hwavail) { + if (do_lock) + IXGBE_TX_UNLOCK(txr); + return netmap_ring_reinit(kring); + } + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); - /* update avail to what the hardware knows */ - ring->avail = kring->nr_hwavail; - + /* + * Process new packets to send. j is the current index in the + * netmap ring, l is the corresponding index in the NIC ring. + * The two numbers differ because upon a *_init() we reset + * the NIC ring but leave the netmap ring unchanged. + * For the transmit ring, we have + * + * j = kring->nr_hwcur + * l = IXGBE_TDT (not tracked in the driver) + * and + * j == (l + kring->nkr_hwofs) % ring_size + * + * In this driver kring->nkr_hwofs >= 0, but for other + * drivers it might be negative as well. 
+ */ j = kring->nr_hwcur; if (j != k) { /* we have new packets to send */ + l = j - kring->nkr_hwofs; + if (l < 0) /* wraparound */ + l += lim + 1; + while (j != k) { + /* + * Collect per-slot info. + * Note that txbuf and curr are indexed by l. + * + * In this driver we collect the buffer address + * (using the NMB() macro) because we always + * need to rewrite it into the NIC ring. + * Many other drivers preserve the address, so + * we only need to access it if NS_BUF_CHANGED + * is set. + */ struct netmap_slot *slot = &ring->slot[j]; - struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[j]; - union ixgbe_adv_tx_desc *curr = &txr->tx_base[j]; + struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[l]; + union ixgbe_adv_tx_desc *curr = &txr->tx_base[l]; void *addr = NMB(slot); + // XXX type for flags and len ? int flags = ((slot->flags & NS_REPORT) || j == 0 || j == report_frequency) ? IXGBE_TXD_CMD_RS : 0; int len = slot->len; + /* + * Quick check for valid addr and len. + * NMB() returns netmap_buffer_base for invalid + * buffer indexes (but the address is still a + * valid one to be used in a ring). slot->len is + * unsigned so no need to check for negative values. + */ if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) { +ring_reset: if (do_lock) IXGBE_TX_UNLOCK(txr); return netmap_ring_reinit(kring); } slot->flags &= ~NS_REPORT; + /* + * Fill the slot in the NIC ring. + * In this driver we need to rewrite the buffer + * address in the NIC ring. Other drivers do not + * need this. + */ curr->read.buffer_addr = htole64(vtophys(addr)); curr->read.olinfo_status = 0; curr->read.cmd_type_len = @@ -212,6 +297,10 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock) (IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP | flags) ); + /* If the buffer has changed, unload and reload map + * (and possibly the physical address in the NIC + * slot, but we did it already). 
+ */ if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, unload and reload map */ netmap_reload_map(txr->txtag, txbuf->map, @@ -219,69 +308,89 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock) slot->flags &= ~NS_BUF_CHANGED; } + /* make sure changes to the buffer are synced */ bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; n++; } - kring->nr_hwcur = k; + kring->nr_hwcur = k; /* the saved ring->cur */ /* decrease avail by number of sent packets */ - ring->avail -= n; - kring->nr_hwavail = ring->avail; + kring->nr_hwavail -= n; + /* synchronize the NIC ring */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), k); + /* (re)start the transmitter up to slot l (excluded) */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), l); } + /* + * If no packets are sent, or there is no room in the tx ring, + * Check whether there are completed transmissions. + * Because this is expensive (we need a register etc.) + * we only do it if absolutely necessary, i.e. there is no room + * in the tx ring, or where were no completed transmissions + * (meaning that probably the caller really wanted to check + * for completed transmissions). + */ if (n == 0 || kring->nr_hwavail < 1) { - /* record completed transmissions. TODO + int delta; + + /* + * Record completed transmissions. + * We (re)use the driver's txr->next_to_clean to keep + * track of the most recently completed transmission. * * The datasheet discourages the use of TDH to find out the - * number of sent packets; the right way to do so, is to check - * the DD bit inside the status of a packet descriptor. 
On the - * other hand, we avoid to set the `report status' bit for - * *all* outgoing packets (kind of interrupt mitigation), - * consequently the DD bit is not guaranteed to be set for all - * the packets: thats way, for the moment we continue to use - * TDH. + * number of sent packets. We should rather check the DD + * status bit in a packet descriptor. However, we only set + * the "report status" bit for some descriptors (a kind of + * interrupt mitigation), so we can only check on those. + * For the time being we use TDH, as we do it infrequently + * enough not to pose performance problems. */ - j = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr)); - if (j >= kring->nkr_num_slots) { /* XXX can happen */ - D("TDH wrap %d", j); - j -= kring->nkr_num_slots; + l = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr)); + if (l >= kring->nkr_num_slots) { /* XXX can happen */ + D("TDH wrap %d", l); + l -= kring->nkr_num_slots; } - int delta = j - txr->next_to_clean; + delta = l - txr->next_to_clean; if (delta) { - /* new transmissions were completed, increment - ring->nr_hwavail. */ + /* some tx completed, increment avail */ if (delta < 0) delta += kring->nkr_num_slots; - txr->next_to_clean = j; + txr->next_to_clean = l; kring->nr_hwavail += delta; - ring->avail = kring->nr_hwavail; + if (kring->nr_hwavail > lim) + goto ring_reset; } } + /* update avail to what the kernel knows */ + ring->avail = kring->nr_hwavail; if (do_lock) IXGBE_TX_UNLOCK(txr); return 0; + } /* * Reconcile kernel and user view of the receive ring. + * Same as for the txsync, this routine must be efficient and + * avoid races in accessing the shared regions. + * + * When called, userspace has read data from slots kring->nr_hwcur + * up to ring->cur (excluded). * - * Userspace has read rx slots up to cur (excluded). - * The last unread slot previously known to the kernel was nr_hwcur, - * and the last interrupt reported nr_hwavail slots available. 
+ * The last interrupt reported kring->nr_hwavail slots available + * after kring->nr_hwcur. * We must subtract the newly consumed slots (cur - nr_hwcur) - * from nr_hwavail, clearing the descriptors for the next - * read, tell the hardware that they are available, - * and set nr_hwcur = cur and avail = nr_hwavail. - * issuing a dmamap_sync on all slots. + * from nr_hwavail, make the descriptors available for the next reads, + * and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail. */ static int ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock) @@ -291,86 +400,123 @@ ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; + int j, k, l, n, lim = kring->nkr_num_slots - 1; - k = ring->cur; /* ring is not protected by any lock */ - if ( (kring->nr_kflags & NR_REINIT) || k > lim) + k = ring->cur; /* cache and check value, same as in txsync */ + n = k - kring->nr_hwcur; + if (n < 0) + n += lim + 1; + if (k > lim || n > kring->nr_hwavail) /* userspace is cheating */ return netmap_ring_reinit(kring); if (do_lock) IXGBE_RX_LOCK(rxr); + if (n < 0) + n += lim + 1; /* XXX check sync modes */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - j = rxr->next_to_check; + /* + * First part, import newly received packets into the netmap ring. + * + * j is the index of the next free slot in the netmap ring, + * and l is the index of the next received packet in the NIC ring, + * and they may differ in case if_init() has been called while + * in netmap mode. 
For the receive ring we have + * + * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size + * l = rxr->next_to_check; + * and + * j == (l + kring->nkr_hwofs) % ring_size + * + * rxr->next_to_check is set to 0 on a ring reinit + */ + l = rxr->next_to_check; + j = rxr->next_to_check + kring->nkr_hwofs; + if (j > lim) + j -= lim + 1; + for (n = 0; ; n++) { - union ixgbe_adv_rx_desc *curr = &rxr->rx_base[j]; + union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l]; uint32_t staterr = le32toh(curr->wb.upper.status_error); if ((staterr & IXGBE_RXD_STAT_DD) == 0) break; ring->slot[j].len = le16toh(curr->wb.upper.length); bus_dmamap_sync(rxr->ptag, - rxr->rx_buffers[j].pmap, BUS_DMASYNC_POSTREAD); + rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; } - if (n) { - rxr->next_to_check = j; + if (n) { /* update the state variables */ + rxr->next_to_check = l; kring->nr_hwavail += n; - if (kring->nr_hwavail >= lim - 10) { - ND("rx ring %d almost full %d", ring_nr, kring->nr_hwavail); - } } - /* skip past packets that userspace has already processed, - * making them available for reception. - * advance nr_hwcur and issue a bus_dmamap_sync on the - * buffers so it is safe to write to them. - * Also increase nr_hwavail + /* + * Skip past packets that userspace has already processed + * (from kring->nr_hwcur to ring->cur excluded), and make + * the buffers available for reception. + * As usual j is the index in the netmap ring, l is the index + * in the NIC ring, and j == (l + kring->nkr_hwofs) % ring_size */ j = kring->nr_hwcur; if (j != k) { /* userspace has read some packets. */ n = 0; + l = kring->nr_hwcur - kring->nkr_hwofs; + if (l < 0) + l += lim + 1; while (j != k) { - struct netmap_slot *slot = ring->slot + j; - union ixgbe_adv_rx_desc *curr = &rxr->rx_base[j]; - struct ixgbe_rx_buf *rxbuf = rxr->rx_buffers + j; + /* collect per-slot info, with similar validations + * and flag handling as in the txsync code. 
+ * + * NOTE curr and rxbuf are indexed by l. + * Also, this driver needs to update the physical * address in the NIC ring, but other drivers + * may not have this requirement. + */ + struct netmap_slot *slot = &ring->slot[j]; + union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l]; + struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[l]; void *addr = NMB(slot); - if (addr == netmap_buffer_base) { /* bad buf */ - if (do_lock) - IXGBE_RX_UNLOCK(rxr); - return netmap_ring_reinit(kring); - } + if (addr == netmap_buffer_base) /* bad buf */ + goto ring_reset; curr->wb.upper.status_error = 0; curr->read.pkt_addr = htole64(vtophys(addr)); if (slot->flags & NS_BUF_CHANGED) { netmap_reload_map(rxr->ptag, rxbuf->pmap, - addr, na->buff_size); + addr, na->buff_size); slot->flags &= ~NS_BUF_CHANGED; } bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); + BUS_DMASYNC_PREREAD); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; n++; } kring->nr_hwavail -= n; - kring->nr_hwcur = ring->cur; + kring->nr_hwcur = k; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* IMPORTANT: we must leave one free slot in the ring, - * so move j back by one unit + * so move l back by one unit */ - j = (j == 0) ? lim : j - 1; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), j); + l = (l == 0) ? lim : l - 1; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), l); } /* tell userspace that there are new packets */ ring->avail = kring->nr_hwavail ; if (do_lock) IXGBE_RX_UNLOCK(rxr); return 0; + +ring_reset: + if (do_lock) + IXGBE_RX_UNLOCK(rxr); + return netmap_ring_reinit(kring); } +/* end of file */ diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index fef8516..34a0627 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -1,15 +1,15 @@ /* * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. 
- * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -25,7 +25,7 @@ /* * $FreeBSD$ - * $Id: netmap.c 9662 2011-11-16 13:18:06Z luigi $ + * $Id: netmap.c 9795 2011-12-02 11:39:08Z luigi $ * * This module supports memory mapped access to network devices, * see netmap(4). 
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include <sys/module.h> #include <sys/errno.h> #include <sys/param.h> /* defines used in kernel.h */ +#include <sys/jail.h> #include <sys/kernel.h> /* types used in module initialization */ #include <sys/conf.h> /* cdevsw struct */ #include <sys/uio.h> /* uio struct */ @@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/mman.h> /* PROT_EXEC */ #include <sys/poll.h> +#include <sys/proc.h> #include <vm/vm.h> /* vtophys */ #include <vm/pmap.h> /* vtophys */ #include <sys/socket.h> /* sockaddrs */ @@ -78,6 +80,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <net/if.h> #include <net/bpf.h> /* BIOCIMMEDIATE */ +#include <net/vnet.h> #include <net/netmap.h> #include <dev/netmap/netmap_kern.h> #include <machine/bus.h> /* bus_dmamap_* */ @@ -678,6 +681,13 @@ get_ifp(const char *name, struct ifnet **ifp) * Error routine called when txsync/rxsync detects an error. * Can't do much more than resetting cur = hwcur, avail = hwavail. * Return 1 on reinit. + * + * This routine is only called by the upper half of the kernel. + * It only reads hwcur (which is changed only by the upper half, too) + * and hwavail (which may be changed by the lower half, but only on + * a tx ring and only to increase it, so any error will be recovered + * on the next call). For the above, we don't strictly need to call + * it under lock. */ int netmap_ring_reinit(struct netmap_kring *kring) @@ -717,29 +727,10 @@ netmap_ring_reinit(struct netmap_kring *kring) ring->avail, kring->nr_hwavail); ring->cur = kring->nr_hwcur; ring->avail = kring->nr_hwavail; - ring->flags |= NR_REINIT; - kring->na->flags |= NR_REINIT; } return (errors ? 1 : 0); } -/* - * Clean the reinit flag for our rings. 
- * XXX at the moment, clear for all rings - */ -static void -netmap_clean_reinit(struct netmap_adapter *na) -{ - //struct netmap_kring *kring; - u_int i; - - na->flags &= ~NR_REINIT; - D("--- NR_REINIT reset on %s", na->ifp->if_xname); - for (i = 0; i < na->num_queues + 1; i++) { - na->tx_rings[i].ring->flags &= ~NR_REINIT; - na->rx_rings[i].ring->flags &= ~NR_REINIT; - } -} /* * Set the ring ID. For devices with a single queue, a request @@ -801,7 +792,7 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid) */ static int netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data, - __unused int fflag, __unused struct thread *td) + __unused int fflag, struct thread *td) { struct netmap_priv_d *priv = NULL; struct ifnet *ifp; @@ -812,9 +803,13 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data, u_int i; struct netmap_if *nifp; + CURVNET_SET(TD_TO_VNET(td)); + error = devfs_get_cdevpriv((void **)&priv); - if (error != ENOENT && error != 0) + if (error != ENOENT && error != 0) { + CURVNET_RESTORE(); return (error); + } error = 0; /* Could be ENOENT */ switch (cmd) { @@ -836,8 +831,10 @@ netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data, break; case NIOCREGIF: - if (priv != NULL) /* thread already registered */ - return netmap_set_ringid(priv, nmr->nr_ringid); + if (priv != NULL) { /* thread already registered */ + error = netmap_set_ringid(priv, nmr->nr_ringid); + break; + } /* find the interface and a reference */ error = get_ifp(nmr->nr_name, &ifp); /* keep reference */ if (error) @@ -927,8 +924,10 @@ error: break; case NIOCUNREGIF: - if (priv == NULL) - return (ENXIO); + if (priv == NULL) { + error = ENXIO; + break; + } /* the interface is unregistered inside the destructor of the private data. 
*/ @@ -937,22 +936,21 @@ error: case NIOCTXSYNC: case NIOCRXSYNC: - if (priv == NULL) - return (ENXIO); + if (priv == NULL) { + error = ENXIO; + break; + } ifp = priv->np_ifp; /* we have a reference */ na = NA(ifp); /* retrieve netmap adapter */ adapter = ifp->if_softc; /* shorthand */ - if (na->flags & NR_REINIT) - netmap_clean_reinit(na); - if (priv->np_qfirst == na->num_queues) { /* queues to/from host */ if (cmd == NIOCTXSYNC) netmap_sync_to_host(na); else netmap_sync_from_host(na, NULL); - return error; + break; } for (i = priv->np_qfirst; i < priv->np_qlast; i++) { @@ -999,6 +997,7 @@ error: } } + CURVNET_RESTORE(); return (error); } @@ -1039,13 +1038,6 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td) adapter = ifp->if_softc; na = NA(ifp); /* retrieve netmap adapter */ - /* pending reinit, report up as a poll error. Pending - * reads and writes are lost. - */ - if (na->flags & NR_REINIT) { - netmap_clean_reinit(na); - revents |= POLLERR; - } /* how many queues we are scanning */ i = priv->np_qfirst; if (i == na->num_queues) { /* from/to host */ @@ -1111,20 +1103,20 @@ netmap_poll(__unused struct cdev *dev, int events, struct thread *td) * data available. If this fails, then lock and call the sync * routines. 
*/ - for (i = priv->np_qfirst; want_rx && i < priv->np_qlast; i++) { - kring = &na->rx_rings[i]; - if (kring->ring->avail > 0) { - revents |= want_rx; - want_rx = 0; /* also breaks the loop */ + for (i = priv->np_qfirst; want_rx && i < priv->np_qlast; i++) { + kring = &na->rx_rings[i]; + if (kring->ring->avail > 0) { + revents |= want_rx; + want_rx = 0; /* also breaks the loop */ + } } - } - for (i = priv->np_qfirst; want_tx && i < priv->np_qlast; i++) { - kring = &na->tx_rings[i]; - if (kring->ring->avail > 0) { - revents |= want_tx; - want_tx = 0; /* also breaks the loop */ + for (i = priv->np_qfirst; want_tx && i < priv->np_qlast; i++) { + kring = &na->tx_rings[i]; + if (kring->ring->avail > 0) { + revents |= want_tx; + want_tx = 0; /* also breaks the loop */ + } } - } /* * If we to push packets out (priv->np_txpoll) or want_tx is @@ -1326,24 +1318,6 @@ done: * netmap_reset() is called by the driver routines when reinitializing * a ring. The driver is in charge of locking to protect the kring. * If netmap mode is not set just return NULL. - * Otherwise set NR_REINIT (in the ring and in na) to signal - * that a ring has been reinitialized, - * set cur = hwcur = 0 and avail = hwavail = num_slots - 1 . - * IT IS IMPORTANT to leave one slot free even in the tx ring because - * we rely on cur=hwcur only for empty rings. - * These are good defaults but can be overridden later in the device - * specific code if, after a reinit, the ring does not start from 0 - * (e.g. if_em.c does this). - * - * XXX we shouldn't be touching the ring, but there is a - * race anyways and this is our best option. - * - * XXX setting na->flags makes the syscall code faster, as there is - * only one place to check. On the other hand, we will need a better - * way to notify multiple threads that rings have been reset. - * One way is to increment na->rst_count at each ring reset. 
- * Each thread in its own priv structure will keep a matching counter, - * and on a reset will acknowledge and clean its own rings. */ struct netmap_slot * netmap_reset(struct netmap_adapter *na, enum txrx tx, int n, @@ -1351,8 +1325,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n, { struct netmap_kring *kring; struct netmap_ring *ring; - struct netmap_slot *slot; - u_int i; + int new_hwofs, lim; if (na == NULL) return NULL; /* no netmap support here */ @@ -1360,74 +1333,26 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n, return NULL; /* nothing to reinitialize */ kring = tx == NR_TX ? na->tx_rings + n : na->rx_rings + n; ring = kring->ring; - if (tx == NR_TX) { - /* - * The last argument is the new value of next_to_clean. - * - * In the TX ring, we have P pending transmissions (from - * next_to_clean to nr_hwcur) followed by nr_hwavail free slots. - * Generally we can use all the slots in the ring so - * P = ring_size - nr_hwavail hence (modulo ring_size): - * next_to_clean == nr_hwcur + nr_hwavail - * - * If, upon a reset, nr_hwavail == ring_size and next_to_clean - * does not change we have nothing to report. Otherwise some - * pending packets may be lost, or newly injected packets will. - */ - /* if hwcur does not change, nothing to report. - * otherwise remember the change so perhaps we can - * shift the block at the next reinit - */ - if (new_cur == kring->nr_hwcur && - kring->nr_hwavail == kring->nkr_num_slots - 1) { - /* all ok */ - D("+++ NR_REINIT ok on %s TX[%d]", na->ifp->if_xname, n); - } else { - D("+++ NR_REINIT set on %s TX[%d]", na->ifp->if_xname, n); - } - ring->flags |= NR_REINIT; - na->flags |= NR_REINIT; - ring->avail = kring->nr_hwavail = kring->nkr_num_slots - 1; - ring->cur = kring->nr_hwcur = new_cur; - } else { - /* - * The last argument is the next free slot. - * In the RX ring we have nr_hwavail full buffers starting - * from nr_hwcur. 
- * If nr_hwavail == 0 and nr_hwcur does not change we are ok - * otherwise we might be in trouble as the buffers are - * changing. - */ - if (new_cur == kring->nr_hwcur && kring->nr_hwavail == 0) { - /* all ok */ - D("+++ NR_REINIT ok on %s RX[%d]", na->ifp->if_xname, n); - } else { - D("+++ NR_REINIT set on %s RX[%d]", na->ifp->if_xname, n); - } - ring->flags |= NR_REINIT; - na->flags |= NR_REINIT; - ring->avail = kring->nr_hwavail = 0; /* no data */ - ring->cur = kring->nr_hwcur = new_cur; - } + lim = kring->nkr_num_slots - 1; + + if (tx == NR_TX) + new_hwofs = kring->nr_hwcur - new_cur; + else + new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur; + if (new_hwofs > lim) + new_hwofs -= lim + 1; + + /* Always set the new offset value and realign the ring. */ + kring->nkr_hwofs = new_hwofs; + if (tx == NR_TX) + kring->nr_hwavail = kring->nkr_num_slots - 1; + D("new hwofs %d on %s %s[%d]", + kring->nkr_hwofs, na->ifp->if_xname, + tx == NR_TX ? "TX" : "RX", n); - slot = ring->slot; /* - * Check that buffer indexes are correct. If we find a - * bogus value we are a bit in trouble because we cannot - * recover easily. Best we can do is (probably) persistently - * reset the ring. - */ - for (i = 0; i < kring->nkr_num_slots; i++) { - if (slot[i].buf_idx >= netmap_total_buffers) { - D("invalid buf_idx %d at slot %d", slot[i].buf_idx, i); - slot[i].buf_idx = 0; /* XXX reset */ - } - /* XXX we don't really need to set the length */ - slot[i].len = 0; - } - /* wakeup possible waiters, both on the ring and on the global - * selfd. Perhaps a bit early now but the device specific - * routine is locked so hopefully we won't have a race. 
*/ selwakeuppri(&kring->si, PI_NET); selwakeuppri(&kring[na->num_queues + 1 - n].si, PI_NET); diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 5434609..eb36ec6 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -1,15 +1,15 @@ /* * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -25,7 +25,7 @@ /* * $FreeBSD$ - * $Id: netmap_kern.h 9662 2011-11-16 13:18:06Z luigi $ + * $Id: netmap_kern.h 9795 2011-12-02 11:39:08Z luigi $ * * The header contains the definitions of constants and function * prototypes used only in kernelspace. 
@@ -68,7 +68,7 @@ struct netmap_kring { u_int nr_kflags; u_int nkr_num_slots; - u_int nkr_hwofs; /* offset between NIC and netmap ring */ + int nkr_hwofs; /* offset between NIC and netmap ring */ struct netmap_adapter *na; // debugging struct selinfo si; /* poll/select wait queue */ }; @@ -94,7 +94,7 @@ struct netmap_adapter { u_int num_rx_desc; u_int buff_size; - u_int flags; /* NR_REINIT */ + u_int flags; /* tx_rings and rx_rings are private but allocated * as a contiguous chunk of memory. Each array has * N+1 entries, for the adapter queues and for the host queue. diff --git a/sys/net/netmap.h b/sys/net/netmap.h index be9c686..4dec1fd 100644 --- a/sys/net/netmap.h +++ b/sys/net/netmap.h @@ -32,7 +32,7 @@ /* * $FreeBSD$ - * $Id: netmap.h 9662 2011-11-16 13:18:06Z luigi $ + * $Id: netmap.h 9753 2011-11-28 15:10:43Z luigi $ * * This header contains the definitions of the constants and the * structures needed by the ``netmap'' module, both kernel and @@ -186,13 +186,6 @@ struct netmap_ring { const uint16_t nr_buf_size; uint16_t flags; - /* - * When a ring is reinitialized, the kernel sets kflags. - * On exit from a syscall, if the flag is found set, we - * also reinitialize the nr_* variables. The kflag is then - * unconditionally copied to nr_flags and cleared. - */ -#define NR_REINIT 0x0001 /* ring reinitialized! */ #define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ struct timeval ts; /* time of last *sync() */ diff --git a/tools/tools/netmap/pkt-gen.c b/tools/tools/netmap/pkt-gen.c index 747bd9d..21dc8de 100644 --- a/tools/tools/netmap/pkt-gen.c +++ b/tools/tools/netmap/pkt-gen.c @@ -4,10 +4,10 @@ * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND @@ -25,7 +25,7 @@ /* * $FreeBSD$ - * $Id: pkt-gen.c 9638 2011-11-07 18:07:43Z luigi $ + * $Id: pkt-gen.c 9827 2011-12-05 11:29:34Z luigi $ * * Example program to show how to build a multithreaded packet * source/sink using the netmap device. @@ -45,6 +45,7 @@ const char *default_payload="netmap pkt-gen Luigi Rizzo and Matteo Landi\n" #include <signal.h> /* signal */ #include <stdlib.h> #include <stdio.h> +#include <inttypes.h> /* PRI* macros */ #include <string.h> /* strcmp */ #include <fcntl.h> /* open */ #include <unistd.h> /* close */ @@ -616,7 +617,7 @@ tx_output(uint64_t sent, int size, double delta) punit += 1; } - printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", + printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n", sent, size, delta); printf("Speed: %.2f%cpps. Bandwidth: %.2f%cbps.\n", pps, units[punit], amount, units[aunit]); @@ -636,7 +637,7 @@ rx_output(uint64_t received, double delta) punit += 1; } - printf("Received %llu packets, in %.2f seconds.\n", received, delta); + printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta); printf("Speed: %.2f%cpps.\n", pps, units[punit]); } @@ -971,7 +972,7 @@ main(int arc, char **argv) if (pps < 10000) continue; pps = (my_count - prev)*1000000 / pps; - D("%llu pps", pps); + D("%" PRIu64 " pps", pps); prev = my_count; toc = now; if (done == g.nthreads) |