diff options
Diffstat (limited to 'sys/dev/netmap/ixgbe_netmap.h')
-rw-r--r-- | sys/dev/netmap/ixgbe_netmap.h | 340 |
1 file changed, 243 insertions, 97 deletions
diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h index a4d5491..6c8b2b6 100644 --- a/sys/dev/netmap/ixgbe_netmap.h +++ b/sys/dev/netmap/ixgbe_netmap.h @@ -25,25 +25,48 @@ /* * $FreeBSD$ - * $Id: ixgbe_netmap.h 9662 2011-11-16 13:18:06Z luigi $ + * $Id: ixgbe_netmap.h 9802 2011-12-02 18:42:37Z luigi $ * * netmap modifications for ixgbe + * + * This file is meant to be a reference on how to implement + * netmap support for a network driver. + * This file contains code but only static or inline functions + * that are used by a single driver. To avoid replication of + * code we just #include it near the beginning of the + * standard driver. */ #include <net/netmap.h> #include <sys/selinfo.h> -// #include <vm/vm.h> -// #include <vm/pmap.h> /* vtophys ? */ +/* + * Some drivers may need the following headers. Others + * already include them by default + +#include <vm/vm.h> +#include <vm/pmap.h> + + */ + #include <dev/netmap/netmap_kern.h> +/* + * prototypes for the new API calls that are used by the + * *_netmap_attach() routine. + */ static int ixgbe_netmap_reg(struct ifnet *, int onoff); static int ixgbe_netmap_txsync(void *, u_int, int); static int ixgbe_netmap_rxsync(void *, u_int, int); static void ixgbe_netmap_lock_wrapper(void *, int, u_int); -SYSCTL_NODE(_dev, OID_AUTO, ixgbe, CTLFLAG_RW, 0, "ixgbe card"); - +/* + * The attach routine, called near the end of ixgbe_attach(), + * fills the parameters for netmap_attach() and calls it. + * It cannot fail, in the worst case (such as no memory) + * netmap mode will be disabled and the driver will only + * operate in standard mode. 
+ */ static void ixgbe_netmap_attach(struct adapter *adapter) { @@ -52,7 +75,7 @@ ixgbe_netmap_attach(struct adapter *adapter) bzero(&na, sizeof(na)); na.ifp = adapter->ifp; - na.separate_locks = 1; + na.separate_locks = 1; /* this card has separate rx/tx locks */ na.num_tx_desc = adapter->num_tx_desc; na.num_rx_desc = adapter->num_rx_desc; na.nm_txsync = ixgbe_netmap_txsync; @@ -60,17 +83,18 @@ ixgbe_netmap_attach(struct adapter *adapter) na.nm_lock = ixgbe_netmap_lock_wrapper; na.nm_register = ixgbe_netmap_reg; /* + * XXX where do we put this comment ? * adapter->rx_mbuf_sz is set by SIOCSETMTU, but in netmap mode * we allocate the buffers on the first register. So we must * disallow a SIOCSETMTU when if_capenable & IFCAP_NETMAP is set. */ - na.buff_size = MCLBYTES; + na.buff_size = NETMAP_BUF_SIZE; netmap_attach(&na, adapter->num_queues); } /* - * wrapper to export locks to the generic code + * wrapper to export locks to the generic netmap code. */ static void ixgbe_netmap_lock_wrapper(void *_a, int what, u_int queueid) @@ -102,8 +126,8 @@ ixgbe_netmap_lock_wrapper(void *_a, int what, u_int queueid) /* - * support for netmap register/unregisted. We are already under core lock. - * only called on the first init or the last unregister. + * Netmap register/unregister. We are already under core lock. + * Only called on the first register or the last unregister. 
*/ static int ixgbe_netmap_reg(struct ifnet *ifp, int onoff) @@ -112,7 +136,7 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff) struct netmap_adapter *na = NA(ifp); int error = 0; - if (!na) + if (!na) /* probably, netmap_attach() failed */ return EINVAL; ixgbe_disable_intr(adapter); @@ -120,23 +144,28 @@ ixgbe_netmap_reg(struct ifnet *ifp, int onoff) /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - if (onoff) { + if (onoff) { /* enable netmap mode */ ifp->if_capenable |= IFCAP_NETMAP; - /* save if_transmit to restore it later */ + /* save if_transmit and replace with our routine */ na->if_transmit = ifp->if_transmit; ifp->if_transmit = netmap_start; + /* + * reinitialize the adapter, now with netmap flag set, + * so the rings will be set accordingly. + */ ixgbe_init_locked(adapter); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) { error = ENOMEM; goto fail; } - } else { + } else { /* reset normal mode (explicit request or netmap failed) */ fail: /* restore if_transmit */ ifp->if_transmit = na->if_transmit; ifp->if_capenable &= ~IFCAP_NETMAP; + /* initialize the card, this time in standard mode */ ixgbe_init_locked(adapter); /* also enables intr */ } return (error); @@ -145,21 +174,23 @@ fail: /* * Reconcile kernel and user view of the transmit ring. + * This routine might be called frequently so it must be efficient. + * + * Userspace has filled tx slots up to ring->cur (excluded). + * The last unused slot previously known to the kernel was kring->nkr_hwcur, + * and the last interrupt reported kring->nr_hwavail slots available. * - * Userspace has filled tx slots up to cur (excluded). - * The last unused slot previously known to the kernel was nr_hwcur, - * and the last interrupt reported nr_hwavail slots available - * (using the special value -1 to indicate idle transmit ring). 
- * The function must first update avail to what the kernel - * knows, subtract the newly used slots (cur - nr_hwcur) - * from both avail and nr_hwavail, and set nr_hwcur = cur + * This function runs under lock (acquired from the caller or internally). + * It must first update ring->avail to what the kernel knows, + * subtract the newly used slots (ring->cur - kring->nkr_hwcur) + * from both avail and nr_hwavail, and set ring->nkr_hwcur = ring->cur * issuing a dmamap_sync on all slots. * - * Check parameters in the struct netmap_ring. - * We don't use avail, only check for bogus values. - * Make sure cur is valid, and same goes for buffer indexes and lengths. - * To avoid races, read the values once, and never use those from - * the ring afterwards. + * Since ring comes from userspace, its content must be read only once, + * and validated before being used to update the kernel's structures. + * (this is also true for every use of ring in the kernel). + * + * ring->avail is never used, only checked for bogus values. 
*/ static int ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock) @@ -169,42 +200,96 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, n = 0, lim = kring->nkr_num_slots - 1; + int j, k, l, n = 0, lim = kring->nkr_num_slots - 1; - /* generate an interrupt approximately every half ring */ + /* + * ixgbe can generate an interrupt on every tx packet, but it + * seems very expensive, so we interrupt once every half ring, + * or when requested with NS_REPORT + */ int report_frequency = kring->nkr_num_slots >> 1; - k = ring->cur; /* ring is not protected by any lock */ - if ( (kring->nr_kflags & NR_REINIT) || k > lim) - return netmap_ring_reinit(kring); - if (do_lock) IXGBE_TX_LOCK(txr); + /* take a copy of ring->cur now, and never read it again */ + k = ring->cur; + l = k - kring->nr_hwcur; + if (l < 0) + l += lim + 1; + /* if cur is invalid reinitialize the ring. */ + if (k > lim || l > kring->nr_hwavail) { + if (do_lock) + IXGBE_TX_UNLOCK(txr); + return netmap_ring_reinit(kring); + } + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); - /* update avail to what the hardware knows */ - ring->avail = kring->nr_hwavail; - + /* + * Process new packets to send. j is the current index in the + * netmap ring, l is the corresponding index in the NIC ring. + * The two numbers differ because upon a *_init() we reset + * the NIC ring but leave the netmap ring unchanged. + * For the transmit ring, we have + * + * j = kring->nr_hwcur + * l = IXGBE_TDT (not tracked in the driver) + * and + * j == (l + kring->nkr_hwofs) % ring_size + * + * In this driver kring->nkr_hwofs >= 0, but for other + * drivers it might be negative as well. 
+ */ j = kring->nr_hwcur; if (j != k) { /* we have new packets to send */ + l = j - kring->nkr_hwofs; + if (l < 0) /* wraparound */ + l += lim + 1; + while (j != k) { + /* + * Collect per-slot info. + * Note that txbuf and curr are indexed by l. + * + * In this driver we collect the buffer address + * (using the NMB() macro) because we always + * need to rewrite it into the NIC ring. + * Many other drivers preserve the address, so + * we only need to access it if NS_BUF_CHANGED + * is set. + */ struct netmap_slot *slot = &ring->slot[j]; - struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[j]; - union ixgbe_adv_tx_desc *curr = &txr->tx_base[j]; + struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[l]; + union ixgbe_adv_tx_desc *curr = &txr->tx_base[l]; void *addr = NMB(slot); + // XXX type for flags and len ? int flags = ((slot->flags & NS_REPORT) || j == 0 || j == report_frequency) ? IXGBE_TXD_CMD_RS : 0; int len = slot->len; + /* + * Quick check for valid addr and len. + * NMB() returns netmap_buffer_base for invalid + * buffer indexes (but the address is still a + * valid one to be used in a ring). slot->len is + * unsigned so no need to check for negative values. + */ if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) { +ring_reset: if (do_lock) IXGBE_TX_UNLOCK(txr); return netmap_ring_reinit(kring); } slot->flags &= ~NS_REPORT; + /* + * Fill the slot in the NIC ring. + * In this driver we need to rewrite the buffer + * address in the NIC ring. Other drivers do not + * need this. + */ curr->read.buffer_addr = htole64(vtophys(addr)); curr->read.olinfo_status = 0; curr->read.cmd_type_len = @@ -212,6 +297,10 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock) (IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP | flags) ); + /* If the buffer has changed, unload and reload map + * (and possibly the physical address in the NIC + * slot, but we did it already). 
+ */ if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, unload and reload map */ netmap_reload_map(txr->txtag, txbuf->map, @@ -219,69 +308,89 @@ ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock) slot->flags &= ~NS_BUF_CHANGED; } + /* make sure changes to the buffer are synced */ bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; n++; } - kring->nr_hwcur = k; + kring->nr_hwcur = k; /* the saved ring->cur */ /* decrease avail by number of sent packets */ - ring->avail -= n; - kring->nr_hwavail = ring->avail; + kring->nr_hwavail -= n; + /* synchronize the NIC ring */ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), k); + /* (re)start the transmitter up to slot l (excluded) */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), l); } + /* + * If no packets are sent, or there is no room in the tx ring, + * Check whether there are completed transmissions. + * Because this is expensive (we need a register etc.) + * we only do it if absolutely necessary, i.e. there is no room + * in the tx ring, or where were no completed transmissions + * (meaning that probably the caller really wanted to check + * for completed transmissions). + */ if (n == 0 || kring->nr_hwavail < 1) { - /* record completed transmissions. TODO + int delta; + + /* + * Record completed transmissions. + * We (re)use the driver's txr->next_to_clean to keep + * track of the most recently completed transmission. * * The datasheet discourages the use of TDH to find out the - * number of sent packets; the right way to do so, is to check - * the DD bit inside the status of a packet descriptor. 
On the - * other hand, we avoid to set the `report status' bit for - * *all* outgoing packets (kind of interrupt mitigation), - * consequently the DD bit is not guaranteed to be set for all - * the packets: thats way, for the moment we continue to use - * TDH. + * number of sent packets. We should rather check the DD + * status bit in a packet descriptor. However, we only set + * the "report status" bit for some descriptors (a kind of + * interrupt mitigation), so we can only check on those. + * For the time being we use TDH, as we do it infrequently + * enough not to pose performance problems. */ - j = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr)); - if (j >= kring->nkr_num_slots) { /* XXX can happen */ - D("TDH wrap %d", j); - j -= kring->nkr_num_slots; + l = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr)); + if (l >= kring->nkr_num_slots) { /* XXX can happen */ + D("TDH wrap %d", l); + l -= kring->nkr_num_slots; } - int delta = j - txr->next_to_clean; + delta = l - txr->next_to_clean; if (delta) { - /* new transmissions were completed, increment - ring->nr_hwavail. */ + /* some tx completed, increment avail */ if (delta < 0) delta += kring->nkr_num_slots; - txr->next_to_clean = j; + txr->next_to_clean = l; kring->nr_hwavail += delta; - ring->avail = kring->nr_hwavail; + if (kring->nr_hwavail > lim) + goto ring_reset; } } + /* update avail to what the kernel knows */ + ring->avail = kring->nr_hwavail; if (do_lock) IXGBE_TX_UNLOCK(txr); return 0; + } /* * Reconcile kernel and user view of the receive ring. + * Same as for the txsync, this routine must be efficient and + * avoid races in accessing the shared regions. + * + * When called, userspace has read data from slots kring->nr_hwcur + * up to ring->cur (excluded). * - * Userspace has read rx slots up to cur (excluded). - * The last unread slot previously known to the kernel was nr_hwcur, - * and the last interrupt reported nr_hwavail slots available. 
+ * The last interrupt reported kring->nr_hwavail slots available + * after kring->nr_hwcur. * We must subtract the newly consumed slots (cur - nr_hwcur) - * from nr_hwavail, clearing the descriptors for the next - * read, tell the hardware that they are available, - * and set nr_hwcur = cur and avail = nr_hwavail. - * issuing a dmamap_sync on all slots. + * from nr_hwavail, make the descriptors available for the next reads, + * and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail. */ static int ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock) @@ -291,86 +400,123 @@ ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock) struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - int j, k, n, lim = kring->nkr_num_slots - 1; + int j, k, l, n, lim = kring->nkr_num_slots - 1; - k = ring->cur; /* ring is not protected by any lock */ - if ( (kring->nr_kflags & NR_REINIT) || k > lim) + k = ring->cur; /* cache and check value, same as in txsync */ + n = k - kring->nr_hwcur; + if (n < 0) + n += lim + 1; + if (k > lim || n > kring->nr_hwavail) /* userspace is cheating */ return netmap_ring_reinit(kring); if (do_lock) IXGBE_RX_LOCK(rxr); + if (n < 0) + n += lim + 1; /* XXX check sync modes */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - j = rxr->next_to_check; + /* + * First part, import newly received packets into the netmap ring. + * + * j is the index of the next free slot in the netmap ring, + * and l is the index of the next received packet in the NIC ring, + * and they may differ in case if_init() has been called while + * in netmap mode. 
For the receive ring we have + * + * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size + * l = rxr->next_to_check; + * and + * j == (l + kring->nkr_hwofs) % ring_size + * + * rxr->next_to_check is set to 0 on a ring reinit + */ + l = rxr->next_to_check; + j = rxr->next_to_check + kring->nkr_hwofs; + if (j > lim) + j -= lim + 1; + for (n = 0; ; n++) { - union ixgbe_adv_rx_desc *curr = &rxr->rx_base[j]; + union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l]; uint32_t staterr = le32toh(curr->wb.upper.status_error); if ((staterr & IXGBE_RXD_STAT_DD) == 0) break; ring->slot[j].len = le16toh(curr->wb.upper.length); bus_dmamap_sync(rxr->ptag, - rxr->rx_buffers[j].pmap, BUS_DMASYNC_POSTREAD); + rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; } - if (n) { - rxr->next_to_check = j; + if (n) { /* update the state variables */ + rxr->next_to_check = l; kring->nr_hwavail += n; - if (kring->nr_hwavail >= lim - 10) { - ND("rx ring %d almost full %d", ring_nr, kring->nr_hwavail); - } } - /* skip past packets that userspace has already processed, - * making them available for reception. - * advance nr_hwcur and issue a bus_dmamap_sync on the - * buffers so it is safe to write to them. - * Also increase nr_hwavail + /* + * Skip past packets that userspace has already processed + * (from kring->nr_hwcur to ring->cur excluded), and make + * the buffers available for reception. + * As usual j is the index in the netmap ring, l is the index + * in the NIC ring, and j == (l + kring->nkr_hwofs) % ring_size */ j = kring->nr_hwcur; if (j != k) { /* userspace has read some packets. */ n = 0; + l = kring->nr_hwcur - kring->nkr_hwofs; + if (l < 0) + l += lim + 1; while (j != k) { - struct netmap_slot *slot = ring->slot + j; - union ixgbe_adv_rx_desc *curr = &rxr->rx_base[j]; - struct ixgbe_rx_buf *rxbuf = rxr->rx_buffers + j; + /* collect per-slot info, with similar validations + * and flag handling as in the txsync code. 
+ * + * NOTE curr and rxbuf are indexed by l. + * Also, this driver needs to update the physical * address in the NIC ring, but other drivers + * may not have this requirement. + */ + struct netmap_slot *slot = &ring->slot[j]; + union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l]; + struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[l]; void *addr = NMB(slot); - if (addr == netmap_buffer_base) { /* bad buf */ - if (do_lock) - IXGBE_RX_UNLOCK(rxr); - return netmap_ring_reinit(kring); - } + if (addr == netmap_buffer_base) /* bad buf */ + goto ring_reset; curr->wb.upper.status_error = 0; curr->read.pkt_addr = htole64(vtophys(addr)); if (slot->flags & NS_BUF_CHANGED) { netmap_reload_map(rxr->ptag, rxbuf->pmap, - addr, na->buff_size); + addr, na->buff_size); slot->flags &= ~NS_BUF_CHANGED; } bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); + BUS_DMASYNC_PREREAD); j = (j == lim) ? 0 : j + 1; + l = (l == lim) ? 0 : l + 1; n++; } kring->nr_hwavail -= n; - kring->nr_hwcur = ring->cur; + kring->nr_hwcur = k; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* IMPORTANT: we must leave one free slot in the ring, - * so move j back by one unit + * so move l back by one unit */ - j = (j == 0) ? lim : j - 1; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), j); + l = (l == 0) ? lim : l - 1; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), l); } /* tell userspace that there are new packets */ ring->avail = kring->nr_hwavail ; if (do_lock) IXGBE_RX_UNLOCK(rxr); return 0; + +ring_reset: + if (do_lock) + IXGBE_RX_UNLOCK(rxr); + return netmap_ring_reinit(kring); } +/* end of file */ |