From a9158a088f06d0cb2559e534b4a69eb52ef67a16 Mon Sep 17 00:00:00 2001 From: yongari Date: Tue, 22 Dec 2009 18:57:07 +0000 Subject: Add bus_dma(9) and endianness support to ste(4). o Sorted includes and added missing header files. o Added basic endianness support. In theory ste(4) should work on any architectures. o Remove the use of contigmalloc(9), contigfree(9) and vtophys(9). o Added 8 byte alignment limitation of TX/RX descriptor. o Added 1 byte alignment requirement for TX/RX buffers. o ste(4) controllers does not support DAC. Limit DMA address space to be within 32bit address. o Added spare DMA map to gracefully recover from DMA map failure. o Removed dead code for checking STE_RXSTAT_DMADONE bit. The bit was already checked in each iteration of loop so it can't be true. o Added second argument count to ste_rxeof(). It is used to limit number of iterations done in RX handler. ATM polling is the only consumer. o Removed ste_rxeoc() which was added to address RX stuck issue (cvs rev 1.66). Unlike TX descriptors, ST201 supports chaining descriptors to form a ring for RX descriptors. If RX descriptor chaining is not supported it's possible for controller to stop receiving incoming frames once controller pass the end of RX descriptor which in turn requires driver post new RX descriptors to receive more frames. For TX descriptors which does not support chaning, we exactly do manual chaining in driver by concatenating new descriptors to the end of previous TX chain. Maybe the workaround was borrowed from other drivers that does not support RX descriptor chaining, which is not valid for ST201 controllers. I still have no idea how this address RX stuck issue and I can't reproduce the RX stuck issue on DFE-550TX controller. o Removed hw.ste_rxsyncs sysctl as the workaround was removed. o TX/RX side bus_dmamap_load_mbuf_sg(9) support. o Reimplemented optimized ste_encap(). o Simplified TX logic of ste_start_locked(). o Added comments for TFD/RFD requirements. o Increased number of RX descriptors to 128 from 64. 128 gave much better performance than 64 under high network loads. --- sys/dev/ste/if_ste.c | 743 +++++++++++++++++++++++++++++++++--------------- sys/dev/ste/if_stereg.h | 59 +++- 2 files changed, 553 insertions(+), 249 deletions(-) (limited to 'sys/dev') diff --git a/sys/dev/ste/if_ste.c b/sys/dev/ste/if_ste.c index 91bd968..0fb8929 100644 --- a/sys/dev/ste/if_ste.c +++ b/sys/dev/ste/if_ste.c @@ -39,14 +39,19 @@ __FBSDID("$FreeBSD$"); #include #include -#include -#include -#include +#include +#include #include +#include +#include +#include #include +#include #include +#include #include +#include #include #include #include @@ -55,14 +60,8 @@ __FBSDID("$FreeBSD$"); #include #include -#include - -#include /* for vtophys */ -#include /* for vtophys */ #include #include -#include -#include #include #include @@ -70,13 +69,13 @@ __FBSDID("$FreeBSD$"); #include #include +#include + /* "device miibus" required. See GENERIC if you get errors here. */ #include "miibus_if.h" #define STE_USEIOSPACE -#include - MODULE_DEPEND(ste, pci, 1, 1, 1); MODULE_DEPEND(ste, ether, 1, 1, 1); MODULE_DEPEND(ste, miibus, 1, 1, 1); @@ -96,8 +95,12 @@ static int ste_detach(device_t); static int ste_probe(device_t); static int ste_shutdown(device_t); +static int ste_dma_alloc(struct ste_softc *); +static void ste_dma_free(struct ste_softc *); +static void ste_dmamap_cb(void *, bus_dma_segment_t *, int, int); static int ste_eeprom_wait(struct ste_softc *); -static int ste_encap(struct ste_softc *, struct ste_chain *, struct mbuf *); +static int ste_encap(struct ste_softc *, struct mbuf **, + struct ste_chain *); static int ste_ifmedia_upd(struct ifnet *); static void ste_ifmedia_upd_locked(struct ifnet *); static void ste_ifmedia_sts(struct ifnet *, struct ifmediareq *); @@ -114,12 +117,10 @@ static int ste_mii_writereg(struct ste_softc *, struct ste_mii_frame *); static int ste_miibus_readreg(device_t, int, int); static void ste_miibus_statchg(device_t); static int ste_miibus_writereg(device_t, int, int, int); -static int ste_newbuf(struct ste_softc *, struct ste_chain_onefrag *, - struct mbuf *); +static int ste_newbuf(struct ste_softc *, struct ste_chain_onefrag *); static int ste_read_eeprom(struct ste_softc *, caddr_t, int, int, int); static void ste_reset(struct ste_softc *); -static void ste_rxeoc(struct ste_softc *); -static int ste_rxeof(struct ste_softc *); +static int ste_rxeof(struct ste_softc *, int); static void ste_setmulti(struct ste_softc *); static void ste_start(struct ifnet *); static void ste_start_locked(struct ifnet *); @@ -168,11 +169,6 @@ static devclass_t ste_devclass; DRIVER_MODULE(ste, pci, ste_driver, ste_devclass, 0, 0); DRIVER_MODULE(miibus, ste, miibus_driver, miibus_devclass, 0, 0); -SYSCTL_NODE(_hw, OID_AUTO, ste, CTLFLAG_RD, 0, "if_ste parameters"); - -static int ste_rxsyncs; -SYSCTL_INT(_hw_ste, OID_AUTO, rxsyncs, CTLFLAG_RW, &ste_rxsyncs, 0, ""); - #define STE_SETBIT4(sc, reg, x) \ CSR_WRITE_4(sc, reg, CSR_READ_4(sc, reg) | (x)) @@ -603,10 +599,7 @@ ste_poll_locked(struct ifnet *ifp, enum poll_cmd cmd, int count) STE_LOCK_ASSERT(sc); - sc->rxcycles = count; - if (cmd == POLL_AND_CHECK_STATUS) - ste_rxeoc(sc); - rx_npkts = ste_rxeof(sc); + rx_npkts = ste_rxeof(sc, count); ste_txeof(sc); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ste_start_locked(ifp); @@ -666,10 +659,8 @@ ste_intr(void *xsc) if (!(status & STE_INTRS)) break; - if (status & STE_ISR_RX_DMADONE) { - ste_rxeoc(sc); - ste_rxeof(sc); - } + if (status & STE_ISR_RX_DMADONE) + ste_rxeof(sc, -1); if (status & STE_ISR_TX_DMADONE) ste_txeof(sc); @@ -701,62 +692,40 @@ ste_intr(void *xsc) STE_UNLOCK(sc); } -static void -ste_rxeoc(struct ste_softc *sc) -{ - struct ste_chain_onefrag *cur_rx; - - STE_LOCK_ASSERT(sc); - - if (sc->ste_cdata.ste_rx_head->ste_ptr->ste_status == 0) { - cur_rx = sc->ste_cdata.ste_rx_head; - do { - cur_rx = cur_rx->ste_next; - /* If the ring is empty, just return. */ - if (cur_rx == sc->ste_cdata.ste_rx_head) - return; - } while (cur_rx->ste_ptr->ste_status == 0); - if (sc->ste_cdata.ste_rx_head->ste_ptr->ste_status == 0) { - /* We've fallen behind the chip: catch it. */ - sc->ste_cdata.ste_rx_head = cur_rx; - ++ste_rxsyncs; - } - } -} - /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. */ static int -ste_rxeof(struct ste_softc *sc) +ste_rxeof(struct ste_softc *sc, int count) { struct mbuf *m; struct ifnet *ifp; struct ste_chain_onefrag *cur_rx; uint32_t rxstat; - int total_len = 0, count = 0, rx_npkts = 0; - - STE_LOCK_ASSERT(sc); + int total_len, rx_npkts; ifp = sc->ste_ifp; - while ((rxstat = sc->ste_cdata.ste_rx_head->ste_ptr->ste_status) - & STE_RXSTAT_DMADONE) { + bus_dmamap_sync(sc->ste_cdata.ste_rx_list_tag, + sc->ste_cdata.ste_rx_list_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + + cur_rx = sc->ste_cdata.ste_rx_head; + for (rx_npkts = 0; rx_npkts < STE_RX_LIST_CNT; rx_npkts++, + cur_rx = cur_rx->ste_next) { + rxstat = le32toh(cur_rx->ste_ptr->ste_status); + if ((rxstat & STE_RXSTAT_DMADONE) == 0) + break; #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { - if (sc->rxcycles <= 0) + if (count == 0) break; - sc->rxcycles--; + count--; } #endif - if ((STE_RX_LIST_CNT - count) < 3) { + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; - } - - cur_rx = sc->ste_cdata.ste_rx_head; - sc->ste_cdata.ste_rx_head = cur_rx->ste_next; - /* * If an error occurs, update stats, clear the * status word and leave the mbuf cluster in place: @@ -769,22 +738,9 @@ ste_rxeof(struct ste_softc *sc) continue; } - /* - * If there error bit was not set, the upload complete - * bit should be set which means we have a valid packet. - * If not, something truly strange has happened. - */ - if (!(rxstat & STE_RXSTAT_DMADONE)) { - device_printf(sc->ste_dev, - "bad receive status -- packet dropped\n"); - ifp->if_ierrors++; - cur_rx->ste_ptr->ste_status = 0; - continue; - } - /* No errors; receive the packet. */ m = cur_rx->ste_mbuf; - total_len = cur_rx->ste_ptr->ste_status & STE_RXSTAT_FRAMELEN; + total_len = STE_RX_BYTES(rxstat); /* * Try to conjure up a new mbuf cluster. If that @@ -793,7 +749,7 @@ ste_rxeof(struct ste_softc *sc) * result in a lost packet, but there's little else we * can do in this situation. */ - if (ste_newbuf(sc, cur_rx, NULL) == ENOBUFS) { + if (ste_newbuf(sc, cur_rx) != 0) { ifp->if_ierrors++; cur_rx->ste_ptr->ste_status = 0; continue; @@ -806,10 +762,13 @@ ste_rxeof(struct ste_softc *sc) STE_UNLOCK(sc); (*ifp->if_input)(ifp, m); STE_LOCK(sc); + } - cur_rx->ste_ptr->ste_status = 0; - count++; - rx_npkts++; + if (rx_npkts > 0) { + sc->ste_cdata.ste_rx_head = cur_rx; + bus_dmamap_sync(sc->ste_cdata.ste_rx_list_tag, + sc->ste_cdata.ste_rx_list_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } return (rx_npkts); @@ -857,27 +816,40 @@ ste_txeof(struct ste_softc *sc) { struct ifnet *ifp; struct ste_chain *cur_tx; + uint32_t txstat; int idx; - ifp = sc->ste_ifp; + STE_LOCK_ASSERT(sc); + ifp = sc->ste_ifp; idx = sc->ste_cdata.ste_tx_cons; + if (idx == sc->ste_cdata.ste_tx_prod) + return; + + bus_dmamap_sync(sc->ste_cdata.ste_tx_list_tag, + sc->ste_cdata.ste_tx_list_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + while (idx != sc->ste_cdata.ste_tx_prod) { cur_tx = &sc->ste_cdata.ste_tx_chain[idx]; - - if (!(cur_tx->ste_ptr->ste_ctl & STE_TXCTL_DMADONE)) + txstat = le32toh(cur_tx->ste_ptr->ste_ctl); + if ((txstat & STE_TXCTL_DMADONE) == 0) break; - + bus_dmamap_sync(sc->ste_cdata.ste_tx_tag, cur_tx->ste_map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(sc->ste_cdata.ste_tx_tag, cur_tx->ste_map); + KASSERT(cur_tx->ste_mbuf != NULL, + ("%s: freeing NULL mbuf!\n", __func__)); m_freem(cur_tx->ste_mbuf); cur_tx->ste_mbuf = NULL; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ifp->if_opackets++; - + sc->ste_cdata.ste_tx_cnt--; STE_INC(idx, STE_TX_LIST_CNT); } sc->ste_cdata.ste_tx_cons = idx; - if (idx == sc->ste_cdata.ste_tx_prod) + if (sc->ste_cdata.ste_tx_cnt == 0) sc->ste_timer = 0; } @@ -1012,17 +984,8 @@ ste_attach(device_t dev) goto fail; } - /* Allocate the descriptor queues. */ - sc->ste_ldata = contigmalloc(sizeof(struct ste_list_data), M_DEVBUF, - M_NOWAIT, 0, 0xffffffff, PAGE_SIZE, 0); - - if (sc->ste_ldata == NULL) { - device_printf(dev, "no memory for list buffers!\n"); - error = ENXIO; + if ((error = ste_dma_alloc(sc)) != 0) goto fail; - } - - bzero(sc->ste_ldata, sizeof(struct ste_list_data)); ifp = sc->ste_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { @@ -1128,44 +1091,325 @@ ste_detach(device_t dev) if (ifp) if_free(ifp); - if (sc->ste_ldata) { - contigfree(sc->ste_ldata, sizeof(struct ste_list_data), - M_DEVBUF); - } - + ste_dma_free(sc); mtx_destroy(&sc->ste_mtx); return (0); } +struct ste_dmamap_arg { + bus_addr_t ste_busaddr; +}; + +static void +ste_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) +{ + struct ste_dmamap_arg *ctx; + + if (error != 0) + return; + + KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); + + ctx = (struct ste_dmamap_arg *)arg; + ctx->ste_busaddr = segs[0].ds_addr; +} + static int -ste_newbuf(struct ste_softc *sc, struct ste_chain_onefrag *c, struct mbuf *m) +ste_dma_alloc(struct ste_softc *sc) { - struct mbuf *m_new = NULL; - - if (m == NULL) { - MGETHDR(m_new, M_DONTWAIT, MT_DATA); - if (m_new == NULL) - return (ENOBUFS); - MCLGET(m_new, M_DONTWAIT); - if (!(m_new->m_flags & M_EXT)) { - m_freem(m_new); - return (ENOBUFS); + struct ste_chain *txc; + struct ste_chain_onefrag *rxc; + struct ste_dmamap_arg ctx; + int error, i; + + /* Create parent DMA tag. */ + error = bus_dma_tag_create( + bus_get_dma_tag(sc->ste_dev), /* parent */ + 1, 0, /* alignment, boundary */ + BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + BUS_SPACE_MAXSIZE_32BIT, /* maxsize */ + 0, /* nsegments */ + BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ + 0, /* flags */ + NULL, NULL, /* lockfunc, lockarg */ + &sc->ste_cdata.ste_parent_tag); + if (error != 0) { + device_printf(sc->ste_dev, + "could not create parent DMA tag.\n"); + goto fail; + } + + /* Create DMA tag for Tx descriptor list. */ + error = bus_dma_tag_create( + sc->ste_cdata.ste_parent_tag, /* parent */ + STE_DESC_ALIGN, 0, /* alignment, boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + STE_TX_LIST_SZ, /* maxsize */ + 1, /* nsegments */ + STE_TX_LIST_SZ, /* maxsegsize */ + 0, /* flags */ + NULL, NULL, /* lockfunc, lockarg */ + &sc->ste_cdata.ste_tx_list_tag); + if (error != 0) { + device_printf(sc->ste_dev, + "could not create Tx list DMA tag.\n"); + goto fail; + } + + /* Create DMA tag for Rx descriptor list. */ + error = bus_dma_tag_create( + sc->ste_cdata.ste_parent_tag, /* parent */ + STE_DESC_ALIGN, 0, /* alignment, boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + STE_RX_LIST_SZ, /* maxsize */ + 1, /* nsegments */ + STE_RX_LIST_SZ, /* maxsegsize */ + 0, /* flags */ + NULL, NULL, /* lockfunc, lockarg */ + &sc->ste_cdata.ste_rx_list_tag); + if (error != 0) { + device_printf(sc->ste_dev, + "could not create Rx list DMA tag.\n"); + goto fail; + } + + /* Create DMA tag for Tx buffers. */ + error = bus_dma_tag_create( + sc->ste_cdata.ste_parent_tag, /* parent */ + 1, 0, /* alignment, boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + MCLBYTES * STE_MAXFRAGS, /* maxsize */ + STE_MAXFRAGS, /* nsegments */ + MCLBYTES, /* maxsegsize */ + 0, /* flags */ + NULL, NULL, /* lockfunc, lockarg */ + &sc->ste_cdata.ste_tx_tag); + if (error != 0) { + device_printf(sc->ste_dev, "could not create Tx DMA tag.\n"); + goto fail; + } + + /* Create DMA tag for Rx buffers. */ + error = bus_dma_tag_create( + sc->ste_cdata.ste_parent_tag, /* parent */ + 1, 0, /* alignment, boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + MCLBYTES, /* maxsize */ + 1, /* nsegments */ + MCLBYTES, /* maxsegsize */ + 0, /* flags */ + NULL, NULL, /* lockfunc, lockarg */ + &sc->ste_cdata.ste_rx_tag); + if (error != 0) { + device_printf(sc->ste_dev, "could not create Rx DMA tag.\n"); + goto fail; + } + + /* Allocate DMA'able memory and load the DMA map for Tx list. */ + error = bus_dmamem_alloc(sc->ste_cdata.ste_tx_list_tag, + (void **)&sc->ste_ldata.ste_tx_list, + BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, + &sc->ste_cdata.ste_tx_list_map); + if (error != 0) { + device_printf(sc->ste_dev, + "could not allocate DMA'able memory for Tx list.\n"); + goto fail; + } + ctx.ste_busaddr = 0; + error = bus_dmamap_load(sc->ste_cdata.ste_tx_list_tag, + sc->ste_cdata.ste_tx_list_map, sc->ste_ldata.ste_tx_list, + STE_TX_LIST_SZ, ste_dmamap_cb, &ctx, 0); + if (error != 0 || ctx.ste_busaddr == 0) { + device_printf(sc->ste_dev, + "could not load DMA'able memory for Tx list.\n"); + goto fail; + } + sc->ste_ldata.ste_tx_list_paddr = ctx.ste_busaddr; + + /* Allocate DMA'able memory and load the DMA map for Rx list. */ + error = bus_dmamem_alloc(sc->ste_cdata.ste_rx_list_tag, + (void **)&sc->ste_ldata.ste_rx_list, + BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, + &sc->ste_cdata.ste_rx_list_map); + if (error != 0) { + device_printf(sc->ste_dev, + "could not allocate DMA'able memory for Rx list.\n"); + goto fail; + } + ctx.ste_busaddr = 0; + error = bus_dmamap_load(sc->ste_cdata.ste_rx_list_tag, + sc->ste_cdata.ste_rx_list_map, sc->ste_ldata.ste_rx_list, + STE_RX_LIST_SZ, ste_dmamap_cb, &ctx, 0); + if (error != 0 || ctx.ste_busaddr == 0) { + device_printf(sc->ste_dev, + "could not load DMA'able memory for Rx list.\n"); + goto fail; + } + sc->ste_ldata.ste_rx_list_paddr = ctx.ste_busaddr; + + /* Create DMA maps for Tx buffers. */ + for (i = 0; i < STE_TX_LIST_CNT; i++) { + txc = &sc->ste_cdata.ste_tx_chain[i]; + txc->ste_ptr = NULL; + txc->ste_mbuf = NULL; + txc->ste_next = NULL; + txc->ste_phys = 0; + txc->ste_map = NULL; + error = bus_dmamap_create(sc->ste_cdata.ste_tx_tag, 0, + &txc->ste_map); + if (error != 0) { + device_printf(sc->ste_dev, + "could not create Tx dmamap.\n"); + goto fail; + } + } + /* Create DMA maps for Rx buffers. */ + if ((error = bus_dmamap_create(sc->ste_cdata.ste_rx_tag, 0, + &sc->ste_cdata.ste_rx_sparemap)) != 0) { + device_printf(sc->ste_dev, + "could not create spare Rx dmamap.\n"); + goto fail; + } + for (i = 0; i < STE_RX_LIST_CNT; i++) { + rxc = &sc->ste_cdata.ste_rx_chain[i]; + rxc->ste_ptr = NULL; + rxc->ste_mbuf = NULL; + rxc->ste_next = NULL; + rxc->ste_map = NULL; + error = bus_dmamap_create(sc->ste_cdata.ste_rx_tag, 0, + &rxc->ste_map); + if (error != 0) { + device_printf(sc->ste_dev, + "could not create Rx dmamap.\n"); + goto fail; } - m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; - } else { - m_new = m; - m_new->m_len = m_new->m_pkthdr.len = MCLBYTES; - m_new->m_data = m_new->m_ext.ext_buf; } - m_adj(m_new, ETHER_ALIGN); +fail: + return (error); +} + +static void +ste_dma_free(struct ste_softc *sc) +{ + struct ste_chain *txc; + struct ste_chain_onefrag *rxc; + int i; - c->ste_mbuf = m_new; - c->ste_ptr->ste_status = 0; - c->ste_ptr->ste_frag.ste_addr = vtophys(mtod(m_new, caddr_t)); - c->ste_ptr->ste_frag.ste_len = (1536 + ETHER_VLAN_ENCAP_LEN) | STE_FRAG_LAST; + /* Tx buffers. */ + if (sc->ste_cdata.ste_tx_tag != NULL) { + for (i = 0; i < STE_TX_LIST_CNT; i++) { + txc = &sc->ste_cdata.ste_tx_chain[i]; + if (txc->ste_map != NULL) { + bus_dmamap_destroy(sc->ste_cdata.ste_tx_tag, + txc->ste_map); + txc->ste_map = NULL; + } + } + bus_dma_tag_destroy(sc->ste_cdata.ste_tx_tag); + sc->ste_cdata.ste_tx_tag = NULL; + } + /* Rx buffers. */ + if (sc->ste_cdata.ste_rx_tag != NULL) { + for (i = 0; i < STE_RX_LIST_CNT; i++) { + rxc = &sc->ste_cdata.ste_rx_chain[i]; + if (rxc->ste_map != NULL) { + bus_dmamap_destroy(sc->ste_cdata.ste_rx_tag, + rxc->ste_map); + rxc->ste_map = NULL; + } + } + if (sc->ste_cdata.ste_rx_sparemap != NULL) { + bus_dmamap_destroy(sc->ste_cdata.ste_rx_tag, + sc->ste_cdata.ste_rx_sparemap); + sc->ste_cdata.ste_rx_sparemap = NULL; + } + bus_dma_tag_destroy(sc->ste_cdata.ste_rx_tag); + sc->ste_cdata.ste_rx_tag = NULL; + } + /* Tx descriptor list. */ + if (sc->ste_cdata.ste_tx_list_tag != NULL) { + if (sc->ste_cdata.ste_tx_list_map != NULL) + bus_dmamap_unload(sc->ste_cdata.ste_tx_list_tag, + sc->ste_cdata.ste_tx_list_map); + if (sc->ste_cdata.ste_tx_list_map != NULL && + sc->ste_ldata.ste_tx_list != NULL) + bus_dmamem_free(sc->ste_cdata.ste_tx_list_tag, + sc->ste_ldata.ste_tx_list, + sc->ste_cdata.ste_tx_list_map); + sc->ste_ldata.ste_tx_list = NULL; + sc->ste_cdata.ste_tx_list_map = NULL; + bus_dma_tag_destroy(sc->ste_cdata.ste_tx_list_tag); + sc->ste_cdata.ste_tx_list_tag = NULL; + } + /* Rx descriptor list. */ + if (sc->ste_cdata.ste_rx_list_tag != NULL) { + if (sc->ste_cdata.ste_rx_list_map != NULL) + bus_dmamap_unload(sc->ste_cdata.ste_rx_list_tag, + sc->ste_cdata.ste_rx_list_map); + if (sc->ste_cdata.ste_rx_list_map != NULL && + sc->ste_ldata.ste_rx_list != NULL) + bus_dmamem_free(sc->ste_cdata.ste_rx_list_tag, + sc->ste_ldata.ste_rx_list, + sc->ste_cdata.ste_rx_list_map); + sc->ste_ldata.ste_rx_list = NULL; + sc->ste_cdata.ste_rx_list_map = NULL; + bus_dma_tag_destroy(sc->ste_cdata.ste_rx_list_tag); + sc->ste_cdata.ste_rx_list_tag = NULL; + } + if (sc->ste_cdata.ste_parent_tag != NULL) { + bus_dma_tag_destroy(sc->ste_cdata.ste_parent_tag); + sc->ste_cdata.ste_parent_tag = NULL; + } +} + +static int +ste_newbuf(struct ste_softc *sc, struct ste_chain_onefrag *rxc) +{ + struct mbuf *m; + bus_dma_segment_t segs[1]; + bus_dmamap_t map; + int error, nsegs; + + m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + if (m == NULL) + return (ENOBUFS); + m->m_len = m->m_pkthdr.len = MCLBYTES; + m_adj(m, ETHER_ALIGN); + + if ((error = bus_dmamap_load_mbuf_sg(sc->ste_cdata.ste_rx_tag, + sc->ste_cdata.ste_rx_sparemap, m, segs, &nsegs, 0)) != 0) { + m_freem(m); + return (error); + } + KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); + if (rxc->ste_mbuf != NULL) { + bus_dmamap_sync(sc->ste_cdata.ste_rx_tag, rxc->ste_map, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(sc->ste_cdata.ste_rx_tag, rxc->ste_map); + } + map = rxc->ste_map; + rxc->ste_map = sc->ste_cdata.ste_rx_sparemap; + sc->ste_cdata.ste_rx_sparemap = map; + bus_dmamap_sync(sc->ste_cdata.ste_rx_tag, rxc->ste_map, + BUS_DMASYNC_PREREAD); + rxc->ste_mbuf = m; + rxc->ste_ptr->ste_status = 0; + rxc->ste_ptr->ste_frag.ste_addr = htole32(segs[0].ds_addr); + rxc->ste_ptr->ste_frag.ste_len = htole32(segs[0].ds_len | + STE_FRAG_LAST); return (0); } @@ -1174,30 +1418,31 @@ ste_init_rx_list(struct ste_softc *sc) { struct ste_chain_data *cd; struct ste_list_data *ld; - int i; + int error, i; cd = &sc->ste_cdata; - ld = sc->ste_ldata; - + ld = &sc->ste_ldata; + bzero(ld->ste_rx_list, STE_RX_LIST_SZ); for (i = 0; i < STE_RX_LIST_CNT; i++) { cd->ste_rx_chain[i].ste_ptr = &ld->ste_rx_list[i]; - if (ste_newbuf(sc, &cd->ste_rx_chain[i], NULL) == ENOBUFS) - return (ENOBUFS); + error = ste_newbuf(sc, &cd->ste_rx_chain[i]); + if (error != 0) + return (error); if (i == (STE_RX_LIST_CNT - 1)) { - cd->ste_rx_chain[i].ste_next = - &cd->ste_rx_chain[0]; - ld->ste_rx_list[i].ste_next = - vtophys(&ld->ste_rx_list[0]); + cd->ste_rx_chain[i].ste_next = &cd->ste_rx_chain[0]; + ld->ste_rx_list[i].ste_next = ld->ste_rx_list_paddr + + (sizeof(struct ste_desc_onefrag) * 0); } else { - cd->ste_rx_chain[i].ste_next = - &cd->ste_rx_chain[i + 1]; - ld->ste_rx_list[i].ste_next = - vtophys(&ld->ste_rx_list[i + 1]); + cd->ste_rx_chain[i].ste_next = &cd->ste_rx_chain[i + 1]; + ld->ste_rx_list[i].ste_next = ld->ste_rx_list_paddr + + (sizeof(struct ste_desc_onefrag) * (i + 1)); } - ld->ste_rx_list[i].ste_status = 0; } cd->ste_rx_head = &cd->ste_rx_chain[0]; + bus_dmamap_sync(sc->ste_cdata.ste_rx_list_tag, + sc->ste_cdata.ste_rx_list_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } @@ -1210,22 +1455,32 @@ ste_init_tx_list(struct ste_softc *sc) int i; cd = &sc->ste_cdata; - ld = sc->ste_ldata; + ld = &sc->ste_ldata; + bzero(ld->ste_tx_list, STE_TX_LIST_SZ); for (i = 0; i < STE_TX_LIST_CNT; i++) { cd->ste_tx_chain[i].ste_ptr = &ld->ste_tx_list[i]; - cd->ste_tx_chain[i].ste_ptr->ste_next = 0; - cd->ste_tx_chain[i].ste_ptr->ste_ctl = 0; - cd->ste_tx_chain[i].ste_phys = vtophys(&ld->ste_tx_list[i]); - if (i == (STE_TX_LIST_CNT - 1)) - cd->ste_tx_chain[i].ste_next = - &cd->ste_tx_chain[0]; - else - cd->ste_tx_chain[i].ste_next = - &cd->ste_tx_chain[i + 1]; + cd->ste_tx_chain[i].ste_mbuf = NULL; + if (i == (STE_TX_LIST_CNT - 1)) { + cd->ste_tx_chain[i].ste_next = &cd->ste_tx_chain[0]; + cd->ste_tx_chain[i].ste_phys = htole32(STE_ADDR_LO( + ld->ste_tx_list_paddr + + (sizeof(struct ste_desc) * 0))); + } else { + cd->ste_tx_chain[i].ste_next = &cd->ste_tx_chain[i + 1]; + cd->ste_tx_chain[i].ste_phys = htole32(STE_ADDR_LO( + ld->ste_tx_list_paddr + + (sizeof(struct ste_desc) * (i + 1)))); + } } + cd->ste_last_tx = NULL; cd->ste_tx_prod = 0; cd->ste_tx_cons = 0; + cd->ste_tx_cnt = 0; + + bus_dmamap_sync(sc->ste_cdata.ste_tx_list_tag, + sc->ste_cdata.ste_tx_list_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } static void @@ -1258,7 +1513,7 @@ ste_init_locked(struct ste_softc *sc) } /* Init RX list */ - if (ste_init_rx_list(sc) == ENOBUFS) { + if (ste_init_rx_list(sc) != 0) { device_printf(sc->ste_dev, "initialization failed: no memory for RX buffers\n"); ste_stop(sc); @@ -1303,11 +1558,11 @@ ste_init_locked(struct ste_softc *sc) STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_RXDMA_STALL); ste_wait(sc); CSR_WRITE_4(sc, STE_RX_DMALIST_PTR, - vtophys(&sc->ste_ldata->ste_rx_list[0])); + STE_ADDR_LO(sc->ste_ldata.ste_rx_list_paddr)); STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_RXDMA_UNSTALL); STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_RXDMA_UNSTALL); - /* Set TX polling interval (defer until we TX first packet */ + /* Set TX polling interval(defer until we TX first packet). */ CSR_WRITE_1(sc, STE_TX_DMAPOLL_PERIOD, 0); /* Load address of the TX list */ @@ -1317,7 +1572,6 @@ ste_init_locked(struct ste_softc *sc) STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_UNSTALL); STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_UNSTALL); ste_wait(sc); - sc->ste_tx_prev = NULL; /* Enable receiver and transmitter */ CSR_WRITE_2(sc, STE_MACCTL0, 0); @@ -1353,6 +1607,8 @@ static void ste_stop(struct ste_softc *sc) { struct ifnet *ifp; + struct ste_chain_onefrag *cur_rx; + struct ste_chain *cur_tx; int i; STE_LOCK_ASSERT(sc); @@ -1377,20 +1633,28 @@ ste_stop(struct ste_softc *sc) sc->ste_link = 0; for (i = 0; i < STE_RX_LIST_CNT; i++) { - if (sc->ste_cdata.ste_rx_chain[i].ste_mbuf != NULL) { - m_freem(sc->ste_cdata.ste_rx_chain[i].ste_mbuf); - sc->ste_cdata.ste_rx_chain[i].ste_mbuf = NULL; + cur_rx = &sc->ste_cdata.ste_rx_chain[i]; + if (cur_rx->ste_mbuf != NULL) { + bus_dmamap_sync(sc->ste_cdata.ste_rx_tag, + cur_rx->ste_map, BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(sc->ste_cdata.ste_rx_tag, + cur_rx->ste_map); + m_freem(cur_rx->ste_mbuf); + cur_rx->ste_mbuf = NULL; } } for (i = 0; i < STE_TX_LIST_CNT; i++) { - if (sc->ste_cdata.ste_tx_chain[i].ste_mbuf != NULL) { - m_freem(sc->ste_cdata.ste_tx_chain[i].ste_mbuf); - sc->ste_cdata.ste_tx_chain[i].ste_mbuf = NULL; + cur_tx = &sc->ste_cdata.ste_tx_chain[i]; + if (cur_tx->ste_mbuf != NULL) { + bus_dmamap_sync(sc->ste_cdata.ste_tx_tag, + cur_tx->ste_map, BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(sc->ste_cdata.ste_tx_tag, + cur_tx->ste_map); + m_freem(cur_tx->ste_mbuf); + cur_tx->ste_mbuf = NULL; } } - - bzero(sc->ste_ldata, sizeof(struct ste_list_data)); } static void @@ -1505,48 +1769,60 @@ ste_ioctl(struct ifnet *ifp, u_long command, caddr_t data) } static int -ste_encap(struct ste_softc *sc, struct ste_chain *c, struct mbuf *m_head) +ste_encap(struct ste_softc *sc, struct mbuf **m_head, struct ste_chain *txc) { + struct ste_frag *frag; struct mbuf *m; - struct ste_desc *d; - struct ste_frag *f = NULL; - int frag = 0; - - d = c->ste_ptr; - d->ste_ctl = 0; + struct ste_desc *desc; + bus_dma_segment_t txsegs[STE_MAXFRAGS]; + int error, i, nsegs; -encap_retry: - for (m = m_head, frag = 0; m != NULL; m = m->m_next) { - if (m->m_len != 0) { - if (frag == STE_MAXFRAGS) - break; - f = &d->ste_frags[frag]; - f->ste_addr = vtophys(mtod(m, vm_offset_t)); - f->ste_len = m->m_len; - frag++; + STE_LOCK_ASSERT(sc); + M_ASSERTPKTHDR((*m_head)); + + error = bus_dmamap_load_mbuf_sg(sc->ste_cdata.ste_tx_tag, + txc->ste_map, *m_head, txsegs, &nsegs, 0); + if (error == EFBIG) { + m = m_collapse(*m_head, M_DONTWAIT, STE_MAXFRAGS); + if (m == NULL) { + m_freem(*m_head); + *m_head = NULL; + return (ENOMEM); } - } - - if (m != NULL) { - struct mbuf *mn; - - /* - * We ran out of segments. We have to recopy this - * mbuf chain first. Bail out if we can't get the - * new buffers. - */ - mn = m_defrag(m_head, M_DONTWAIT); - if (mn == NULL) { - m_freem(m_head); - return ENOMEM; + *m_head = m; + error = bus_dmamap_load_mbuf_sg(sc->ste_cdata.ste_tx_tag, + txc->ste_map, *m_head, txsegs, &nsegs, 0); + if (error != 0) { + m_freem(*m_head); + *m_head = NULL; + return (error); } - m_head = mn; - goto encap_retry; + } else if (error != 0) + return (error); + if (nsegs == 0) { + m_freem(*m_head); + *m_head = NULL; + return (EIO); } - - c->ste_mbuf = m_head; - d->ste_frags[frag - 1].ste_len |= STE_FRAG_LAST; - d->ste_ctl = 1; + bus_dmamap_sync(sc->ste_cdata.ste_tx_tag, txc->ste_map, + BUS_DMASYNC_PREWRITE); + + desc = txc->ste_ptr; + for (i = 0; i < nsegs; i++) { + frag = &desc->ste_frags[i]; + frag->ste_addr = htole32(STE_ADDR_LO(txsegs[i].ds_addr)); + frag->ste_len = htole32(txsegs[i].ds_len); + } + desc->ste_frags[i - 1].ste_len |= htole32(STE_FRAG_LAST); + /* + * Because we use Tx polling we can't chain multiple + * Tx descriptors here. Otherwise we race with controller. + */ + desc->ste_next = 0; + desc->ste_ctl = htole32(STE_TXCTL_ALIGN_DIS | STE_TXCTL_DMAINTR); + txc->ste_mbuf = *m_head; + STE_INC(sc->ste_cdata.ste_tx_prod, STE_TX_LIST_CNT); + sc->ste_cdata.ste_tx_cnt++; return (0); } @@ -1568,7 +1844,7 @@ ste_start_locked(struct ifnet *ifp) struct ste_softc *sc; struct ste_chain *cur_tx; struct mbuf *m_head = NULL; - int idx; + int enq; sc = ifp->if_softc; STE_LOCK_ASSERT(sc); @@ -1579,62 +1855,56 @@ ste_start_locked(struct ifnet *ifp) if (ifp->if_drv_flags & IFF_DRV_OACTIVE) return; - idx = sc->ste_cdata.ste_tx_prod; - - while (sc->ste_cdata.ste_tx_chain[idx].ste_mbuf == NULL) { - /* - * We cannot re-use the last (free) descriptor; - * the chip may not have read its ste_next yet. - */ - if (STE_NEXT(idx, STE_TX_LIST_CNT) == - sc->ste_cdata.ste_tx_cons) { + for (enq = 0; !IFQ_DRV_IS_EMPTY(&ifp->if_snd);) { + if (sc->ste_cdata.ste_tx_cnt == STE_TX_LIST_CNT - 1) { + /* + * Controller may have cached copy of the last used + * next ptr so we have to reserve one TFD to avoid + * TFD overruns. + */ ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } - IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; - - cur_tx = &sc->ste_cdata.ste_tx_chain[idx]; - - if (ste_encap(sc, cur_tx, m_head) != 0) + cur_tx = &sc->ste_cdata.ste_tx_chain[sc->ste_cdata.ste_tx_prod]; + if (ste_encap(sc, &m_head, cur_tx) != 0) { + if (m_head == NULL) + break; + IFQ_DRV_PREPEND(&ifp->if_snd, m_head); break; - - cur_tx->ste_ptr->ste_next = 0; - - if (sc->ste_tx_prev == NULL) { - cur_tx->ste_ptr->ste_ctl = STE_TXCTL_DMAINTR | 1; - /* Load address of the TX list */ + } + if (sc->ste_cdata.ste_last_tx == NULL) { + bus_dmamap_sync(sc->ste_cdata.ste_tx_list_tag, + sc->ste_cdata.ste_tx_list_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_STALL); ste_wait(sc); - CSR_WRITE_4(sc, STE_TX_DMALIST_PTR, - vtophys(&sc->ste_ldata->ste_tx_list[0])); - - /* Set TX polling interval to start TX engine */ + STE_ADDR_LO(sc->ste_ldata.ste_tx_list_paddr)); CSR_WRITE_1(sc, STE_TX_DMAPOLL_PERIOD, 64); - STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_UNSTALL); ste_wait(sc); - }else{ - cur_tx->ste_ptr->ste_ctl = STE_TXCTL_DMAINTR | 1; - sc->ste_tx_prev->ste_ptr->ste_next - = cur_tx->ste_phys; + } else { + sc->ste_cdata.ste_last_tx->ste_ptr->ste_next = + sc->ste_cdata.ste_last_tx->ste_phys; + bus_dmamap_sync(sc->ste_cdata.ste_tx_list_tag, + sc->ste_cdata.ste_tx_list_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } + sc->ste_cdata.ste_last_tx = cur_tx; - sc->ste_tx_prev = cur_tx; - + enq++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ - BPF_MTAP(ifp, cur_tx->ste_mbuf); - - STE_INC(idx, STE_TX_LIST_CNT); - sc->ste_timer = 5; + BPF_MTAP(ifp, m_head); } - sc->ste_cdata.ste_tx_prod = idx; + + if (enq > 0) + sc->ste_timer = STE_TX_TIMEOUT; } static void @@ -1650,8 +1920,7 @@ ste_watchdog(struct ste_softc *sc) ste_txeoc(sc); ste_txeof(sc); - ste_rxeoc(sc); - ste_rxeof(sc); + ste_rxeof(sc, -1); ste_reset(sc); ste_init_locked(sc); diff --git a/sys/dev/ste/if_stereg.h b/sys/dev/ste/if_stereg.h index 8de1961..49c52aa 100644 --- a/sys/dev/ste/if_stereg.h +++ b/sys/dev/ste/if_stereg.h @@ -412,6 +412,14 @@ struct ste_frag { #define STE_FRAG_LAST 0x80000000 #define STE_FRAG_LEN 0x00001FFF +/* + * A TFD is 16 to 512 bytes in length which means it can have up to 126 + * fragments for a single Tx frame. Since most frames used in stack have + * 3-4 fragments supporting 8 fragments would be enough for normal + * operation. If we encounter more than 8 fragments we'll collapse them + * into a frame that has less than or equal to 8 fragments. Each buffer + * address of a fragment has no alignment limitation. + */ #define STE_MAXFRAGS 8 struct ste_desc { @@ -420,6 +428,12 @@ struct ste_desc { struct ste_frag ste_frags[STE_MAXFRAGS]; }; +/* + * A RFD has the same structure of TFD which in turn means hardware + * supports scatter operation in Rx buffer. Since we just allocate Rx + * buffer with m_getcl(9) there is no fragmentation at all so use + * single fragment for RFD. + */ struct ste_desc_onefrag { uint32_t ste_next; uint32_t ste_status; @@ -427,6 +441,7 @@ struct ste_desc_onefrag { }; #define STE_TXCTL_WORDALIGN 0x00000003 +#define STE_TXCTL_ALIGN_DIS 0x00000001 #define STE_TXCTL_FRAMEID 0x000003FC #define STE_TXCTL_NOCRC 0x00002000 #define STE_TXCTL_TXINTR 0x00008000 @@ -445,6 +460,8 @@ struct ste_desc_onefrag { #define STE_RXSTAT_DMA_OFLOW 0x01000000 #define STE_RXATAT_ONEBUF 0x10000000 +#define STE_RX_BYTES(x) ((x) & STE_RXSTAT_FRAMELEN) + /* * register space access macros */ @@ -462,13 +479,22 @@ struct ste_desc_onefrag { #define CSR_READ_1(sc, reg) \ bus_space_read_1(sc->ste_btag, sc->ste_bhandle, reg) +#define STE_DESC_ALIGN 8 +#define STE_RX_LIST_CNT 128 +#define STE_TX_LIST_CNT 128 +#define STE_RX_LIST_SZ \ + (sizeof(struct ste_desc_onefrag) * STE_RX_LIST_CNT) +#define STE_TX_LIST_SZ \ + (sizeof(struct ste_desc) * STE_TX_LIST_CNT) +#define STE_ADDR_LO(x) ((uint64_t)(x) & 0xFFFFFFFF) +#define STE_ADDR_HI(x) ((uint64_t)(x) >> 32) + +#define STE_TX_TIMEOUT 5 #define STE_TIMEOUT 1000 #define STE_MIN_FRAMELEN 60 #define STE_PACKET_SIZE 1536 -#define ETHER_ALIGN 2 -#define STE_RX_LIST_CNT 64 -#define STE_TX_LIST_CNT 128 #define STE_INC(x, y) (x) = (x + 1) % y +#define STE_DEC(x, y) (x) = ((x) + ((y) - 1)) % (y) #define STE_NEXT(x, y) (x + 1) % y struct ste_type { @@ -478,8 +504,10 @@ struct ste_type { }; struct ste_list_data { - struct ste_desc_onefrag ste_rx_list[STE_RX_LIST_CNT]; - struct ste_desc ste_tx_list[STE_TX_LIST_CNT]; + struct ste_desc_onefrag *ste_rx_list; + bus_addr_t ste_rx_list_paddr; + struct ste_desc *ste_tx_list; + bus_addr_t ste_tx_list_paddr; }; struct ste_chain { @@ -487,21 +515,32 @@ struct ste_chain { struct mbuf *ste_mbuf; struct ste_chain *ste_next; uint32_t ste_phys; + bus_dmamap_t ste_map; }; struct ste_chain_onefrag { struct ste_desc_onefrag *ste_ptr; struct mbuf *ste_mbuf; struct ste_chain_onefrag *ste_next; + bus_dmamap_t ste_map; }; struct ste_chain_data { + bus_dma_tag_t ste_parent_tag; + bus_dma_tag_t ste_rx_tag; + bus_dma_tag_t ste_tx_tag; + bus_dma_tag_t ste_rx_list_tag; + bus_dmamap_t ste_rx_list_map; + bus_dma_tag_t ste_tx_list_tag; + bus_dmamap_t ste_tx_list_map; + bus_dmamap_t ste_rx_sparemap; struct ste_chain_onefrag ste_rx_chain[STE_RX_LIST_CNT]; - struct ste_chain ste_tx_chain[STE_TX_LIST_CNT]; + struct ste_chain ste_tx_chain[STE_TX_LIST_CNT]; struct ste_chain_onefrag *ste_rx_head; - + struct ste_chain *ste_last_tx; int ste_tx_prod; int ste_tx_cons; + int ste_tx_cnt; }; struct ste_softc { @@ -518,15 +557,11 @@ struct ste_softc { uint8_t ste_link; int ste_if_flags; int ste_timer; - struct ste_chain *ste_tx_prev; - struct ste_list_data *ste_ldata; + struct ste_list_data ste_ldata; struct ste_chain_data ste_cdata; struct callout ste_stat_callout; struct mtx ste_mtx; uint8_t ste_one_phy; -#ifdef DEVICE_POLLING - int rxcycles; -#endif }; #define STE_LOCK(_sc) mtx_lock(&(_sc)->ste_mtx) -- cgit v1.1